Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/aarch64/aarch64-sve.md @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 ;; Machine description for AArch64 SVE. | |
2 ;; Copyright (C) 2009-2016 Free Software Foundation, Inc. | |
3 ;; Contributed by ARM Ltd. | |
4 ;; | |
5 ;; This file is part of GCC. | |
6 ;; | |
7 ;; GCC is free software; you can redistribute it and/or modify it | |
8 ;; under the terms of the GNU General Public License as published by | |
9 ;; the Free Software Foundation; either version 3, or (at your option) | |
10 ;; any later version. | |
11 ;; | |
12 ;; GCC is distributed in the hope that it will be useful, but | |
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 ;; General Public License for more details. | |
16 ;; | |
17 ;; You should have received a copy of the GNU General Public License | |
18 ;; along with GCC; see the file COPYING3. If not see | |
19 ;; <http://www.gnu.org/licenses/>. | |
20 | |
21 ;; Note on the handling of big-endian SVE | |
22 ;; -------------------------------------- | |
23 ;; | |
24 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the | |
25 ;; same way as movdi or movti would: the first byte of memory goes | |
26 ;; into the most significant byte of the register and the last byte | |
27 ;; of memory goes into the least significant byte of the register. | |
28 ;; This is the most natural ordering for Advanced SIMD and matches | |
29 ;; the ABI layout for 64-bit and 128-bit vector types. | |
30 ;; | |
31 ;; As a result, the order of bytes within the register is what GCC | |
32 ;; expects for a big-endian target, and subreg offsets therefore work | |
33 ;; as expected, with the first element in memory having subreg offset 0 | |
34 ;; and the last element in memory having the subreg offset associated | |
35 ;; with a big-endian lowpart. However, this ordering also means that | |
36 ;; GCC's lane numbering does not match the architecture's numbering: | |
37 ;; GCC always treats the element at the lowest address in memory | |
38 ;; (subreg offset 0) as element 0, while the architecture treats | |
39 ;; the least significant end of the register as element 0. | |
40 ;; | |
41 ;; The situation for SVE is different. We want the layout of the | |
42 ;; SVE register to be same for mov<mode> as it is for maskload<mode>: | |
43 ;; logically, a mov<mode> load must be indistinguishable from a | |
44 ;; maskload<mode> whose mask is all true. We therefore need the | |
45 ;; register layout to match LD1 rather than LDR. The ABI layout of | |
46 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering. | |
47 ;; | |
48 ;; As a result, the architecture lane numbering matches GCC's lane | |
49 ;; numbering, with element 0 always being the first in memory. | |
50 ;; However: | |
51 ;; | |
52 ;; - Applying a subreg offset to a register does not give the element | |
53 ;; that GCC expects: the first element in memory has the subreg offset | |
54 ;; associated with a big-endian lowpart while the last element in memory | |
55 ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS. | |
56 ;; | |
57 ;; - We cannot use LDR and STR for spill slots that might be accessed | |
58 ;; via subregs, since although the elements have the order GCC expects, | |
59 ;; the order of the bytes within the elements is different. We instead | |
60 ;; access spill slots via LD1 and ST1, using secondary reloads to | |
61 ;; reserve a predicate register. | |
62 | |
63 | |
64 ;; SVE data moves. | |
65 (define_expand "mov<mode>" | |
66 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") | |
67 (match_operand:SVE_ALL 1 "general_operand"))] | |
68 "TARGET_SVE" | |
69 { | |
70 /* Use the predicated load and store patterns where possible. | |
71 This is required for big-endian targets (see the comment at the | |
72 head of the file) and increases the addressing choices for | |
73 little-endian. */ | |
74 if ((MEM_P (operands[0]) || MEM_P (operands[1])) | |
75 && can_create_pseudo_p ()) | |
76 { | |
77 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); | |
78 DONE; | |
79 } | |
80 | |
81 if (CONSTANT_P (operands[1])) | |
82 { | |
83 aarch64_expand_mov_immediate (operands[0], operands[1], | |
84 gen_vec_duplicate<mode>); | |
85 DONE; | |
86 } | |
87 | |
88 /* Optimize subregs on big-endian targets: we can use REV[BHW] | |
89 instead of going through memory. */ | |
90 if (BYTES_BIG_ENDIAN | |
91 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1])) | |
92 DONE; | |
93 } | |
94 ) | |
95 | |
96 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect | |
97 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move | |
98 ;; for details. We use a special predicate for operand 2 to reduce | |
99 ;; the number of patterns. | |
100 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be" | |
101 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w") | |
102 (unspec:SVE_ALL | |
103 [(match_operand:VNx16BI 1 "register_operand" "Upl") | |
104 (match_operand 2 "aarch64_any_register_operand" "w")] | |
105 UNSPEC_REV_SUBREG))] | |
106 "TARGET_SVE && BYTES_BIG_ENDIAN" | |
107 "#" | |
108 "&& reload_completed" | |
109 [(const_int 0)] | |
110 { | |
111 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]); | |
112 DONE; | |
113 } | |
114 ) | |
115 | |
116 ;; Unpredicated moves (little-endian). Only allow memory operations | |
117 ;; during and after RA; before RA we want the predicated load and | |
118 ;; store patterns to be used instead. | |
119 (define_insn "*aarch64_sve_mov<mode>_le" | |
120 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") | |
121 (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] | |
122 "TARGET_SVE | |
123 && !BYTES_BIG_ENDIAN | |
124 && ((lra_in_progress || reload_completed) | |
125 || (register_operand (operands[0], <MODE>mode) | |
126 && nonmemory_operand (operands[1], <MODE>mode)))" | |
127 "@ | |
128 ldr\t%0, %1 | |
129 str\t%1, %0 | |
130 mov\t%0.d, %1.d | |
131 * return aarch64_output_sve_mov_immediate (operands[1]);" | |
132 ) | |
133 | |
134 ;; Unpredicated moves (big-endian). Memory accesses require secondary | |
135 ;; reloads. | |
136 (define_insn "*aarch64_sve_mov<mode>_be" | |
137 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") | |
138 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))] | |
139 "TARGET_SVE && BYTES_BIG_ENDIAN" | |
140 "@ | |
141 mov\t%0.d, %1.d | |
142 * return aarch64_output_sve_mov_immediate (operands[1]);" | |
143 ) | |
144 | |
145 ;; Handle big-endian memory reloads. We use byte PTRUE for all modes | |
146 ;; to try to encourage reuse. | |
147 (define_expand "aarch64_sve_reload_be" | |
148 [(parallel | |
149 [(set (match_operand 0) | |
150 (match_operand 1)) | |
151 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])] | |
152 "TARGET_SVE && BYTES_BIG_ENDIAN" | |
153 { | |
154 /* Create a PTRUE. */ | |
155 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); | |
156 | |
157 /* Refer to the PTRUE in the appropriate mode for this move. */ | |
158 machine_mode mode = GET_MODE (operands[0]); | |
159 machine_mode pred_mode | |
160 = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require (); | |
161 rtx pred = gen_lowpart (pred_mode, operands[2]); | |
162 | |
163 /* Emit a predicated load or store. */ | |
164 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]); | |
165 DONE; | |
166 } | |
167 ) | |
168 | |
169 ;; A predicated load or store for which the predicate is known to be | |
170 ;; all-true. Note that this pattern is generated directly by | |
171 ;; aarch64_emit_sve_pred_move, so changes to this pattern will | |
172 ;; need changes there as well. | |
173 (define_insn "*pred_mov<mode>" | |
174 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, m") | |
175 (unspec:SVE_ALL | |
176 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
177 (match_operand:SVE_ALL 2 "nonimmediate_operand" "m, w")] | |
178 UNSPEC_MERGE_PTRUE))] | |
179 "TARGET_SVE | |
180 && (register_operand (operands[0], <MODE>mode) | |
181 || register_operand (operands[2], <MODE>mode))" | |
182 "@ | |
183 ld1<Vesize>\t%0.<Vetype>, %1/z, %2 | |
184 st1<Vesize>\t%2.<Vetype>, %1, %0" | |
185 ) | |
186 | |
187 (define_expand "movmisalign<mode>" | |
188 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") | |
189 (match_operand:SVE_ALL 1 "general_operand"))] | |
190 "TARGET_SVE" | |
191 { | |
192 /* Equivalent to a normal move for our purposes. */ | |
193 emit_move_insn (operands[0], operands[1]); | |
194 DONE; | |
195 } | |
196 ) | |
197 | |
198 (define_insn "maskload<mode><vpred>" | |
199 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
200 (unspec:SVE_ALL | |
201 [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
202 (match_operand:SVE_ALL 1 "memory_operand" "m")] | |
203 UNSPEC_LD1_SVE))] | |
204 "TARGET_SVE" | |
205 "ld1<Vesize>\t%0.<Vetype>, %2/z, %1" | |
206 ) | |
207 | |
208 (define_insn "maskstore<mode><vpred>" | |
209 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m") | |
210 (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
211 (match_operand:SVE_ALL 1 "register_operand" "w") | |
212 (match_dup 0)] | |
213 UNSPEC_ST1_SVE))] | |
214 "TARGET_SVE" | |
215 "st1<Vesize>\t%1.<Vetype>, %2, %0" | |
216 ) | |
217 | |
218 ;; Unpredicated gather loads. | |
219 (define_expand "gather_load<mode>" | |
220 [(set (match_operand:SVE_SD 0 "register_operand") | |
221 (unspec:SVE_SD | |
222 [(match_dup 5) | |
223 (match_operand:DI 1 "aarch64_reg_or_zero") | |
224 (match_operand:<V_INT_EQUIV> 2 "register_operand") | |
225 (match_operand:DI 3 "const_int_operand") | |
226 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>") | |
227 (mem:BLK (scratch))] | |
228 UNSPEC_LD1_GATHER))] | |
229 "TARGET_SVE" | |
230 { | |
231 operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
232 } | |
233 ) | |
234 | |
235 ;; Predicated gather loads for 32-bit elements. Operand 3 is true for | |
236 ;; unsigned extension and false for signed extension. | |
237 (define_insn "mask_gather_load<mode>" | |
238 [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w") | |
239 (unspec:SVE_S | |
240 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
241 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") | |
242 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w") | |
243 (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1") | |
244 (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") | |
245 (mem:BLK (scratch))] | |
246 UNSPEC_LD1_GATHER))] | |
247 "TARGET_SVE" | |
248 "@ | |
249 ld1w\t%0.s, %5/z, [%2.s] | |
250 ld1w\t%0.s, %5/z, [%1, %2.s, sxtw] | |
251 ld1w\t%0.s, %5/z, [%1, %2.s, uxtw] | |
252 ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] | |
253 ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" | |
254 ) | |
255 | |
256 ;; Predicated gather loads for 64-bit elements. The value of operand 3 | |
257 ;; doesn't matter in this case. | |
258 (define_insn "mask_gather_load<mode>" | |
259 [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w") | |
260 (unspec:SVE_D | |
261 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl") | |
262 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk") | |
263 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w") | |
264 (match_operand:DI 3 "const_int_operand") | |
265 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") | |
266 (mem:BLK (scratch))] | |
267 UNSPEC_LD1_GATHER))] | |
268 "TARGET_SVE" | |
269 "@ | |
270 ld1d\t%0.d, %5/z, [%2.d] | |
271 ld1d\t%0.d, %5/z, [%1, %2.d] | |
272 ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]" | |
273 ) | |
274 | |
275 ;; Unpredicated scatter store. | |
276 (define_expand "scatter_store<mode>" | |
277 [(set (mem:BLK (scratch)) | |
278 (unspec:BLK | |
279 [(match_dup 5) | |
280 (match_operand:DI 0 "aarch64_reg_or_zero") | |
281 (match_operand:<V_INT_EQUIV> 1 "register_operand") | |
282 (match_operand:DI 2 "const_int_operand") | |
283 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>") | |
284 (match_operand:SVE_SD 4 "register_operand")] | |
285 UNSPEC_ST1_SCATTER))] | |
286 "TARGET_SVE" | |
287 { | |
288 operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
289 } | |
290 ) | |
291 | |
292 ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for | |
293 ;; unsigned extension and false for signed extension. | |
294 (define_insn "mask_scatter_store<mode>" | |
295 [(set (mem:BLK (scratch)) | |
296 (unspec:BLK | |
297 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") | |
298 (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") | |
299 (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w") | |
300 (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1") | |
301 (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") | |
302 (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")] | |
303 UNSPEC_ST1_SCATTER))] | |
304 "TARGET_SVE" | |
305 "@ | |
306 st1w\t%4.s, %5, [%1.s] | |
307 st1w\t%4.s, %5, [%0, %1.s, sxtw] | |
308 st1w\t%4.s, %5, [%0, %1.s, uxtw] | |
309 st1w\t%4.s, %5, [%0, %1.s, sxtw %p3] | |
310 st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]" | |
311 ) | |
312 | |
313 ;; Predicated scatter stores for 64-bit elements. The value of operand 2 | |
314 ;; doesn't matter in this case. | |
315 (define_insn "mask_scatter_store<mode>" | |
316 [(set (mem:BLK (scratch)) | |
317 (unspec:BLK | |
318 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl") | |
319 (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk") | |
320 (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w") | |
321 (match_operand:DI 2 "const_int_operand") | |
322 (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") | |
323 (match_operand:SVE_D 4 "register_operand" "w, w, w")] | |
324 UNSPEC_ST1_SCATTER))] | |
325 "TARGET_SVE" | |
326 "@ | |
327 st1d\t%4.d, %5, [%1.d] | |
328 st1d\t%4.d, %5, [%0, %1.d] | |
329 st1d\t%4.d, %5, [%0, %1.d, lsl %p3]" | |
330 ) | |
331 | |
332 ;; SVE structure moves. | |
333 (define_expand "mov<mode>" | |
334 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand") | |
335 (match_operand:SVE_STRUCT 1 "general_operand"))] | |
336 "TARGET_SVE" | |
337 { | |
338 /* Big-endian loads and stores need to be done via LD1 and ST1; | |
339 see the comment at the head of the file for details. */ | |
340 if ((MEM_P (operands[0]) || MEM_P (operands[1])) | |
341 && BYTES_BIG_ENDIAN) | |
342 { | |
343 gcc_assert (can_create_pseudo_p ()); | |
344 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); | |
345 DONE; | |
346 } | |
347 | |
348 if (CONSTANT_P (operands[1])) | |
349 { | |
350 aarch64_expand_mov_immediate (operands[0], operands[1]); | |
351 DONE; | |
352 } | |
353 } | |
354 ) | |
355 | |
356 ;; Unpredicated structure moves (little-endian). | |
357 (define_insn "*aarch64_sve_mov<mode>_le" | |
358 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") | |
359 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] | |
360 "TARGET_SVE && !BYTES_BIG_ENDIAN" | |
361 "#" | |
362 [(set_attr "length" "<insn_length>")] | |
363 ) | |
364 | |
365 ;; Unpredicated structure moves (big-endian). Memory accesses require | |
366 ;; secondary reloads. | |
367 (define_insn "*aarch64_sve_mov<mode>_be" | |
368 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w") | |
369 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))] | |
370 "TARGET_SVE && BYTES_BIG_ENDIAN" | |
371 "#" | |
372 [(set_attr "length" "<insn_length>")] | |
373 ) | |
374 | |
375 ;; Split unpredicated structure moves into pieces. This is the same | |
376 ;; for both big-endian and little-endian code, although it only needs | |
377 ;; to handle memory operands for little-endian code. | |
378 (define_split | |
379 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand") | |
380 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))] | |
381 "TARGET_SVE && reload_completed" | |
382 [(const_int 0)] | |
383 { | |
384 rtx dest = operands[0]; | |
385 rtx src = operands[1]; | |
386 if (REG_P (dest) && REG_P (src)) | |
387 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>); | |
388 else | |
389 for (unsigned int i = 0; i < <vector_count>; ++i) | |
390 { | |
391 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode, | |
392 i * BYTES_PER_SVE_VECTOR); | |
393 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode, | |
394 i * BYTES_PER_SVE_VECTOR); | |
395 emit_insn (gen_rtx_SET (subdest, subsrc)); | |
396 } | |
397 DONE; | |
398 } | |
399 ) | |
400 | |
401 ;; Predicated structure moves. This works for both endiannesses but in | |
402 ;; practice is only useful for big-endian. | |
403 (define_insn_and_split "pred_mov<mode>" | |
404 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, Utx") | |
405 (unspec:SVE_STRUCT | |
406 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
407 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "Utx, w")] | |
408 UNSPEC_MERGE_PTRUE))] | |
409 "TARGET_SVE | |
410 && (register_operand (operands[0], <MODE>mode) | |
411 || register_operand (operands[2], <MODE>mode))" | |
412 "#" | |
413 "&& reload_completed" | |
414 [(const_int 0)] | |
415 { | |
416 for (unsigned int i = 0; i < <vector_count>; ++i) | |
417 { | |
418 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0], | |
419 <MODE>mode, | |
420 i * BYTES_PER_SVE_VECTOR); | |
421 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2], | |
422 <MODE>mode, | |
423 i * BYTES_PER_SVE_VECTOR); | |
424 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc); | |
425 } | |
426 DONE; | |
427 } | |
428 [(set_attr "length" "<insn_length>")] | |
429 ) | |
430 | |
431 (define_expand "mov<mode>" | |
432 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand") | |
433 (match_operand:PRED_ALL 1 "general_operand"))] | |
434 "TARGET_SVE" | |
435 { | |
436 if (GET_CODE (operands[0]) == MEM) | |
437 operands[1] = force_reg (<MODE>mode, operands[1]); | |
438 } | |
439 ) | |
440 | |
441 (define_insn "*aarch64_sve_mov<mode>" | |
442 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa") | |
443 (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))] | |
444 "TARGET_SVE | |
445 && (register_operand (operands[0], <MODE>mode) | |
446 || register_operand (operands[1], <MODE>mode))" | |
447 "@ | |
448 mov\t%0.b, %1.b | |
449 str\t%1, %0 | |
450 ldr\t%0, %1 | |
451 pfalse\t%0.b | |
452 * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');" | |
453 ) | |
454 | |
455 ;; Handle extractions from a predicate by converting to an integer vector | |
456 ;; and extracting from there. | |
457 (define_expand "vec_extract<vpred><Vel>" | |
458 [(match_operand:<VEL> 0 "register_operand") | |
459 (match_operand:<VPRED> 1 "register_operand") | |
460 (match_operand:SI 2 "nonmemory_operand") | |
461 ;; Dummy operand to which we can attach the iterator. | |
462 (reg:SVE_I V0_REGNUM)] | |
463 "TARGET_SVE" | |
464 { | |
465 rtx tmp = gen_reg_rtx (<MODE>mode); | |
466 emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1], | |
467 CONST1_RTX (<MODE>mode), | |
468 CONST0_RTX (<MODE>mode))); | |
469 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2])); | |
470 DONE; | |
471 } | |
472 ) | |
473 | |
474 (define_expand "vec_extract<mode><Vel>" | |
475 [(set (match_operand:<VEL> 0 "register_operand") | |
476 (vec_select:<VEL> | |
477 (match_operand:SVE_ALL 1 "register_operand") | |
478 (parallel [(match_operand:SI 2 "nonmemory_operand")])))] | |
479 "TARGET_SVE" | |
480 { | |
481 poly_int64 val; | |
482 if (poly_int_rtx_p (operands[2], &val) | |
483 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1)) | |
484 { | |
485 /* The last element can be extracted with a LASTB and a false | |
486 predicate. */ | |
487 rtx sel = force_reg (<VPRED>mode, CONST0_RTX (<VPRED>mode)); | |
488 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1])); | |
489 DONE; | |
490 } | |
491 if (!CONST_INT_P (operands[2])) | |
492 { | |
493 /* Create an index with operand[2] as the base and -1 as the step. | |
494 It will then be zero for the element we care about. */ | |
495 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]); | |
496 index = force_reg (<VEL_INT>mode, index); | |
497 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode); | |
498 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx)); | |
499 | |
500 /* Get a predicate that is true for only that element. */ | |
501 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode); | |
502 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero); | |
503 rtx sel = gen_reg_rtx (<VPRED>mode); | |
504 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero)); | |
505 | |
506 /* Select the element using LASTB. */ | |
507 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1])); | |
508 DONE; | |
509 } | |
510 } | |
511 ) | |
512 | |
513 ;; Extract element zero. This is a special case because we want to force | |
514 ;; the registers to be the same for the second alternative, and then | |
515 ;; split the instruction into nothing after RA. | |
516 (define_insn_and_split "*vec_extract<mode><Vel>_0" | |
517 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") | |
518 (vec_select:<VEL> | |
519 (match_operand:SVE_ALL 1 "register_operand" "w, 0, w") | |
520 (parallel [(const_int 0)])))] | |
521 "TARGET_SVE" | |
522 { | |
523 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1])); | |
524 switch (which_alternative) | |
525 { | |
526 case 0: | |
527 return "umov\\t%<vwcore>0, %1.<Vetype>[0]"; | |
528 case 1: | |
529 return "#"; | |
530 case 2: | |
531 return "st1\\t{%1.<Vetype>}[0], %0"; | |
532 default: | |
533 gcc_unreachable (); | |
534 } | |
535 } | |
536 "&& reload_completed | |
537 && REG_P (operands[0]) | |
538 && REGNO (operands[0]) == REGNO (operands[1])" | |
539 [(const_int 0)] | |
540 { | |
541 emit_note (NOTE_INSN_DELETED); | |
542 DONE; | |
543 } | |
544 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")] | |
545 ) | |
546 | |
547 ;; Extract an element from the Advanced SIMD portion of the register. | |
548 ;; We don't just reuse the aarch64-simd.md pattern because we don't | |
549 ;; want any change in lane number on big-endian targets. | |
550 (define_insn "*vec_extract<mode><Vel>_v128" | |
551 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") | |
552 (vec_select:<VEL> | |
553 (match_operand:SVE_ALL 1 "register_operand" "w, w, w") | |
554 (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
555 "TARGET_SVE | |
556 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)" | |
557 { | |
558 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1])); | |
559 switch (which_alternative) | |
560 { | |
561 case 0: | |
562 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]"; | |
563 case 1: | |
564 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]"; | |
565 case 2: | |
566 return "st1\\t{%1.<Vetype>}[%2], %0"; | |
567 default: | |
568 gcc_unreachable (); | |
569 } | |
570 } | |
571 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")] | |
572 ) | |
573 | |
574 ;; Extract an element in the range of DUP. This pattern allows the | |
575 ;; source and destination to be different. | |
576 (define_insn "*vec_extract<mode><Vel>_dup" | |
577 [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
578 (vec_select:<VEL> | |
579 (match_operand:SVE_ALL 1 "register_operand" "w") | |
580 (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
581 "TARGET_SVE | |
582 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)" | |
583 { | |
584 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); | |
585 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]"; | |
586 } | |
587 ) | |
588 | |
589 ;; Extract an element outside the range of DUP. This pattern requires the | |
590 ;; source and destination to be the same. | |
591 (define_insn "*vec_extract<mode><Vel>_ext" | |
592 [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
593 (vec_select:<VEL> | |
594 (match_operand:SVE_ALL 1 "register_operand" "0") | |
595 (parallel [(match_operand:SI 2 "const_int_operand")])))] | |
596 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64" | |
597 { | |
598 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); | |
599 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode)); | |
600 return "ext\t%0.b, %0.b, %0.b, #%2"; | |
601 } | |
602 ) | |
603 | |
604 ;; Extract the last active element of operand 1 into operand 0. | |
605 ;; If no elements are active, extract the last inactive element instead. | |
606 (define_insn "extract_last_<mode>" | |
607 [(set (match_operand:<VEL> 0 "register_operand" "=r, w") | |
608 (unspec:<VEL> | |
609 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
610 (match_operand:SVE_ALL 2 "register_operand" "w, w")] | |
611 UNSPEC_LASTB))] | |
612 "TARGET_SVE" | |
613 "@ | |
614 lastb\t%<vwcore>0, %1, %2.<Vetype> | |
615 lastb\t%<Vetype>0, %1, %2.<Vetype>" | |
616 ) | |
617 | |
618 (define_expand "vec_duplicate<mode>" | |
619 [(parallel | |
620 [(set (match_operand:SVE_ALL 0 "register_operand") | |
621 (vec_duplicate:SVE_ALL | |
622 (match_operand:<VEL> 1 "aarch64_sve_dup_operand"))) | |
623 (clobber (scratch:<VPRED>))])] | |
624 "TARGET_SVE" | |
625 { | |
626 if (MEM_P (operands[1])) | |
627 { | |
628 rtx ptrue = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
629 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1], | |
630 CONST0_RTX (<MODE>mode))); | |
631 DONE; | |
632 } | |
633 } | |
634 ) | |
635 | |
636 ;; Accept memory operands for the benefit of combine, and also in case | |
637 ;; the scalar input gets spilled to memory during RA. We want to split | |
638 ;; the load at the first opportunity in order to allow the PTRUE to be | |
639 ;; optimized with surrounding code. | |
640 (define_insn_and_split "*vec_duplicate<mode>_reg" | |
641 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w") | |
642 (vec_duplicate:SVE_ALL | |
643 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty"))) | |
644 (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))] | |
645 "TARGET_SVE" | |
646 "@ | |
647 mov\t%0.<Vetype>, %<vwcore>1 | |
648 mov\t%0.<Vetype>, %<Vetype>1 | |
649 #" | |
650 "&& MEM_P (operands[1])" | |
651 [(const_int 0)] | |
652 { | |
653 if (GET_CODE (operands[2]) == SCRATCH) | |
654 operands[2] = gen_reg_rtx (<VPRED>mode); | |
655 emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode)); | |
656 emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1], | |
657 CONST0_RTX (<MODE>mode))); | |
658 DONE; | |
659 } | |
660 [(set_attr "length" "4,4,8")] | |
661 ) | |
662 | |
663 ;; This is used for vec_duplicate<mode>s from memory, but can also | |
664 ;; be used by combine to optimize selects of a vec_duplicate<mode> | |
665 ;; with zero. | |
666 (define_insn "sve_ld1r<mode>" | |
667 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
668 (unspec:SVE_ALL | |
669 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
670 (vec_duplicate:SVE_ALL | |
671 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty")) | |
672 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")] | |
673 UNSPEC_SEL))] | |
674 "TARGET_SVE" | |
675 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2" | |
676 ) | |
677 | |
678 ;; Load 128 bits from memory and duplicate to fill a vector. Since there | |
679 ;; are so few operations on 128-bit "elements", we don't define a VNx1TI | |
680 ;; and simply use vectors of bytes instead. | |
681 (define_insn "*sve_ld1rq<Vesize>" | |
682 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
683 (unspec:SVE_ALL | |
684 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
685 (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")] | |
686 UNSPEC_LD1RQ))] | |
687 "TARGET_SVE" | |
688 "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2" | |
689 ) | |
690 | |
691 ;; Implement a predicate broadcast by shifting the low bit of the scalar | |
692 ;; input into the top bit and using a WHILELO. An alternative would be to | |
693 ;; duplicate the input and do a compare with zero. | |
694 (define_expand "vec_duplicate<mode>" | |
695 [(set (match_operand:PRED_ALL 0 "register_operand") | |
696 (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))] | |
697 "TARGET_SVE" | |
698 { | |
699 rtx tmp = gen_reg_rtx (DImode); | |
700 rtx op1 = gen_lowpart (DImode, operands[1]); | |
701 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode))); | |
702 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp)); | |
703 DONE; | |
704 } | |
705 ) | |
706 | |
707 (define_insn "vec_series<mode>" | |
708 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w") | |
709 (vec_series:SVE_I | |
710 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r") | |
711 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))] | |
712 "TARGET_SVE" | |
713 "@ | |
714 index\t%0.<Vetype>, #%1, %<vw>2 | |
715 index\t%0.<Vetype>, %<vw>1, #%2 | |
716 index\t%0.<Vetype>, %<vw>1, %<vw>2" | |
717 ) | |
718 | |
719 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range | |
720 ;; of an INDEX instruction. | |
721 (define_insn "*vec_series<mode>_plus" | |
722 [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
723 (plus:SVE_I | |
724 (vec_duplicate:SVE_I | |
725 (match_operand:<VEL> 1 "register_operand" "r")) | |
726 (match_operand:SVE_I 2 "immediate_operand")))] | |
727 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])" | |
728 { | |
729 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]); | |
730 return "index\t%0.<Vetype>, %<vw>1, #%2"; | |
731 } | |
732 ) | |
733 | |
734 ;; Unpredicated LD[234]. | |
735 (define_expand "vec_load_lanes<mode><vsingle>" | |
736 [(set (match_operand:SVE_STRUCT 0 "register_operand") | |
737 (unspec:SVE_STRUCT | |
738 [(match_dup 2) | |
739 (match_operand:SVE_STRUCT 1 "memory_operand")] | |
740 UNSPEC_LDN))] | |
741 "TARGET_SVE" | |
742 { | |
743 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
744 } | |
745 ) | |
746 | |
747 ;; Predicated LD[234]. | |
748 (define_insn "vec_mask_load_lanes<mode><vsingle>" | |
749 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w") | |
750 (unspec:SVE_STRUCT | |
751 [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
752 (match_operand:SVE_STRUCT 1 "memory_operand" "m")] | |
753 UNSPEC_LDN))] | |
754 "TARGET_SVE" | |
755 "ld<vector_count><Vesize>\t%0, %2/z, %1" | |
756 ) | |
757 | |
758 ;; Unpredicated ST[234]. This is always a full update, so the dependence | |
759 ;; on the old value of the memory location (via (match_dup 0)) is redundant. | |
760 ;; There doesn't seem to be any obvious benefit to treating the all-true | |
761 ;; case differently though. In particular, it's very unlikely that we'll | |
762 ;; only find out during RTL that a store_lanes is dead. | |
763 (define_expand "vec_store_lanes<mode><vsingle>" | |
764 [(set (match_operand:SVE_STRUCT 0 "memory_operand") | |
765 (unspec:SVE_STRUCT | |
766 [(match_dup 2) | |
767 (match_operand:SVE_STRUCT 1 "register_operand") | |
768 (match_dup 0)] | |
769 UNSPEC_STN))] | |
770 "TARGET_SVE" | |
771 { | |
772 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
773 } | |
774 ) | |
775 | |
776 ;; Predicated ST[234]. | |
;; A single ST2/ST3/ST4; the (match_dup 0) input models the partial
;; update of memory when the predicate is not all-true.
777 (define_insn "vec_mask_store_lanes<mode><vsingle>" | |
778 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m") | |
779 (unspec:SVE_STRUCT | |
780 [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
781 (match_operand:SVE_STRUCT 1 "register_operand" "w") | |
782 (match_dup 0)] | |
783 UNSPEC_STN))] | |
784 "TARGET_SVE" | |
785 "st<vector_count><Vesize>\t%1, %2, %0" | |
786 ) | |
787 | |
;; General two-input permute.  Only provided for fixed-length modes
;; (constant GET_MODE_NUNITS); the expansion itself, ultimately based on
;; the TBL pattern below, is done by aarch64_expand_sve_vec_perm.
788 (define_expand "vec_perm<mode>" | |
789 [(match_operand:SVE_ALL 0 "register_operand") | |
790 (match_operand:SVE_ALL 1 "register_operand") | |
791 (match_operand:SVE_ALL 2 "register_operand") | |
792 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")] | |
793 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()" | |
794 { | |
795 aarch64_expand_sve_vec_perm (operands[0], operands[1], | |
796 operands[2], operands[3]); | |
797 DONE; | |
798 } | |
799 ) | |
800 | |
;; Table-driven permute: select elements of %1 according to the element
;; indices held in vector register %2 (TBL).
801 (define_insn "*aarch64_sve_tbl<mode>" | |
802 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
803 (unspec:SVE_ALL | |
804 [(match_operand:SVE_ALL 1 "register_operand" "w") | |
805 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")] | |
806 UNSPEC_TBL))] | |
807 "TARGET_SVE" | |
808 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
810 | |
;; Structural permutes on predicates, driven by the PERMUTE iterator
;; (presumably ZIP/UZP/TRN pairs -- the mnemonic comes from
;; <perm_insn><perm_hilo>; confirm against iterators.md).
811 (define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>" | |
812 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
813 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa") | |
814 (match_operand:PRED_ALL 2 "register_operand" "Upa")] | |
815 PERMUTE))] | |
816 "TARGET_SVE" | |
817 "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
818 ) | |
819 | |
;; The same PERMUTE operations on data vectors.  This one is named
;; (no leading "*") so it can be generated directly via gen_* functions.
820 (define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>" | |
821 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
822 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w") | |
823 (match_operand:SVE_ALL 2 "register_operand" "w")] | |
824 PERMUTE))] | |
825 "TARGET_SVE" | |
826 "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
827 ) | |
828 | |
;; Reverse the order of elements within each 64-bit doubleword
;; (note the ".d" size in the template and the VNx2BI predicate).
829 (define_insn "*aarch64_sve_rev64<mode>" | |
830 [(set (match_operand:SVE_BHS 0 "register_operand" "=w") | |
831 (unspec:SVE_BHS | |
832 [(match_operand:VNx2BI 1 "register_operand" "Upl") | |
833 (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")] | |
834 UNSPEC_REV64)] | |
835 UNSPEC_MERGE_PTRUE))] | |
836 "TARGET_SVE" | |
837 "rev<Vesize>\t%0.d, %1/m, %2.d" | |
838 ) | |
839 | |
;; Reverse the order of elements within each 32-bit word
;; (".s" containers, VNx4BI predicate); byte and halfword elements only.
840 (define_insn "*aarch64_sve_rev32<mode>" | |
841 [(set (match_operand:SVE_BH 0 "register_operand" "=w") | |
842 (unspec:SVE_BH | |
843 [(match_operand:VNx4BI 1 "register_operand" "Upl") | |
844 (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")] | |
845 UNSPEC_REV32)] | |
846 UNSPEC_MERGE_PTRUE))] | |
847 "TARGET_SVE" | |
848 "rev<Vesize>\t%0.s, %1/m, %2.s" | |
849 ) | |
850 | |
;; Reverse the two bytes within each 16-bit halfword: REVB on ".h"
;; containers, governed by a VNx8BI predicate.  Byte elements only.
851 (define_insn "*aarch64_sve_rev16vnx16qi" | |
852 [(set (match_operand:VNx16QI 0 "register_operand" "=w") | |
853 (unspec:VNx16QI | |
854 [(match_operand:VNx8BI 1 "register_operand" "Upl") | |
855 (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")] | |
856 UNSPEC_REV16)] | |
857 UNSPEC_MERGE_PTRUE))] | |
858 "TARGET_SVE" | |
859 "revb\t%0.h, %1/m, %2.h" | |
860 ) | |
861 | |
;; Reverse the order of all elements in the vector (unpredicated REV).
862 (define_insn "*aarch64_sve_rev<mode>" | |
863 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
864 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")] | |
865 UNSPEC_REV))] | |
866 "TARGET_SVE" | |
867 "rev\t%0.<Vetype>, %1.<Vetype>") | |
868 | |
;; Broadcast element %2 of %1 to every element of the result (DUP).
;; The condition keeps the byte offset of the lane within 0..63,
;; the range accepted by the instruction's immediate index.
869 (define_insn "*aarch64_sve_dup_lane<mode>" | |
870 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
871 (vec_duplicate:SVE_ALL | |
872 (vec_select:<VEL> | |
873 (match_operand:SVE_ALL 1 "register_operand" "w") | |
874 (parallel [(match_operand:SI 2 "const_int_operand")]))))] | |
875 "TARGET_SVE | |
876 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)" | |
877 "dup\t%0.<Vetype>, %1.<Vetype>[%2]" | |
878 ) | |
879 | |
880 ;; Note that the immediate (third) operand is the lane index not | |
881 ;; the byte index. | |
;; The output code rescales the lane index into the byte offset that
;; EXT expects; the condition keeps that byte offset within 0..255.
882 (define_insn "*aarch64_sve_ext<mode>" | |
883 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
884 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0") | |
885 (match_operand:SVE_ALL 2 "register_operand" "w") | |
886 (match_operand:SI 3 "const_int_operand")] | |
887 UNSPEC_EXT))] | |
888 "TARGET_SVE | |
889 && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)" | |
890 { | |
891 operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode)); | |
892 return "ext\\t%0.b, %0.b, %2.b, #%3"; | |
893 } | |
894 ) | |
895 | |
;; Unpredicated integer addition.  Alternatives: add-immediate,
;; subtract of the negated immediate, INC/DEC-style immediates
;; (printed by aarch64_output_sve_inc_dec_immediate), and the
;; three-register form.  The "%" on operand 1 marks plus as commutative.
896 (define_insn "add<mode>3" | |
897 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w") | |
898 (plus:SVE_I | |
899 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w") | |
900 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))] | |
901 "TARGET_SVE" | |
902 "@ | |
903 add\t%0.<Vetype>, %0.<Vetype>, #%D2 | |
904 sub\t%0.<Vetype>, %0.<Vetype>, #%N2 | |
905 * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]); | |
906 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
907 ) | |
908 | |
;; Unpredicated integer subtraction.  The second alternative handles an
;; immediate *minuend* via the reversed subtract SUBR (dest tied to the
;; subtrahend, operand 2).
909 (define_insn "sub<mode>3" | |
910 [(set (match_operand:SVE_I 0 "register_operand" "=w, w") | |
911 (minus:SVE_I | |
912 (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa") | |
913 (match_operand:SVE_I 2 "register_operand" "w, 0")))] | |
914 "TARGET_SVE" | |
915 "@ | |
916 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype> | |
917 subr\t%0.<Vetype>, %0.<Vetype>, #%D1" | |
918 ) | |
919 | |
920 ;; Unpredicated multiplication. | |
;; As usual for SVE, "unpredicated" is modelled by wrapping the
;; operation in UNSPEC_MERGE_PTRUE with a forced all-true predicate.
921 (define_expand "mul<mode>3" | |
922 [(set (match_operand:SVE_I 0 "register_operand") | |
923 (unspec:SVE_I | |
924 [(match_dup 3) | |
925 (mult:SVE_I | |
926 (match_operand:SVE_I 1 "register_operand") | |
927 (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))] | |
928 UNSPEC_MERGE_PTRUE))] | |
929 "TARGET_SVE" | |
930 { | |
931 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
932 } | |
933 ) | |
934 | |
935 ;; Multiplication predicated with a PTRUE. We don't actually need the | |
936 ;; predicate for the first alternative, but using Upa or X isn't likely | |
937 ;; to gain much and would make the instruction seem less uniform to the | |
938 ;; register allocator. | |
;; Third alternative uses MOVPRFX to handle an untied destination
;; (hence the earlyclobber "?&w" and the "movprfx" attribute).
939 (define_insn "*mul<mode>3" | |
940 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") | |
941 (unspec:SVE_I | |
942 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
943 (mult:SVE_I | |
944 (match_operand:SVE_I 2 "register_operand" "%0, 0, w") | |
945 (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))] | |
946 UNSPEC_MERGE_PTRUE))] | |
947 "TARGET_SVE" | |
948 "@ | |
949 mul\t%0.<Vetype>, %0.<Vetype>, #%3 | |
950 mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
951 movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
952 [(set_attr "movprfx" "*,*,yes")] | |
953 ) | |
954 | |
;; Multiply-add: uses MAD when the destination is tied to a multiplicand
;; (operand 2), MLA when tied to the addend (operand 4), and
;; MOVPRFX + MLA when tied to neither.
955 (define_insn "*madd<mode>" | |
956 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") | |
957 (plus:SVE_I | |
958 (unspec:SVE_I | |
959 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
960 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w") | |
961 (match_operand:SVE_I 3 "register_operand" "w, w, w"))] | |
962 UNSPEC_MERGE_PTRUE) | |
963 (match_operand:SVE_I 4 "register_operand" "w, 0, w")))] | |
964 "TARGET_SVE" | |
965 "@ | |
966 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
967 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
968 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
969 [(set_attr "movprfx" "*,*,yes")] | |
970 ) | |
971 | |
;; Multiply-subtract, the mirror of *madd: MSB when the destination is
;; tied to a multiplicand, MLS when tied to the minuend (operand 4),
;; MOVPRFX + MLS otherwise.
972 (define_insn "*msub<mode>3" | |
973 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") | |
974 (minus:SVE_I | |
975 (match_operand:SVE_I 4 "register_operand" "w, 0, w") | |
976 (unspec:SVE_I | |
977 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
978 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w") | |
979 (match_operand:SVE_I 3 "register_operand" "w, w, w"))] | |
980 UNSPEC_MERGE_PTRUE)))] | |
981 "TARGET_SVE" | |
982 "@ | |
983 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
984 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
985 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
986 [(set_attr "movprfx" "*,*,yes")] | |
987 ) | |
988 | |
989 ;; Unpredicated highpart multiplication. | |
;; MUL_HIGHPART covers the signed and unsigned variants (<su> prefix);
;; again the expander just forces an all-true predicate.
990 (define_expand "<su>mul<mode>3_highpart" | |
991 [(set (match_operand:SVE_I 0 "register_operand") | |
992 (unspec:SVE_I | |
993 [(match_dup 3) | |
994 (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand") | |
995 (match_operand:SVE_I 2 "register_operand")] | |
996 MUL_HIGHPART)] | |
997 UNSPEC_MERGE_PTRUE))] | |
998 "TARGET_SVE" | |
999 { | |
1000 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1001 } | |
1002 ) | |
1003 | |
1004 ;; Predicated highpart multiplication. | |
;; SMULH/UMULH; the second alternative uses MOVPRFX for an untied
;; destination.  "%" on operand 2 marks the operation commutative.
1005 (define_insn "*<su>mul<mode>3_highpart" | |
1006 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1007 (unspec:SVE_I | |
1008 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1009 (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w") | |
1010 (match_operand:SVE_I 3 "register_operand" "w, w")] | |
1011 MUL_HIGHPART)] | |
1012 UNSPEC_MERGE_PTRUE))] | |
1013 "TARGET_SVE" | |
1014 "@ | |
1015 <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1016 movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1017 [(set_attr "movprfx" "*,yes")] | |
1018 ) | |
1019 | |
1020 ;; Unpredicated division. | |
;; SVE_INT_BINARY_SD over SVE_SDI modes -- i.e. restricted to 32-bit
;; and 64-bit elements (presumably the sizes SDIV/UDIV support;
;; confirm against iterators.md).
1021 (define_expand "<optab><mode>3" | |
1022 [(set (match_operand:SVE_SDI 0 "register_operand") | |
1023 (unspec:SVE_SDI | |
1024 [(match_dup 3) | |
1025 (SVE_INT_BINARY_SD:SVE_SDI | |
1026 (match_operand:SVE_SDI 1 "register_operand") | |
1027 (match_operand:SVE_SDI 2 "register_operand"))] | |
1028 UNSPEC_MERGE_PTRUE))] | |
1029 "TARGET_SVE" | |
1030 { | |
1031 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1032 } | |
1033 ) | |
1034 | |
1035 ;; Division predicated with a PTRUE. | |
;; Second alternative has the *second* source tied to the destination,
;; so it uses the reversed form ("<sve_int_op>r"); third uses MOVPRFX.
1036 (define_insn "*<optab><mode>3" | |
1037 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w") | |
1038 (unspec:SVE_SDI | |
1039 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
1040 (SVE_INT_BINARY_SD:SVE_SDI | |
1041 (match_operand:SVE_SDI 2 "register_operand" "0, w, w") | |
1042 (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0, w"))] | |
1043 UNSPEC_MERGE_PTRUE))] | |
1044 "TARGET_SVE" | |
1045 "@ | |
1046 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1047 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1048 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1049 [(set_attr "movprfx" "*,*,yes")] | |
1050 ) | |
1051 | |
1052 ;; Unpredicated NEG, NOT and POPCOUNT. | |
;; SVE_INT_UNARY supplies the rtl code; the expander only adds the
;; forced all-true predicate.
1053 (define_expand "<optab><mode>2" | |
1054 [(set (match_operand:SVE_I 0 "register_operand") | |
1055 (unspec:SVE_I | |
1056 [(match_dup 2) | |
1057 (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))] | |
1058 UNSPEC_MERGE_PTRUE))] | |
1059 "TARGET_SVE" | |
1060 { | |
1061 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1062 } | |
1063 ) | |
1064 | |
1065 ;; NEG, NOT and POPCOUNT predicated with a PTRUE. | |
;; Single alternative: the unary instructions take a separate source
;; register, so no tying or MOVPRFX is needed.
1066 (define_insn "*<optab><mode>2" | |
1067 [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1068 (unspec:SVE_I | |
1069 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1070 (SVE_INT_UNARY:SVE_I | |
1071 (match_operand:SVE_I 2 "register_operand" "w"))] | |
1072 UNSPEC_MERGE_PTRUE))] | |
1073 "TARGET_SVE" | |
1074 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
1075 ) | |
1076 | |
1077 ;; Vector AND, ORR and XOR. | |
;; Immediate form prints a bitmask immediate via %C2; the register form
;; is element-size-agnostic, hence the fixed ".d" suffixes.
1078 (define_insn "<optab><mode>3" | |
1079 [(set (match_operand:SVE_I 0 "register_operand" "=w, w") | |
1080 (LOGICAL:SVE_I | |
1081 (match_operand:SVE_I 1 "register_operand" "%0, w") | |
1082 (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))] | |
1083 "TARGET_SVE" | |
1084 "@ | |
1085 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2 | |
1086 <logical>\t%0.d, %1.d, %2.d" | |
1087 ) | |
1088 | |
1089 ;; Vector AND, ORR and XOR on floating-point modes. We avoid subregs | |
1090 ;; by providing this, but we need to use UNSPECs since rtx logical ops | |
1091 ;; aren't defined for floating-point modes. | |
1092 (define_insn "*<optab><mode>3" | |
1093 [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
1094 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w") | |
1095 (match_operand:SVE_F 2 "register_operand" "w")] | |
1096 LOGICALF))] | |
1097 "TARGET_SVE" | |
;; Bitwise ops ignore element size, so ".d" is used unconditionally.
1098 "<logicalf_op>\t%0.d, %1.d, %2.d" | |
1099 ) | |
1100 | |
1101 ;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate | |
1102 ;; this pattern even though the NOT instruction itself is predicated. | |
;; Computes %2 AND NOT %1; note the template swaps the operands, since
;; BIC inverts its *last* source operand.
1103 (define_insn "bic<mode>3" | |
1104 [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1105 (and:SVE_I | |
1106 (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w")) | |
1107 (match_operand:SVE_I 2 "register_operand" "w")))] | |
1108 "TARGET_SVE" | |
1109 "bic\t%0.d, %2.d, %1.d" | |
1110 ) | |
1111 | |
1112 ;; Predicate AND. We can reuse one of the inputs as the GP. | |
;; (Zeroing form: the result is correct because "x AND y" is zero
;; wherever x itself is false.)
1113 (define_insn "and<mode>3" | |
1114 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1115 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa") | |
1116 (match_operand:PRED_ALL 2 "register_operand" "Upa")))] | |
1117 "TARGET_SVE" | |
1118 "and\t%0.b, %1/z, %1.b, %2.b" | |
1119 ) | |
1120 | |
1121 ;; Unpredicated predicate ORR and XOR. | |
;; The AND with a forced all-true predicate is an rtl-level no-op that
;; puts the pattern in the same shape as pred_<optab><mode>3 below.
1122 (define_expand "<optab><mode>3" | |
1123 [(set (match_operand:PRED_ALL 0 "register_operand") | |
1124 (and:PRED_ALL | |
1125 (LOGICAL_OR:PRED_ALL | |
1126 (match_operand:PRED_ALL 1 "register_operand") | |
1127 (match_operand:PRED_ALL 2 "register_operand")) | |
1128 (match_dup 3)))] | |
1129 "TARGET_SVE" | |
1130 { | |
1131 operands[3] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode)); | |
1132 } | |
1133 ) | |
1134 | |
1135 ;; Predicated predicate ORR and XOR. | |
;; Operand 1 is the governing predicate (zeroing form "/z"); the
;; logical operation itself is on operands 2 and 3.
1136 (define_insn "pred_<optab><mode>3" | |
1137 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1138 (and:PRED_ALL | |
1139 (LOGICAL:PRED_ALL | |
1140 (match_operand:PRED_ALL 2 "register_operand" "Upa") | |
1141 (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
1142 (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
1143 "TARGET_SVE" | |
1144 "<logical>\t%0.b, %1/z, %2.b, %3.b" | |
1145 ) | |
1146 | |
1147 ;; Perform a logical operation on operands 2 and 3, using operand 1 as | |
1148 ;; the GP (which is known to be a PTRUE). Store the result in operand 0 | |
1149 ;; and set the flags in the same way as for PTEST. The (and ...) in the | |
1150 ;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested | |
1151 ;; value is structurally equivalent to rhs of the second set. | |
;; Uses the flag-setting "<logical>s" variant of the instruction.
1152 (define_insn "*<optab><mode>3_cc" | |
1153 [(set (reg:CC CC_REGNUM) | |
1154 (compare:CC | |
1155 (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa") | |
1156 (and:PRED_ALL | |
1157 (LOGICAL:PRED_ALL | |
1158 (match_operand:PRED_ALL 2 "register_operand" "Upa") | |
1159 (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
1160 (match_dup 1))] | |
1161 UNSPEC_PTEST_PTRUE) | |
1162 (const_int 0))) | |
1163 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1164 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) | |
1165 (match_dup 1)))] | |
1166 "TARGET_SVE" | |
1167 "<logical>s\t%0.b, %1/z, %2.b, %3.b" | |
1168 ) | |
1169 | |
1170 ;; Unpredicated predicate inverse. | |
;; Same trick as the ORR/XOR expander: AND with a forced PTRUE so the
;; rtl matches the predicated NOT pattern below.
1171 (define_expand "one_cmpl<mode>2" | |
1172 [(set (match_operand:PRED_ALL 0 "register_operand") | |
1173 (and:PRED_ALL | |
1174 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")) | |
1175 (match_dup 2)))] | |
1176 "TARGET_SVE" | |
1177 { | |
1178 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode)); | |
1179 } | |
1180 ) | |
1181 | |
1182 ;; Predicated predicate inverse. | |
;; NOT of operand 2 under governing predicate operand 1 (zeroing form).
1183 (define_insn "*one_cmpl<mode>3" | |
1184 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1185 (and:PRED_ALL | |
1186 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
1187 (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
1188 "TARGET_SVE" | |
1189 "not\t%0.b, %1/z, %2.b" | |
1190 ) | |
1191 | |
1192 ;; Predicated predicate BIC and ORN. | |
;; The template prints %3 before %2 because BIC/ORN invert their final
;; source operand, which in the rtl is the (not ...) of operand 2.
1193 (define_insn "*<nlogical><mode>3" | |
1194 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1195 (and:PRED_ALL | |
1196 (NLOGICAL:PRED_ALL | |
1197 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
1198 (match_operand:PRED_ALL 3 "register_operand" "Upa")) | |
1199 (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
1200 "TARGET_SVE" | |
1201 "<nlogical>\t%0.b, %1/z, %3.b, %2.b" | |
1202 ) | |
1203 | |
1204 ;; Predicated predicate NAND and NOR. | |
;; Both sources are inverted in the rtl; NAND/NOR take them uninverted,
;; so the template prints them in their original order.
1205 (define_insn "*<logical_nn><mode>3" | |
1206 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1207 (and:PRED_ALL | |
1208 (NLOGICAL:PRED_ALL | |
1209 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) | |
1210 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))) | |
1211 (match_operand:PRED_ALL 1 "register_operand" "Upa")))] | |
1212 "TARGET_SVE" | |
1213 "<logical_nn>\t%0.b, %1/z, %2.b, %3.b" | |
1214 ) | |
1215 | |
1216 ;; Unpredicated LSL, LSR and ASR by a vector. | |
;; ASHIFT iterates over the three shift codes; the expander forces an
;; all-true predicate as usual.
1217 (define_expand "v<optab><mode>3" | |
1218 [(set (match_operand:SVE_I 0 "register_operand") | |
1219 (unspec:SVE_I | |
1220 [(match_dup 3) | |
1221 (ASHIFT:SVE_I | |
1222 (match_operand:SVE_I 1 "register_operand") | |
1223 (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))] | |
1224 UNSPEC_MERGE_PTRUE))] | |
1225 "TARGET_SVE" | |
1226 { | |
1227 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1228 } | |
1229 ) | |
1230 | |
1231 ;; LSL, LSR and ASR by a vector, predicated with a PTRUE. We don't | |
1232 ;; actually need the predicate for the first alternative, but using Upa | |
1233 ;; or X isn't likely to gain much and would make the instruction seem | |
1234 ;; less uniform to the register allocator. | |
;; Alternatives: immediate shift, tied-register shift, MOVPRFX + shift.
1235 (define_insn "*v<optab><mode>3" | |
1236 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") | |
1237 (unspec:SVE_I | |
1238 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
1239 (ASHIFT:SVE_I | |
1240 (match_operand:SVE_I 2 "register_operand" "w, 0, w") | |
1241 (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, w"))] | |
1242 UNSPEC_MERGE_PTRUE))] | |
1243 "TARGET_SVE" | |
1244 "@ | |
1245 <shift>\t%0.<Vetype>, %2.<Vetype>, #%3 | |
1246 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1247 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1248 [(set_attr "movprfx" "*,*,yes")] | |
1249 ) | |
1250 | |
1251 ;; LSL, LSR and ASR by a scalar, which expands into one of the vector | |
1252 ;; shifts above. | |
;; Constant amounts become a duplicated constant vector (forced to a
;; register if not a valid shift-immediate); variable amounts are
;; converted to element width and broadcast with vec_duplicate.
1253 (define_expand "<ASHIFT:optab><mode>3" | |
1254 [(set (match_operand:SVE_I 0 "register_operand") | |
1255 (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand") | |
1256 (match_operand:<VEL> 2 "general_operand")))] | |
1257 "TARGET_SVE" | |
1258 { | |
1259 rtx amount; | |
1260 if (CONST_INT_P (operands[2])) | |
1261 { | |
1262 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]); | |
1263 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode)) | |
1264 amount = force_reg (<MODE>mode, amount); | |
1265 } | |
1266 else | |
1267 { | |
1268 amount = gen_reg_rtx (<MODE>mode); | |
1269 emit_insn (gen_vec_duplicate<mode> (amount, | |
1270 convert_to_mode (<VEL>mode, | |
1271 operands[2], 0))); | |
1272 } | |
1273 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount)); | |
1274 DONE; | |
1275 } | |
1276 ) | |
1277 | |
1278 ;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE. | |
1279 ;; | |
1280 ;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP | |
1281 ;; is a PTRUE even if the optimizers haven't yet been able to propagate | |
1282 ;; the constant. We would use a separate unspec code for PTESTs involving | |
1283 ;; GPs that might not be PTRUEs. | |
;; Sets the condition-code register; the result is consumed via CC.
1284 (define_insn "ptest_ptrue<mode>" | |
1285 [(set (reg:CC CC_REGNUM) | |
1286 (compare:CC | |
1287 (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa") | |
1288 (match_operand:PRED_ALL 1 "register_operand" "Upa")] | |
1289 UNSPEC_PTEST_PTRUE) | |
1290 (const_int 0)))] | |
1291 "TARGET_SVE" | |
1292 "ptest\t%0, %1.b" | |
1293 ) | |
1294 | |
1295 ;; Set element I of the result if operand1 + J < operand2 for all J in [0, I]. | |
1296 ;; with the comparison being unsigned. | |
;; WHILELO also sets the flags, which this pattern discards via the
;; CC clobber; the _cc variant below keeps them.
1297 (define_insn "while_ult<GPI:mode><PRED_ALL:mode>" | |
1298 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1299 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") | |
1300 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] | |
1301 UNSPEC_WHILE_LO)) | |
1302 (clobber (reg:CC CC_REGNUM))] | |
1303 "TARGET_SVE" | |
1304 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" | |
1305 ) | |
1306 | |
1307 ;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP. | |
1308 ;; Handle the case in which both results are useful. The GP operand | |
1309 ;; to the PTEST isn't needed, so we allow it to be anything. | |
;; Once operand 1 is known to be non-constant the split re-emits the
;; same insn with a constant all-true GP, so no new code is generated;
;; the split exists purely to canonicalize operand 1.
1310 (define_insn_and_split "while_ult<GPI:mode><PRED_ALL:mode>_cc" | |
1311 [(set (reg:CC CC_REGNUM) | |
1312 (compare:CC | |
1313 (unspec:SI [(match_operand:PRED_ALL 1) | |
1314 (unspec:PRED_ALL | |
1315 [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") | |
1316 (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")] | |
1317 UNSPEC_WHILE_LO)] | |
1318 UNSPEC_PTEST_PTRUE) | |
1319 (const_int 0))) | |
1320 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") | |
1321 (unspec:PRED_ALL [(match_dup 2) | |
1322 (match_dup 3)] | |
1323 UNSPEC_WHILE_LO))] | |
1324 "TARGET_SVE" | |
1325 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3" | |
1326 ;; Force the compiler to drop the unused predicate operand, so that we | |
1327 ;; don't have an unnecessary PTRUE. | |
1328 "&& !CONSTANT_P (operands[1])" | |
1329 [(const_int 0)] | |
1330 { | |
1331 emit_insn (gen_while_ult<GPI:mode><PRED_ALL:mode>_cc | |
1332 (operands[0], CONSTM1_RTX (<MODE>mode), | |
1333 operands[2], operands[3])); | |
1334 DONE; | |
1335 } | |
1336 ) | |
1337 | |
1338 ;; Integer comparisons predicated with a PTRUE. | |
;; Produces a predicate result; the flags set by CMP<cc> are discarded
;; via the CC clobber (see the _ptest/_cc variants below).
1339 (define_insn "*cmp<cmp_op><mode>" | |
1340 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1341 (unspec:<VPRED> | |
1342 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1343 (SVE_INT_CMP:<VPRED> | |
1344 (match_operand:SVE_I 2 "register_operand" "w, w") | |
1345 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
1346 UNSPEC_MERGE_PTRUE)) | |
1347 (clobber (reg:CC CC_REGNUM))] | |
1348 "TARGET_SVE" | |
1349 "@ | |
1350 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
1351 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1352 ) | |
1353 | |
1354 ;; Integer comparisons predicated with a PTRUE in which only the flags result | |
1355 ;; is interesting. | |
;; The predicate result is written to a scratch register (clobbered
;; match_scratch) so only the CC set survives.
1356 (define_insn "*cmp<cmp_op><mode>_ptest" | |
1357 [(set (reg:CC CC_REGNUM) | |
1358 (compare:CC | |
1359 (unspec:SI | |
1360 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1361 (unspec:<VPRED> | |
1362 [(match_dup 1) | |
1363 (SVE_INT_CMP:<VPRED> | |
1364 (match_operand:SVE_I 2 "register_operand" "w, w") | |
1365 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
1366 UNSPEC_MERGE_PTRUE)] | |
1367 UNSPEC_PTEST_PTRUE) | |
1368 (const_int 0))) | |
1369 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))] | |
1370 "TARGET_SVE" | |
1371 "@ | |
1372 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
1373 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1374 ) | |
1375 | |
1376 ;; Integer comparisons predicated with a PTRUE in which both the flag and | |
1377 ;; predicate results are interesting. | |
;; Same instruction as *cmp<cmp_op><mode>_ptest, but the predicate
;; destination (operand 0) is a real output rather than a scratch.
1378 (define_insn "*cmp<cmp_op><mode>_cc" | |
1379 [(set (reg:CC CC_REGNUM) | |
1380 (compare:CC | |
1381 (unspec:SI | |
1382 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1383 (unspec:<VPRED> | |
1384 [(match_dup 1) | |
1385 (SVE_INT_CMP:<VPRED> | |
1386 (match_operand:SVE_I 2 "register_operand" "w, w") | |
1387 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
1388 UNSPEC_MERGE_PTRUE)] | |
1389 UNSPEC_PTEST_PTRUE) | |
1390 (const_int 0))) | |
1391 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1392 (unspec:<VPRED> | |
1393 [(match_dup 1) | |
1394 (SVE_INT_CMP:<VPRED> | |
1395 (match_dup 2) | |
1396 (match_dup 3))] | |
1397 UNSPEC_MERGE_PTRUE))] | |
1398 "TARGET_SVE" | |
1399 "@ | |
1400 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
1401 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1402 ) | |
1403 | |
1404 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated | |
1405 ;; comparison with an AND. Split the instruction into its preferred form | |
1406 ;; (below) at the earliest opportunity, in order to get rid of the | |
1407 ;; redundant operand 1. | |
;; "#" + "&& 1": never emitted as-is, always split to *pred_cmp below.
1408 (define_insn_and_split "*pred_cmp<cmp_op><mode>_combine" | |
1409 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1410 (and:<VPRED> | |
1411 (unspec:<VPRED> | |
1412 [(match_operand:<VPRED> 1) | |
1413 (SVE_INT_CMP:<VPRED> | |
1414 (match_operand:SVE_I 2 "register_operand" "w, w") | |
1415 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] | |
1416 UNSPEC_MERGE_PTRUE) | |
1417 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl"))) | |
1418 (clobber (reg:CC CC_REGNUM))] | |
1419 "TARGET_SVE" | |
1420 "#" | |
1421 "&& 1" | |
1422 [(parallel | |
1423 [(set (match_dup 0) | |
1424 (and:<VPRED> | |
1425 (SVE_INT_CMP:<VPRED> | |
1426 (match_dup 2) | |
1427 (match_dup 3)) | |
1428 (match_dup 4))) | |
1429 (clobber (reg:CC CC_REGNUM))])] | |
1430 ) | |
1431 | |
1432 ;; Predicated integer comparisons. | |
;; Canonical form: the comparison ANDed with the governing predicate
;; (operand 1), emitted as a zeroing CMP<cc>.
1433 (define_insn "*pred_cmp<cmp_op><mode>" | |
1434 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1435 (and:<VPRED> | |
1436 (SVE_INT_CMP:<VPRED> | |
1437 (match_operand:SVE_I 2 "register_operand" "w, w") | |
1438 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w")) | |
1439 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl"))) | |
1440 (clobber (reg:CC CC_REGNUM))] | |
1441 "TARGET_SVE" | |
1442 "@ | |
1443 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 | |
1444 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1445 ) | |
1446 | |
1447 ;; Floating-point comparisons predicated with a PTRUE. | |
;; A zero comparand (Dz) uses the immediate #0.0 form of FCM<cc>.
1448 (define_insn "*fcm<cmp_op><mode>" | |
1449 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1450 (unspec:<VPRED> | |
1451 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1452 (SVE_FP_CMP:<VPRED> | |
1453 (match_operand:SVE_F 2 "register_operand" "w, w") | |
1454 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] | |
1455 UNSPEC_MERGE_PTRUE))] | |
1456 "TARGET_SVE" | |
1457 "@ | |
1458 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 | |
1459 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1460 ) | |
1461 | |
;; Unordered FP comparison (FCMUO); unlike the FCM<cc> pattern above,
;; only the register-register form is provided.
1462 (define_insn "*fcmuo<mode>" | |
1463 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
1464 (unspec:<VPRED> | |
1465 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1466 (unordered:<VPRED> | |
1467 (match_operand:SVE_F 2 "register_operand" "w") | |
1468 (match_operand:SVE_F 3 "register_operand" "w"))] | |
1469 UNSPEC_MERGE_PTRUE))] | |
1470 "TARGET_SVE" | |
1471 "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1472 ) | |
1473 | |
1474 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed | |
1475 ;; with another predicate P. This does not have the same trapping behavior | |
1476 ;; as predicating the comparison itself on P, but it's a legitimate fold, | |
1477 ;; since we can drop any potentially-trapping operations whose results | |
1478 ;; are not needed. | |
1479 ;; | |
1480 ;; Split the instruction into its preferred form (below) at the earliest | |
1481 ;; opportunity, in order to get rid of the redundant operand 1. | |
;; Always split ("#" + "&& 1") to the *fcm<cmp_op><mode>_and form.
1482 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine" | |
1483 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1484 (and:<VPRED> | |
1485 (unspec:<VPRED> | |
1486 [(match_operand:<VPRED> 1) | |
1487 (SVE_FP_CMP | |
1488 (match_operand:SVE_F 2 "register_operand" "w, w") | |
1489 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] | |
1490 UNSPEC_MERGE_PTRUE) | |
1491 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))] | |
1492 "TARGET_SVE" | |
1493 "#" | |
1494 "&& 1" | |
1495 [(set (match_dup 0) | |
1496 (and:<VPRED> | |
1497 (SVE_FP_CMP:<VPRED> | |
1498 (match_dup 2) | |
1499 (match_dup 3)) | |
1500 (match_dup 4)))] | |
1501 ) | |
1502 | |
;; Unordered-comparison analogue of *fcm<cmp_op><mode>_and_combine:
;; always split to the *fcmuo<mode>_and form below.
1503 (define_insn_and_split "*fcmuo<mode>_and_combine" | |
1504 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
1505 (and:<VPRED> | |
1506 (unspec:<VPRED> | |
1507 [(match_operand:<VPRED> 1) | |
1508 (unordered | |
1509 (match_operand:SVE_F 2 "register_operand" "w") | |
1510 (match_operand:SVE_F 3 "register_operand" "w"))] | |
1511 UNSPEC_MERGE_PTRUE) | |
1512 (match_operand:<VPRED> 4 "register_operand" "Upl")))] | |
1513 "TARGET_SVE" | |
1514 "#" | |
1515 "&& 1" | |
1516 [(set (match_dup 0) | |
1517 (and:<VPRED> | |
1518 (unordered:<VPRED> | |
1519 (match_dup 2) | |
1520 (match_dup 3)) | |
1521 (match_dup 4)))] | |
1522 ) | |
1523 | |
1524 ;; Unpredicated floating-point comparisons, with the results ANDed | |
1525 ;; with another predicate. This is a valid fold for the same reasons | |
1526 ;; as above. | |
;; Implemented directly as a zeroing FCM<cc> governed by operand 1.
1527 (define_insn "*fcm<cmp_op><mode>_and" | |
1528 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1529 (and:<VPRED> | |
1530 (SVE_FP_CMP:<VPRED> | |
1531 (match_operand:SVE_F 2 "register_operand" "w, w") | |
1532 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")) | |
1533 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))] | |
1534 "TARGET_SVE" | |
1535 "@ | |
1536 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 | |
1537 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1538 ) | |
1539 | |
;; Unordered-comparison analogue of *fcm<cmp_op><mode>_and.
1540 (define_insn "*fcmuo<mode>_and" | |
1541 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") | |
1542 (and:<VPRED> | |
1543 (unordered:<VPRED> | |
1544 (match_operand:SVE_F 2 "register_operand" "w") | |
1545 (match_operand:SVE_F 3 "register_operand" "w")) | |
1546 (match_operand:<VPRED> 1 "register_operand" "Upl")))] | |
1547 "TARGET_SVE" | |
1548 "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1549 ) | |
1550 | |
1551 ;; Predicated floating-point comparisons. We don't need a version | |
1552 ;; of this for unordered comparisons. | |
;; SVE_COND_FP_CMP supplies the comparison code as an unspec, so the
;; trapping behaviour of the real predicated comparison is preserved.
1553 (define_insn "*pred_fcm<cmp_op><mode>" | |
1554 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") | |
1555 (unspec:<VPRED> | |
1556 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1557 (match_operand:SVE_F 2 "register_operand" "w, w") | |
1558 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] | |
1559 SVE_COND_FP_CMP))] | |
1560 "TARGET_SVE" | |
1561 "@ | |
1562 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0 | |
1563 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" | |
1564 ) | |
1565 | |
1566 ;; vcond_mask operand order: true, false, mask | |
1567 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR) | |
1568 ;; SEL operand order: mask, true, false | |
;; Hence the mask is operand 3 of this named pattern but appears first
;; inside the unspec and in the SEL template.
1569 (define_insn "vcond_mask_<mode><vpred>" | |
1570 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") | |
1571 (unspec:SVE_ALL | |
1572 [(match_operand:<VPRED> 3 "register_operand" "Upa") | |
1573 (match_operand:SVE_ALL 1 "register_operand" "w") | |
1574 (match_operand:SVE_ALL 2 "register_operand" "w")] | |
1575 UNSPEC_SEL))] | |
1576 "TARGET_SVE" | |
1577 "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>" | |
1578 ) | |
1579 | |
1580 ;; Selects between a duplicated immediate and zero. | |
;; Implemented as a zeroing predicated MOV: active lanes get the
;; duplicated immediate (operand 2), inactive lanes become zero.
1581 (define_insn "aarch64_sve_dup<mode>_const" | |
1582 [(set (match_operand:SVE_I 0 "register_operand" "=w") | |
1583 (unspec:SVE_I | |
1584 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1585 (match_operand:SVE_I 2 "aarch64_sve_dup_immediate") | |
1586 (match_operand:SVE_I 3 "aarch64_simd_imm_zero")] | |
1587 UNSPEC_SEL))] | |
1588 "TARGET_SVE" | |
1589 "mov\t%0.<Vetype>, %1/z, #%2" | |
1590 ) | |
1591 | |
1592 ;; Integer (signed) vcond. Don't enforce an immediate range here, since it | |
1593 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. | |
;; The helper emits the comparison into a predicate and then a SEL.
1594 (define_expand "vcond<mode><v_int_equiv>" | |
1595 [(set (match_operand:SVE_ALL 0 "register_operand") | |
1596 (if_then_else:SVE_ALL | |
1597 (match_operator 3 "comparison_operator" | |
1598 [(match_operand:<V_INT_EQUIV> 4 "register_operand") | |
1599 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")]) | |
1600 (match_operand:SVE_ALL 1 "register_operand") | |
1601 (match_operand:SVE_ALL 2 "register_operand")))] | |
1602 "TARGET_SVE" | |
1603 { | |
1604 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands); | |
1605 DONE; | |
1606 } | |
1607 ) | |
1608 | |
1609 ;; Integer vcondu. Don't enforce an immediate range here, since it | |
1610 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. | |
;; Unsigned variant; identical expansion to vcond, the comparison code
;; in operand 3 carries the signedness.
1611 (define_expand "vcondu<mode><v_int_equiv>" | |
1612 [(set (match_operand:SVE_ALL 0 "register_operand") | |
1613 (if_then_else:SVE_ALL | |
1614 (match_operator 3 "comparison_operator" | |
1615 [(match_operand:<V_INT_EQUIV> 4 "register_operand") | |
1616 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")]) | |
1617 (match_operand:SVE_ALL 1 "register_operand") | |
1618 (match_operand:SVE_ALL 2 "register_operand")))] | |
1619 "TARGET_SVE" | |
1620 { | |
1621 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands); | |
1622 DONE; | |
1623 } | |
1624 ) | |
1625 | |
1626 ;; Floating-point vcond. All comparisons except FCMUO allow a zero | |
1627 ;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO | |
1628 ;; with zero. | |
1629 (define_expand "vcond<mode><v_fp_equiv>" | |
1630 [(set (match_operand:SVE_SD 0 "register_operand") | |
1631 (if_then_else:SVE_SD | |
1632 (match_operator 3 "comparison_operator" | |
1633 [(match_operand:<V_FP_EQUIV> 4 "register_operand") | |
1634 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")]) | |
1635 (match_operand:SVE_SD 1 "register_operand") | |
1636 (match_operand:SVE_SD 2 "register_operand")))] | |
1637 "TARGET_SVE" | |
1638 { | |
1639 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands); | |
1640 DONE; | |
1641 } | |
1642 ) | |
1643 | |
1644 ;; Signed integer comparisons. Don't enforce an immediate range here, since | |
1645 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int | |
1646 ;; instead. | |
;; NOTE(review): the CC clobber presumably covers flag-setting compare
;; sequences emitted by the helper — confirm against its definition.
1647 (define_expand "vec_cmp<mode><vpred>" | |
1648 [(parallel | |
1649 [(set (match_operand:<VPRED> 0 "register_operand") | |
1650 (match_operator:<VPRED> 1 "comparison_operator" | |
1651 [(match_operand:SVE_I 2 "register_operand") | |
1652 (match_operand:SVE_I 3 "nonmemory_operand")])) | |
1653 (clobber (reg:CC CC_REGNUM))])] | |
1654 "TARGET_SVE" | |
1655 { | |
1656 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), | |
1657 operands[2], operands[3]); | |
1658 DONE; | |
1659 } | |
1660 ) | |
1661 | |
1662 ;; Unsigned integer comparisons. Don't enforce an immediate range here, since | |
1663 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int | |
1664 ;; instead. | |
;; Same expansion path as vec_cmp; the rtx code in operand 1 carries
;; the unsignedness.
1665 (define_expand "vec_cmpu<mode><vpred>" | |
1666 [(parallel | |
1667 [(set (match_operand:<VPRED> 0 "register_operand") | |
1668 (match_operator:<VPRED> 1 "comparison_operator" | |
1669 [(match_operand:SVE_I 2 "register_operand") | |
1670 (match_operand:SVE_I 3 "nonmemory_operand")])) | |
1671 (clobber (reg:CC CC_REGNUM))])] | |
1672 "TARGET_SVE" | |
1673 { | |
1674 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), | |
1675 operands[2], operands[3]); | |
1676 DONE; | |
1677 } | |
1678 ) | |
1679 | |
1680 ;; Floating-point comparisons. All comparisons except FCMUO allow a zero | |
1681 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO | |
1682 ;; with zero. | |
;; The final argument (false) requests a non-inverted comparison.
1683 (define_expand "vec_cmp<mode><vpred>" | |
1684 [(set (match_operand:<VPRED> 0 "register_operand") | |
1685 (match_operator:<VPRED> 1 "comparison_operator" | |
1686 [(match_operand:SVE_F 2 "register_operand") | |
1687 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))] | |
1688 "TARGET_SVE" | |
1689 { | |
1690 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]), | |
1691 operands[2], operands[3], false); | |
1692 DONE; | |
1693 } | |
1694 ) | |
1695 | |
1696 ;; Branch based on predicate equality or inequality. | |
;; P1 == P2 iff (P1 ^ P2) has no active bits, so for a nonzero RHS we
;; XOR the two predicates (under an all-true governing predicate) and
;; PTEST the result; comparison against zero can PTEST operand 1
;; directly.  PTEST sets CC, and operands 1/2 are rewritten so the
;; generic branch pattern tests CC against zero.
1697 (define_expand "cbranch<mode>4" | |
1698 [(set (pc) | |
1699 (if_then_else | |
1700 (match_operator 0 "aarch64_equality_operator" | |
1701 [(match_operand:PRED_ALL 1 "register_operand") | |
1702 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")]) | |
1703 (label_ref (match_operand 3 "")) | |
1704 (pc)))] | |
1705 "" | |
1706 { | |
1707 rtx ptrue = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode)); | |
1708 rtx pred; | |
1709 if (operands[2] == CONST0_RTX (<MODE>mode)) | |
1710 pred = operands[1]; | |
1711 else | |
1712 { | |
1713 pred = gen_reg_rtx (<MODE>mode); | |
1714 emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1], | |
1715 operands[2])); | |
1716 } | |
1717 emit_insn (gen_ptest_ptrue<mode> (ptrue, pred)); | |
1718 operands[1] = gen_rtx_REG (CCmode, CC_REGNUM); | |
1719 operands[2] = const0_rtx; | |
1720 } | |
1721 ) | |
1722 | |
1723 ;; Unpredicated integer MIN/MAX. | |
;; SVE has no unpredicated SMAX/SMIN/UMAX/UMIN, so wrap the operation
;; in UNSPEC_MERGE_PTRUE with an all-true predicate (operand 3).
1724 (define_expand "<su><maxmin><mode>3" | |
1725 [(set (match_operand:SVE_I 0 "register_operand") | |
1726 (unspec:SVE_I | |
1727 [(match_dup 3) | |
1728 (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand") | |
1729 (match_operand:SVE_I 2 "register_operand"))] | |
1730 UNSPEC_MERGE_PTRUE))] | |
1731 "TARGET_SVE" | |
1732 { | |
1733 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1734 } | |
1735 ) | |
1736 | |
1737 ;; Integer MIN/MAX predicated with a PTRUE. | |
;; Alternative 0 ties the (commutative) first input to the destination;
;; alternative 1 uses MOVPRFX to allow an untied destination.
1738 (define_insn "*<su><maxmin><mode>3" | |
1739 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1740 (unspec:SVE_I | |
1741 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1742 (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w") | |
1743 (match_operand:SVE_I 3 "register_operand" "w, w"))] | |
1744 UNSPEC_MERGE_PTRUE))] | |
1745 "TARGET_SVE" | |
1746 "@ | |
1747 <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1748 movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1749 [(set_attr "movprfx" "*,yes")] | |
1750 ) | |
1751 | |
1752 ;; Unpredicated floating-point MIN/MAX. | |
;; Expands smax/smin on floats to the IEEE FMAXNM/FMINNM forms below,
;; again predicated by an all-true predicate (operand 3).
1753 (define_expand "<su><maxmin><mode>3" | |
1754 [(set (match_operand:SVE_F 0 "register_operand") | |
1755 (unspec:SVE_F | |
1756 [(match_dup 3) | |
1757 (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand") | |
1758 (match_operand:SVE_F 2 "register_operand"))] | |
1759 UNSPEC_MERGE_PTRUE))] | |
1760 "TARGET_SVE" | |
1761 { | |
1762 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1763 } | |
1764 ) | |
1765 | |
1766 ;; Floating-point MIN/MAX predicated with a PTRUE. | |
;; Second alternative uses MOVPRFX for an untied destination.
1767 (define_insn "*<su><maxmin><mode>3" | |
1768 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") | |
1769 (unspec:SVE_F | |
1770 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1771 (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0, w") | |
1772 (match_operand:SVE_F 3 "register_operand" "w, w"))] | |
1773 UNSPEC_MERGE_PTRUE))] | |
1774 "TARGET_SVE" | |
1775 "@ | |
1776 f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1777 movprfx\t%0, %2\;f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1778 [(set_attr "movprfx" "*,yes")] | |
1779 ) | |
1780 | |
1781 ;; Unpredicated fmin/fmax. | |
;; These are the fmax/fmin (and fmaxnm/fminnm) built-in patterns,
;; expressed as an inner FMAXMIN_UNS unspec under UNSPEC_MERGE_PTRUE.
1782 (define_expand "<maxmin_uns><mode>3" | |
1783 [(set (match_operand:SVE_F 0 "register_operand") | |
1784 (unspec:SVE_F | |
1785 [(match_dup 3) | |
1786 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand") | |
1787 (match_operand:SVE_F 2 "register_operand")] | |
1788 FMAXMIN_UNS)] | |
1789 UNSPEC_MERGE_PTRUE))] | |
1790 "TARGET_SVE" | |
1791 { | |
1792 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
1793 } | |
1794 ) | |
1795 | |
1796 ;; fmin/fmax predicated with a PTRUE. | |
;; Second alternative uses MOVPRFX for an untied destination.
1797 (define_insn "*<maxmin_uns><mode>3" | |
1798 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") | |
1799 (unspec:SVE_F | |
1800 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1801 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0, w") | |
1802 (match_operand:SVE_F 3 "register_operand" "w, w")] | |
1803 FMAXMIN_UNS)] | |
1804 UNSPEC_MERGE_PTRUE))] | |
1805 "TARGET_SVE" | |
1806 "@ | |
1807 <maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1808 movprfx\t%0, %2\;<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1809 [(set_attr "movprfx" "*,yes")] | |
1810 ) | |
1811 | |
1812 ;; Predicated integer operations with select. | |
;; Operand 1 predicates the operation; operand 4 supplies the value of
;; inactive lanes (a register or zero).  The insns below match the
;; specific cases of which operand the select falls back to.
1813 (define_expand "cond_<optab><mode>" | |
1814 [(set (match_operand:SVE_I 0 "register_operand") | |
1815 (unspec:SVE_I | |
1816 [(match_operand:<VPRED> 1 "register_operand") | |
1817 (SVE_INT_BINARY:SVE_I | |
1818 (match_operand:SVE_I 2 "register_operand") | |
1819 (match_operand:SVE_I 3 "register_operand")) | |
1820 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")] | |
1821 UNSPEC_SEL))] | |
1822 "TARGET_SVE" | |
1823 ) | |
1824 | |
;; As above, for operations defined only for 32-bit and 64-bit elements.
1825 (define_expand "cond_<optab><mode>" | |
1826 [(set (match_operand:SVE_SDI 0 "register_operand") | |
1827 (unspec:SVE_SDI | |
1828 [(match_operand:<VPRED> 1 "register_operand") | |
1829 (SVE_INT_BINARY_SD:SVE_SDI | |
1830 (match_operand:SVE_SDI 2 "register_operand") | |
1831 (match_operand:SVE_SDI 3 "register_operand")) | |
1832 (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")] | |
1833 UNSPEC_SEL))] | |
1834 "TARGET_SVE" | |
1835 ) | |
1836 | |
1837 ;; Predicated integer operations with select matching the output operand. | |
;; Inactive lanes keep the existing contents of operand 0, so the
;; predicated instruction's merging behaviour gives the result
;; directly.  Alternative 1 uses the reversed instruction form when the
;; second input is tied; alternative 2 uses a predicated MOVPRFX.
1838 (define_insn "*cond_<optab><mode>_0" | |
1839 [(set (match_operand:SVE_I 0 "register_operand" "+w, w, ?&w") | |
1840 (unspec:SVE_I | |
1841 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
1842 (SVE_INT_BINARY:SVE_I | |
1843 (match_operand:SVE_I 2 "register_operand" "0, w, w") | |
1844 (match_operand:SVE_I 3 "register_operand" "w, 0, w")) | |
1845 (match_dup 0)] | |
1846 UNSPEC_SEL))] | |
1847 "TARGET_SVE" | |
1848 "@ | |
1849 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1850 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1851 movprfx\t%0, %1/m, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1852 [(set_attr "movprfx" "*,*,yes")] | |
1853 ) | |
1854 | |
;; As above, for 32-bit and 64-bit elements only.
1855 (define_insn "*cond_<optab><mode>_0" | |
1856 [(set (match_operand:SVE_SDI 0 "register_operand" "+w, w, ?&w") | |
1857 (unspec:SVE_SDI | |
1858 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
1859 (SVE_INT_BINARY_SD:SVE_SDI | |
1860 (match_operand:SVE_SDI 2 "register_operand" "0, w, w") | |
1861 (match_operand:SVE_SDI 3 "register_operand" "w, 0, w")) | |
1862 (match_dup 0)] | |
1863 UNSPEC_SEL))] | |
1864 "TARGET_SVE" | |
1865 "@ | |
1866 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1867 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1868 movprfx\t%0, %1/m, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1869 [(set_attr "movprfx" "*,*,yes")] | |
1870 ) | |
1871 | |
1872 ;; Predicated integer operations with select matching the first operand. | |
;; Inactive lanes take the value of input operand 2, which the merging
;; instruction preserves when operand 2 is tied to the destination.
1873 (define_insn "*cond_<optab><mode>_2" | |
1874 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1875 (unspec:SVE_I | |
1876 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1877 (SVE_INT_BINARY:SVE_I | |
1878 (match_operand:SVE_I 2 "register_operand" "0, w") | |
1879 (match_operand:SVE_I 3 "register_operand" "w, w")) | |
1880 (match_dup 2)] | |
1881 UNSPEC_SEL))] | |
1882 "TARGET_SVE" | |
1883 "@ | |
1884 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1885 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1886 [(set_attr "movprfx" "*,yes")] | |
1887 ) | |
1888 | |
;; As above, for 32-bit and 64-bit elements only.
1889 (define_insn "*cond_<optab><mode>_2" | |
1890 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") | |
1891 (unspec:SVE_SDI | |
1892 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1893 (SVE_INT_BINARY_SD:SVE_SDI | |
1894 (match_operand:SVE_SDI 2 "register_operand" "0, w") | |
1895 (match_operand:SVE_SDI 3 "register_operand" "w, w")) | |
1896 (match_dup 2)] | |
1897 UNSPEC_SEL))] | |
1898 "TARGET_SVE" | |
1899 "@ | |
1900 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
1901 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1902 [(set_attr "movprfx" "*,yes")] | |
1903 ) | |
1904 | |
1905 ;; Predicated integer operations with select matching the second operand. | |
;; Inactive lanes take input operand 3; use the reversed instruction
;; form so that operand 3 can be tied to the destination.
1906 (define_insn "*cond_<optab><mode>_3" | |
1907 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") | |
1908 (unspec:SVE_I | |
1909 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1910 (SVE_INT_BINARY:SVE_I | |
1911 (match_operand:SVE_I 2 "register_operand" "w, w") | |
1912 (match_operand:SVE_I 3 "register_operand" "0, w")) | |
1913 (match_dup 3)] | |
1914 UNSPEC_SEL))] | |
1915 "TARGET_SVE" | |
1916 "@ | |
1917 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1918 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
1919 [(set_attr "movprfx" "*,yes")] | |
1920 ) | |
1921 | |
;; As above, for 32-bit and 64-bit elements only.
1922 (define_insn "*cond_<optab><mode>_3" | |
1923 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") | |
1924 (unspec:SVE_SDI | |
1925 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
1926 (SVE_INT_BINARY_SD:SVE_SDI | |
1927 (match_operand:SVE_SDI 2 "register_operand" "w, w") | |
1928 (match_operand:SVE_SDI 3 "register_operand" "0, w")) | |
1929 (match_dup 3)] | |
1930 UNSPEC_SEL))] | |
1931 "TARGET_SVE" | |
1932 "@ | |
1933 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
1934 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
1935 [(set_attr "movprfx" "*,yes")] | |
1936 ) | |
1937 | |
1938 ;; Predicated integer operations with select matching zero. | |
;; Inactive lanes become zero: implemented with a zeroing MOVPRFX
;; followed by the merging operation.  The destination is earlyclobber
;; because the MOVPRFX must not overwrite a live input.
1939 (define_insn "*cond_<optab><mode>_z" | |
1940 [(set (match_operand:SVE_I 0 "register_operand" "=&w") | |
1941 (unspec:SVE_I | |
1942 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1943 (SVE_INT_BINARY:SVE_I | |
1944 (match_operand:SVE_I 2 "register_operand" "w") | |
1945 (match_operand:SVE_I 3 "register_operand" "w")) | |
1946 (match_operand:SVE_I 4 "aarch64_simd_imm_zero")] | |
1947 UNSPEC_SEL))] | |
1948 "TARGET_SVE" | |
1949 "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1950 [(set_attr "movprfx" "yes")] | |
1951 ) | |
1952 | |
;; As above, for 32-bit and 64-bit elements only.
1953 (define_insn "*cond_<optab><mode>_z" | |
1954 [(set (match_operand:SVE_SDI 0 "register_operand" "=&w") | |
1955 (unspec:SVE_SDI | |
1956 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1957 (SVE_INT_BINARY_SD:SVE_SDI | |
1958 (match_operand:SVE_SDI 2 "register_operand" "w") | |
1959 (match_operand:SVE_SDI 3 "register_operand" "w")) | |
1960 (match_operand:SVE_SDI 4 "aarch64_simd_imm_zero")] | |
1961 UNSPEC_SEL))] | |
1962 "TARGET_SVE" | |
1963 "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
1964 [(set_attr "movprfx" "yes")] | |
1965 ) | |
1966 | |
1967 ;; Synthetic predications with select unmatched. | |
;; The select operand matches none of the inputs or the output, so no
;; single instruction (even with MOVPRFX) can implement it.  Emit "#"
;; and let the post-reload define_split turn it into SEL + operation.
1968 (define_insn "*cond_<optab><mode>_any" | |
1969 [(set (match_operand:SVE_I 0 "register_operand" "=&w") | |
1970 (unspec:SVE_I | |
1971 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1972 (SVE_INT_BINARY:SVE_I | |
1973 (match_operand:SVE_I 2 "register_operand" "w") | |
1974 (match_operand:SVE_I 3 "register_operand" "w")) | |
1975 (match_operand:SVE_I 4 "register_operand" "w")] | |
1976 UNSPEC_SEL))] | |
1977 "TARGET_SVE" | |
1978 "#" | |
1979 ) | |
1980 | |
;; As above, for operations defined only for 32-bit and 64-bit
;; elements.  Split after reload into SEL + predicated operation.
;; Fixed: the inner operation used mode iterator SVE_I while the rest
;; of the pattern uses SVE_SDI; mixing two mode iterators makes the
;; generators expand a cross-product of mismatched modes.  Use SVE_SDI
;; throughout, matching the sibling SVE_SDI patterns above.
1981 (define_insn "*cond_<optab><mode>_any" | |
1982 [(set (match_operand:SVE_SDI 0 "register_operand" "=&w") | |
1983 (unspec:SVE_SDI | |
1984 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
1985 (SVE_INT_BINARY_SD:SVE_SDI | |
1986 (match_operand:SVE_SDI 2 "register_operand" "w") | |
1987 (match_operand:SVE_SDI 3 "register_operand" "w")) | |
1988 (match_operand:SVE_SDI 4 "register_operand" "w")] | |
1989 UNSPEC_SEL))] | |
1990 "TARGET_SVE" | |
1991 "#" | |
1992 ) | |
1993 | |
;; Post-reload split for the "_any" patterns above: first SEL the
;; fallback value (operand 4) against input 2 into the destination,
;; then apply the operation with the destination tied, so inactive
;; lanes keep the selected fallback.  Only valid when operand 4 is
;; distinct from the output and both inputs (checked below).
1994 (define_split | |
1995 [(set (match_operand:SVE_I 0 "register_operand") | |
1996 (unspec:SVE_I | |
1997 [(match_operand:<VPRED> 1 "register_operand") | |
1998 (match_operator:SVE_I 5 "aarch64_sve_any_binary_operator" | |
1999 [(match_operand:SVE_I 2 "register_operand") | |
2000 (match_operand:SVE_I 3 "register_operand")]) | |
2001 (match_operand:SVE_I 4 "register_operand")] | |
2002 UNSPEC_SEL))] | |
2003 "TARGET_SVE && reload_completed | |
2004 && !(rtx_equal_p (operands[0], operands[4]) | |
2005 || rtx_equal_p (operands[2], operands[4]) | |
2006 || rtx_equal_p (operands[3], operands[4]))" | |
2007 ; Not matchable by any one insn or movprfx insn. We need a separate select. | |
2008 [(set (match_dup 0) | |
2009 (unspec:SVE_I [(match_dup 1) (match_dup 2) (match_dup 4)] | |
2010 UNSPEC_SEL)) | |
2011 (set (match_dup 0) | |
2012 (unspec:SVE_I | |
2013 [(match_dup 1) | |
2014 (match_op_dup 5 [(match_dup 0) (match_dup 3)]) | |
2015 (match_dup 0)] | |
2016 UNSPEC_SEL))] | |
2017 ) | |
2018 | |
2019 ;; Set operand 0 to the last active element in operand 3, or to tied | |
2020 ;; operand 1 if no elements are active. | |
;; CLASTB with a scalar destination: alternative 0 targets a general
;; register, alternative 1 an FP/SIMD register.
2021 (define_insn "fold_extract_last_<mode>" | |
2022 [(set (match_operand:<VEL> 0 "register_operand" "=r, w") | |
2023 (unspec:<VEL> | |
2024 [(match_operand:<VEL> 1 "register_operand" "0, 0") | |
2025 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl") | |
2026 (match_operand:SVE_ALL 3 "register_operand" "w, w")] | |
2027 UNSPEC_CLASTB))] | |
2028 "TARGET_SVE" | |
2029 "@ | |
2030 clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype> | |
2031 clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>" | |
2032 ) | |
2033 | |
2034 ;; Unpredicated integer add reduction. | |
;; Supplies an all-true predicate (operand 2) for the UADDV below.
2035 (define_expand "reduc_plus_scal_<mode>" | |
2036 [(set (match_operand:<VEL> 0 "register_operand") | |
2037 (unspec:<VEL> [(match_dup 2) | |
2038 (match_operand:SVE_I 1 "register_operand")] | |
2039 UNSPEC_ADDV))] | |
2040 "TARGET_SVE" | |
2041 { | |
2042 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2043 } | |
2044 ) | |
2045 | |
2046 ;; Predicated integer add reduction. The result is always 64-bits. | |
2047 (define_insn "*reduc_plus_scal_<mode>" | |
2048 [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2049 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2050 (match_operand:SVE_I 2 "register_operand" "w")] | |
2051 UNSPEC_ADDV))] | |
2052 "TARGET_SVE" | |
2053 "uaddv\t%d0, %1, %2.<Vetype>" | |
2054 ) | |
2055 | |
2056 ;; Unpredicated floating-point add reduction. | |
;; Supplies an all-true predicate (operand 2) for the FADDV below.
2057 (define_expand "reduc_plus_scal_<mode>" | |
2058 [(set (match_operand:<VEL> 0 "register_operand") | |
2059 (unspec:<VEL> [(match_dup 2) | |
2060 (match_operand:SVE_F 1 "register_operand")] | |
2061 UNSPEC_FADDV))] | |
2062 "TARGET_SVE" | |
2063 { | |
2064 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2065 } | |
2066 ) | |
2067 | |
2068 ;; Predicated floating-point add reduction. | |
2069 (define_insn "*reduc_plus_scal_<mode>" | |
2070 [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2071 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2072 (match_operand:SVE_F 2 "register_operand" "w")] | |
2073 UNSPEC_FADDV))] | |
2074 "TARGET_SVE" | |
2075 "faddv\t%<Vetype>0, %1, %2.<Vetype>" | |
2076 ) | |
2077 | |
2078 ;; Unpredicated integer MIN/MAX reduction. | |
;; Supplies an all-true predicate (operand 2) for the reduction insn.
2079 (define_expand "reduc_<maxmin_uns>_scal_<mode>" | |
2080 [(set (match_operand:<VEL> 0 "register_operand") | |
2081 (unspec:<VEL> [(match_dup 2) | |
2082 (match_operand:SVE_I 1 "register_operand")] | |
2083 MAXMINV))] | |
2084 "TARGET_SVE" | |
2085 { | |
2086 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2087 } | |
2088 ) | |
2089 | |
2090 ;; Predicated integer MIN/MAX reduction. | |
2091 (define_insn "*reduc_<maxmin_uns>_scal_<mode>" | |
2092 [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2093 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2094 (match_operand:SVE_I 2 "register_operand" "w")] | |
2095 MAXMINV))] | |
2096 "TARGET_SVE" | |
2097 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>" | |
2098 ) | |
2099 | |
2100 ;; Unpredicated floating-point MIN/MAX reduction. | |
;; Supplies an all-true predicate (operand 2) for the reduction insn.
2101 (define_expand "reduc_<maxmin_uns>_scal_<mode>" | |
2102 [(set (match_operand:<VEL> 0 "register_operand") | |
2103 (unspec:<VEL> [(match_dup 2) | |
2104 (match_operand:SVE_F 1 "register_operand")] | |
2105 FMAXMINV))] | |
2106 "TARGET_SVE" | |
2107 { | |
2108 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2109 } | |
2110 ) | |
2111 | |
2112 ;; Predicated floating-point MIN/MAX reduction. | |
2113 (define_insn "*reduc_<maxmin_uns>_scal_<mode>" | |
2114 [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2115 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2116 (match_operand:SVE_F 2 "register_operand" "w")] | |
2117 FMAXMINV))] | |
2118 "TARGET_SVE" | |
2119 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>" | |
2120 ) | |
2121 | |
;; Unpredicated bitwise (AND/OR/XOR) reductions; the expander supplies
;; an all-true predicate (operand 2) for the predicated insn below.
2122 (define_expand "reduc_<optab>_scal_<mode>" | |
2123 [(set (match_operand:<VEL> 0 "register_operand") | |
2124 (unspec:<VEL> [(match_dup 2) | |
2125 (match_operand:SVE_I 1 "register_operand")] | |
2126 BITWISEV))] | |
2127 "TARGET_SVE" | |
2128 { | |
2129 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2130 } | |
2131 ) | |
2132 | |
;; Predicated bitwise reduction.
2133 (define_insn "*reduc_<optab>_scal_<mode>" | |
2134 [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2135 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2136 (match_operand:SVE_I 2 "register_operand" "w")] | |
2137 BITWISEV))] | |
2138 "TARGET_SVE" | |
2139 "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>" | |
2140 ) | |
2141 | |
2142 ;; Unpredicated in-order FP reductions. | |
;; FADDA accumulates elements strictly left-to-right, starting from the
;; scalar in operand 1; operand 3 is the all-true predicate.
2143 (define_expand "fold_left_plus_<mode>" | |
2144 [(set (match_operand:<VEL> 0 "register_operand") | |
2145 (unspec:<VEL> [(match_dup 3) | |
2146 (match_operand:<VEL> 1 "register_operand") | |
2147 (match_operand:SVE_F 2 "register_operand")] | |
2148 UNSPEC_FADDA))] | |
2149 "TARGET_SVE" | |
2150 { | |
2151 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2152 } | |
2153 ) | |
2154 | |
2155 ;; In-order FP reductions predicated with PTRUE. | |
2156 (define_insn "*fold_left_plus_<mode>" | |
2157 [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2158 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2159 (match_operand:<VEL> 2 "register_operand" "0") | |
2160 (match_operand:SVE_F 3 "register_operand" "w")] | |
2161 UNSPEC_FADDA))] | |
2162 "TARGET_SVE" | |
2163 "fadda\t%<Vetype>0, %1, %<Vetype>0, %3.<Vetype>" | |
2164 ) | |
2165 | |
2166 ;; Predicated form of the above in-order reduction. | |
;; Matches FADDA applied to a vector whose inactive lanes have been
;; zeroed by a SEL against zero; FADDA's own predication subsumes it.
2167 (define_insn "*pred_fold_left_plus_<mode>" | |
2168 [(set (match_operand:<VEL> 0 "register_operand" "=w") | |
2169 (unspec:<VEL> | |
2170 [(match_operand:<VEL> 1 "register_operand" "0") | |
2171 (unspec:SVE_F | |
2172 [(match_operand:<VPRED> 2 "register_operand" "Upl") | |
2173 (match_operand:SVE_F 3 "register_operand" "w") | |
2174 (match_operand:SVE_F 4 "aarch64_simd_imm_zero")] | |
2175 UNSPEC_SEL)] | |
2176 UNSPEC_FADDA))] | |
2177 "TARGET_SVE" | |
2178 "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>" | |
2179 ) | |
2180 | |
2181 ;; Unpredicated floating-point addition. | |
;; Operand 2 may be an immediate suitable for FADD or (negated) FSUB.
2182 (define_expand "add<mode>3" | |
2183 [(set (match_operand:SVE_F 0 "register_operand") | |
2184 (unspec:SVE_F | |
2185 [(match_dup 3) | |
2186 (plus:SVE_F | |
2187 (match_operand:SVE_F 1 "register_operand") | |
2188 (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))] | |
2189 UNSPEC_MERGE_PTRUE))] | |
2190 "TARGET_SVE" | |
2191 { | |
2192 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2193 } | |
2194 ) | |
2195 | |
2196 ;; Floating-point addition predicated with a PTRUE. | |
;; Alternatives: add immediate, subtract negated immediate, and the
;; unpredicated register-register FADD.
2197 (define_insn "*add<mode>3" | |
2198 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w") | |
2199 (unspec:SVE_F | |
2200 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2201 (plus:SVE_F | |
2202 (match_operand:SVE_F 2 "register_operand" "%0, 0, w") | |
2203 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))] | |
2204 UNSPEC_MERGE_PTRUE))] | |
2205 "TARGET_SVE" | |
2206 "@ | |
2207 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
2208 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 | |
2209 fadd\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>" | |
2210 ) | |
2211 | |
2213 ;; Unpredicated floating-point subtraction. | |
;; Either operand may be an immediate (FSUB/FADD/FSUBR forms below).
2214 (define_expand "sub<mode>3" | |
2215 [(set (match_operand:SVE_F 0 "register_operand") | |
2216 (unspec:SVE_F | |
2217 [(match_dup 3) | |
2218 (minus:SVE_F | |
2219 (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand") | |
2220 (match_operand:SVE_F 2 "register_operand"))] | |
2221 UNSPEC_MERGE_PTRUE))] | |
2222 "TARGET_SVE" | |
2223 { | |
2224 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2225 } | |
2226 ) | |
2227 | |
2228 ;; Floating-point subtraction predicated with a PTRUE. | |
;; The insn condition rejects the immediate-minus-immediate case,
;; which no alternative can handle.
2229 (define_insn "*sub<mode>3" | |
2230 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w") | |
2231 (unspec:SVE_F | |
2232 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") | |
2233 (minus:SVE_F | |
2234 (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w") | |
2235 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))] | |
2236 UNSPEC_MERGE_PTRUE))] | |
2237 "TARGET_SVE | |
2238 && (register_operand (operands[2], <MODE>mode) | |
2239 || register_operand (operands[3], <MODE>mode))" | |
2240 "@ | |
2241 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
2242 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 | |
2243 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 | |
2244 fsub\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>" | |
2245 ) | |
2245 | |
2246 ;; Unpredicated floating-point multiplication. | |
;; Operand 2 may be an FMUL-compatible immediate (constraint vsM).
2247 (define_expand "mul<mode>3" | |
2248 [(set (match_operand:SVE_F 0 "register_operand") | |
2249 (unspec:SVE_F | |
2250 [(match_dup 3) | |
2251 (mult:SVE_F | |
2252 (match_operand:SVE_F 1 "register_operand") | |
2253 (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))] | |
2254 UNSPEC_MERGE_PTRUE))] | |
2255 "TARGET_SVE" | |
2256 { | |
2257 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2258 } | |
2259 ) | |
2260 | |
2261 ;; Floating-point multiplication predicated with a PTRUE. | |
2262 (define_insn "*mul<mode>3" | |
2263 [(set (match_operand:SVE_F 0 "register_operand" "=w, w") | |
2264 (unspec:SVE_F | |
2265 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2266 (mult:SVE_F | |
2267 (match_operand:SVE_F 2 "register_operand" "%0, w") | |
2268 (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))] | |
2269 UNSPEC_MERGE_PTRUE))] | |
2270 "TARGET_SVE" | |
2271 "@ | |
2272 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 | |
2273 fmul\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>" | |
2274 ) | |
2275 | |
2276 ;; Unpredicated fma (%0 = (%1 * %2) + %3). | |
2277 (define_expand "fma<mode>4" | |
2278 [(set (match_operand:SVE_F 0 "register_operand") | |
2279 (unspec:SVE_F | |
2280 [(match_dup 4) | |
2281 (fma:SVE_F (match_operand:SVE_F 1 "register_operand") | |
2282 (match_operand:SVE_F 2 "register_operand") | |
2283 (match_operand:SVE_F 3 "register_operand"))] | |
2284 UNSPEC_MERGE_PTRUE))] | |
2285 "TARGET_SVE" | |
2286 { | |
2287 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2288 } | |
2289 ) | |
2290 | |
2291 ;; fma predicated with a PTRUE. | |
;; FMAD overwrites a multiplicand; FMLA overwrites the addend; the
;; third alternative frees the destination via MOVPRFX.
2292 (define_insn "*fma<mode>4" | |
2293 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") | |
2294 (unspec:SVE_F | |
2295 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2296 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w") | |
2297 (match_operand:SVE_F 4 "register_operand" "w, w, w") | |
2298 (match_operand:SVE_F 2 "register_operand" "w, 0, w"))] | |
2299 UNSPEC_MERGE_PTRUE))] | |
2300 "TARGET_SVE" | |
2301 "@ | |
2302 fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype> | |
2303 fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
2304 movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
2305 [(set_attr "movprfx" "*,*,yes")] | |
2306 ) | |
2307 | |
2308 ;; Unpredicated fnma (%0 = (-%1 * %2) + %3). | |
2309 (define_expand "fnma<mode>4" | |
2310 [(set (match_operand:SVE_F 0 "register_operand") | |
2311 (unspec:SVE_F | |
2312 [(match_dup 4) | |
2313 (fma:SVE_F (neg:SVE_F | |
2314 (match_operand:SVE_F 1 "register_operand")) | |
2315 (match_operand:SVE_F 2 "register_operand") | |
2316 (match_operand:SVE_F 3 "register_operand"))] | |
2317 UNSPEC_MERGE_PTRUE))] | |
2318 "TARGET_SVE" | |
2319 { | |
2320 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2321 } | |
2322 ) | |
2323 | |
2324 ;; fnma predicated with a PTRUE. | |
;; FMSB ties a multiplicand, FMLS ties the addend; alternative 2 uses
;; MOVPRFX for an untied destination.
2325 (define_insn "*fnma<mode>4" | |
2326 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") | |
2327 (unspec:SVE_F | |
2328 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2329 (fma:SVE_F (neg:SVE_F | |
2330 (match_operand:SVE_F 3 "register_operand" "%0, w, w")) | |
2331 (match_operand:SVE_F 4 "register_operand" "w, w, w") | |
2332 (match_operand:SVE_F 2 "register_operand" "w, 0, w"))] | |
2333 UNSPEC_MERGE_PTRUE))] | |
2334 "TARGET_SVE" | |
2335 "@ | |
2336 fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype> | |
2337 fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
2338 movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
2339 [(set_attr "movprfx" "*,*,yes")] | |
2340 ) | |
2341 | |
2342 ;; Unpredicated fms (%0 = (%1 * %2) - %3). | |
2343 (define_expand "fms<mode>4" | |
2344 [(set (match_operand:SVE_F 0 "register_operand") | |
2345 (unspec:SVE_F | |
2346 [(match_dup 4) | |
2347 (fma:SVE_F (match_operand:SVE_F 1 "register_operand") | |
2348 (match_operand:SVE_F 2 "register_operand") | |
2349 (neg:SVE_F | |
2350 (match_operand:SVE_F 3 "register_operand")))] | |
2351 UNSPEC_MERGE_PTRUE))] | |
2352 "TARGET_SVE" | |
2353 { | |
2354 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2355 } | |
2356 ) | |
2357 | |
2358 ;; fms predicated with a PTRUE. | |
;; FNMSB ties a multiplicand, FNMLS ties the addend; alternative 2
;; uses MOVPRFX for an untied destination.
2359 (define_insn "*fms<mode>4" | |
2360 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") | |
2361 (unspec:SVE_F | |
2362 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2363 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w") | |
2364 (match_operand:SVE_F 4 "register_operand" "w, w, w") | |
2365 (neg:SVE_F | |
2366 (match_operand:SVE_F 2 "register_operand" "w, 0, w")))] | |
2367 UNSPEC_MERGE_PTRUE))] | |
2368 "TARGET_SVE" | |
2369 "@ | |
2370 fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype> | |
2371 fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
2372 movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
2373 [(set_attr "movprfx" "*,*,yes")] | |
2374 ) | |
2375 | |
2376 ;; Unpredicated fnms (%0 = (-%1 * %2) - %3). | |
;; Operand 4 is filled in below with an all-true predicate; the result is
;; matched by the predicated *fnms<mode>4 pattern.
2377 (define_expand "fnms<mode>4" | |
2378 [(set (match_operand:SVE_F 0 "register_operand") | |
2379 (unspec:SVE_F | |
2380 [(match_dup 4) | |
2381 (fma:SVE_F (neg:SVE_F | |
2382 (match_operand:SVE_F 1 "register_operand")) | |
2383 (match_operand:SVE_F 2 "register_operand") | |
2384 (neg:SVE_F | |
2385 (match_operand:SVE_F 3 "register_operand")))] | |
2386 UNSPEC_MERGE_PTRUE))] | |
2387 "TARGET_SVE" | |
2388 { | |
2389 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2390 } | |
2391 ) | |
2392 | |
2393 ;; fnms predicated with a PTRUE. | |
;; FNMAD destroys the multiplicand (%0 tied to operand 3) and FNMLA
;; destroys the (negated) addend (%0 tied to operand 2); the last
;; alternative uses MOVPRFX so all inputs can be distinct registers.
2394 (define_insn "*fnms<mode>4" | |
2395 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") | |
2396 (unspec:SVE_F | |
2397 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2398 (fma:SVE_F (neg:SVE_F | |
2399 (match_operand:SVE_F 3 "register_operand" "%0, w, w")) | |
2400 (match_operand:SVE_F 4 "register_operand" "w, w, w") | |
2401 (neg:SVE_F | |
2402 (match_operand:SVE_F 2 "register_operand" "w, 0, w")))] | |
2403 UNSPEC_MERGE_PTRUE))] | |
2404 "TARGET_SVE" | |
2405 "@ | |
2406 fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype> | |
2407 fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
2408 movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
2409 [(set_attr "movprfx" "*,*,yes")] | |
2410 ) | |
2411 | |
2412 ;; Unpredicated floating-point division. | |
;; Operand 3 is filled in below with an all-true predicate; the result is
;; matched by the predicated *div<mode>3 pattern.
2413 (define_expand "div<mode>3" | |
2414 [(set (match_operand:SVE_F 0 "register_operand") | |
2415 (unspec:SVE_F | |
2416 [(match_dup 3) | |
2417 (div:SVE_F (match_operand:SVE_F 1 "register_operand") | |
2418 (match_operand:SVE_F 2 "register_operand"))] | |
2419 UNSPEC_MERGE_PTRUE))] | |
2420 "TARGET_SVE" | |
2421 { | |
2422 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2423 } | |
2424 ) | |
2425 | |
2426 ;; Floating-point division predicated with a PTRUE. | |
;; FDIV ties %0 to the dividend (operand 2) and FDIVR ties %0 to the
;; divisor (operand 3, operands reversed); the last alternative uses
;; MOVPRFX so that dividend, divisor and destination can all differ.
2427 (define_insn "*div<mode>3" | |
2428 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") | |
2429 (unspec:SVE_F | |
2430 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2431 (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w") | |
2432 (match_operand:SVE_F 3 "register_operand" "w, 0, w"))] | |
2433 UNSPEC_MERGE_PTRUE))] | |
2434 "TARGET_SVE" | |
2435 "@ | |
2436 fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2437 fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
2438 movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2439 [(set_attr "movprfx" "*,*,yes")] | |
2440 ) | |
2441 | |
2442 ;; Unpredicated FNEG, FABS and FSQRT. | |
;; Operand 2 is filled in below with an all-true predicate.
2443 (define_expand "<optab><mode>2" | |
2444 [(set (match_operand:SVE_F 0 "register_operand") | |
2445 (unspec:SVE_F | |
2446 [(match_dup 2) | |
2447 (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))] | |
2448 UNSPEC_MERGE_PTRUE))] | |
2449 "TARGET_SVE" | |
2450 { | |
2451 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2452 } | |
2453 ) | |
2454 | |
2455 ;; FNEG, FABS and FSQRT predicated with a PTRUE. | |
;; Operand 1 is known to be all-true (UNSPEC_MERGE_PTRUE), so the merging
;; behavior of the instruction is irrelevant here.
2456 (define_insn "*<optab><mode>2" | |
2457 [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
2458 (unspec:SVE_F | |
2459 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2460 (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))] | |
2461 UNSPEC_MERGE_PTRUE))] | |
2462 "TARGET_SVE" | |
2463 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
2464 ) | |
2465 | |
2466 ;; Unpredicated FRINTy. | |
;; Operand 2 is filled in below with an all-true predicate.
2467 (define_expand "<frint_pattern><mode>2" | |
2468 [(set (match_operand:SVE_F 0 "register_operand") | |
2469 (unspec:SVE_F | |
2470 [(match_dup 2) | |
2471 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")] | |
2472 FRINT)] | |
2473 UNSPEC_MERGE_PTRUE))] | |
2474 "TARGET_SVE" | |
2475 { | |
2476 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2477 } | |
2478 ) | |
2479 | |
2480 ;; FRINTy predicated with a PTRUE. | |
;; The rounding mode is selected by the FRINT iterator through
;; <frint_suffix> (e.g. frintn, frintz, ...).
2481 (define_insn "*<frint_pattern><mode>2" | |
2482 [(set (match_operand:SVE_F 0 "register_operand" "=w") | |
2483 (unspec:SVE_F | |
2484 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2485 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")] | |
2486 FRINT)] | |
2487 UNSPEC_MERGE_PTRUE))] | |
2488 "TARGET_SVE" | |
2489 "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>" | |
2490 ) | |
2491 | |
2492 ;; Unpredicated conversion of floats to integers of the same size (HF to HI, | |
2493 ;; SF to SI or DF to DI). | |
;; Operand 2 is filled in below with an all-true predicate.
2494 (define_expand "<fix_trunc_optab><mode><v_int_equiv>2" | |
2495 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand") | |
2496 (unspec:<V_INT_EQUIV> | |
2497 [(match_dup 2) | |
2498 (FIXUORS:<V_INT_EQUIV> | |
2499 (match_operand:SVE_F 1 "register_operand"))] | |
2500 UNSPEC_MERGE_PTRUE))] | |
2501 "TARGET_SVE" | |
2502 { | |
2503 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2504 } | |
2505 ) | |
2506 | |
2507 ;; Conversion of HF to DI, SI or HI, predicated with a PTRUE. | |
;; The source really is VNx8HF (the mnemonic uses "%2.h"), so the comment
;; above and the pattern name now say HF/vnx8hf rather than the previous
;; SF/v16hsf, matching the sibling vnx4sf/vnx2df patterns below.  The
;; leading "*" means no gen function is emitted, so the rename cannot
;; affect any caller.
2508 (define_insn "*<fix_trunc_optab>vnx8hf<mode>2" | |
2509 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w") | |
2510 (unspec:SVE_HSDI | |
2511 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2512 (FIXUORS:SVE_HSDI | |
2513 (match_operand:VNx8HF 2 "register_operand" "w"))] | |
2514 UNSPEC_MERGE_PTRUE))] | |
2515 "TARGET_SVE" | |
2516 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h" | |
2517 ) | |
2518 | |
2519 ;; Conversion of SF to DI or SI, predicated with a PTRUE. | |
;; The source element size is fixed at .s; the destination size follows
;; the SVE_SDI mode via <Vetype>.
2520 (define_insn "*<fix_trunc_optab>vnx4sf<mode>2" | |
2521 [(set (match_operand:SVE_SDI 0 "register_operand" "=w") | |
2522 (unspec:SVE_SDI | |
2523 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2524 (FIXUORS:SVE_SDI | |
2525 (match_operand:VNx4SF 2 "register_operand" "w"))] | |
2526 UNSPEC_MERGE_PTRUE))] | |
2527 "TARGET_SVE" | |
2528 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s" | |
2529 ) | |
2530 | |
2531 ;; Conversion of DF to DI or SI, predicated with a PTRUE. | |
;; The predicate mode is spelled VNx2BI explicitly (one predicate element
;; per 64-bit source lane) rather than <VPRED> of the destination mode.
2532 (define_insn "*<fix_trunc_optab>vnx2df<mode>2" | |
2533 [(set (match_operand:SVE_SDI 0 "register_operand" "=w") | |
2534 (unspec:SVE_SDI | |
2535 [(match_operand:VNx2BI 1 "register_operand" "Upl") | |
2536 (FIXUORS:SVE_SDI | |
2537 (match_operand:VNx2DF 2 "register_operand" "w"))] | |
2538 UNSPEC_MERGE_PTRUE))] | |
2539 "TARGET_SVE" | |
2540 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d" | |
2541 ) | |
2542 | |
2543 ;; Unpredicated conversion of integers to floats of the same size | |
2544 ;; (HI to HF, SI to SF or DI to DF). | |
;; Operand 2 is filled in below with an all-true predicate.
2545 (define_expand "<optab><v_int_equiv><mode>2" | |
2546 [(set (match_operand:SVE_F 0 "register_operand") | |
2547 (unspec:SVE_F | |
2548 [(match_dup 2) | |
2549 (FLOATUORS:SVE_F | |
2550 (match_operand:<V_INT_EQUIV> 1 "register_operand"))] | |
2551 UNSPEC_MERGE_PTRUE))] | |
2552 "TARGET_SVE" | |
2553 { | |
2554 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode)); | |
2555 } | |
2556 ) | |
2557 | |
2558 ;; Conversion of DI, SI or HI to the same number of HFs, predicated | |
2559 ;; with a PTRUE. | |
;; The destination element size is fixed at .h; the source size follows
;; the SVE_HSDI mode via <Vetype>.
2560 (define_insn "*<optab><mode>vnx8hf2" | |
2561 [(set (match_operand:VNx8HF 0 "register_operand" "=w") | |
2562 (unspec:VNx8HF | |
2563 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2564 (FLOATUORS:VNx8HF | |
2565 (match_operand:SVE_HSDI 2 "register_operand" "w"))] | |
2566 UNSPEC_MERGE_PTRUE))] | |
2567 "TARGET_SVE" | |
2568 "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>" | |
2569 ) | |
2570 | |
2571 ;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE. | |
;; SCVTF/UCVTF selected through <su_optab>; destination element size is
;; fixed at .s.
2572 (define_insn "*<optab><mode>vnx4sf2" | |
2573 [(set (match_operand:VNx4SF 0 "register_operand" "=w") | |
2574 (unspec:VNx4SF | |
2575 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2576 (FLOATUORS:VNx4SF | |
2577 (match_operand:SVE_SDI 2 "register_operand" "w"))] | |
2578 UNSPEC_MERGE_PTRUE))] | |
2579 "TARGET_SVE" | |
2580 "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>" | |
2581 ) | |
2582 | |
2583 ;; Conversion of DI or SI to DF, predicated with a PTRUE. | |
;; Unlike its "*"-named siblings this pattern has a gen function: the
;; vec_unpack<su_optab>_float_<perm_hilo>_vnx4si expander below calls
;; gen_aarch64_sve_<optab>vnx4sivnx2df2 directly.
2584 (define_insn "aarch64_sve_<optab><mode>vnx2df2" | |
2585 [(set (match_operand:VNx2DF 0 "register_operand" "=w") | |
2586 (unspec:VNx2DF | |
2587 [(match_operand:VNx2BI 1 "register_operand" "Upl") | |
2588 (FLOATUORS:VNx2DF | |
2589 (match_operand:SVE_SDI 2 "register_operand" "w"))] | |
2590 UNSPEC_MERGE_PTRUE))] | |
2591 "TARGET_SVE" | |
2592 "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>" | |
2593 ) | |
2594 | |
2595 ;; Conversion of DFs to the same number of SFs, or SFs to the same number | |
2596 ;; of HFs. | |
;; The governing predicate uses the wide (source) mode's predicate,
;; <VWIDE_PRED>, since activity is per wide element.
2597 (define_insn "*trunc<Vwide><mode>2" | |
2598 [(set (match_operand:SVE_HSF 0 "register_operand" "=w") | |
2599 (unspec:SVE_HSF | |
2600 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl") | |
2601 (unspec:SVE_HSF | |
2602 [(match_operand:<VWIDE> 2 "register_operand" "w")] | |
2603 UNSPEC_FLOAT_CONVERT)] | |
2604 UNSPEC_MERGE_PTRUE)] | |
2605 UNSPEC_MERGE_PTRUE))] | |
2605 "TARGET_SVE" | |
2606 "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>" | |
2607 ) | |
2608 | |
2609 ;; Conversion of SFs to the same number of DFs, or HFs to the same number | |
2610 ;; of SFs. | |
;; Named (no "*") because the vec_unpacks_<perm_hilo>_<mode> expander
;; below calls gen_aarch64_sve_extend<mode><Vwide>2 directly.
2611 (define_insn "aarch64_sve_extend<mode><Vwide>2" | |
2612 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
2613 (unspec:<VWIDE> | |
2614 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl") | |
2615 (unspec:<VWIDE> | |
2616 [(match_operand:SVE_HSF 2 "register_operand" "w")] | |
2617 UNSPEC_FLOAT_CONVERT)] | |
2618 UNSPEC_MERGE_PTRUE))] | |
2619 "TARGET_SVE" | |
2620 "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>" | |
2621 ) | |
2622 | |
2623 ;; Unpack the low or high half of a predicate, where "high" refers to | |
2624 ;; the low-numbered lanes for big-endian and the high-numbered lanes | |
2625 ;; for little-endian. | |
;; <hi_lanes_optab> resolves the endian-dependent choice between
;; PUNPKHI and PUNPKLO at expand time.
2626 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>" | |
2627 [(match_operand:<VWIDE> 0 "register_operand") | |
2628 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")] | |
2629 UNPACK)] | |
2630 "TARGET_SVE" | |
2631 { | |
2632 emit_insn ((<hi_lanes_optab> | |
2633 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode> | |
2634 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>) | |
2635 (operands[0], operands[1])); | |
2636 DONE; | |
2637 } | |
2638 ) | |
2639 | |
2640 ;; PUNPKHI and PUNPKLO. | |
;; Predicate unpacks always operate on .h/.b regardless of the element
;; size the predicate describes.
2641 (define_insn "aarch64_sve_punpk<perm_hilo>_<mode>" | |
2642 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa") | |
2643 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")] | |
2644 UNPACK_UNSIGNED))] | |
2645 "TARGET_SVE" | |
2646 "punpk<perm_hilo>\t%0.h, %1.b" | |
2647 ) | |
2648 | |
2649 ;; Unpack the low or high half of a vector, where "high" refers to | |
2650 ;; the low-numbered lanes for big-endian and the high-numbered lanes | |
2651 ;; for little-endian. | |
;; <hi_lanes_optab> resolves the endian-dependent choice between the
;; UNPKHI and UNPKLO forms at expand time.
2652 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>" | |
2653 [(match_operand:<VWIDE> 0 "register_operand") | |
2654 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)] | |
2655 "TARGET_SVE" | |
2656 { | |
2657 emit_insn ((<hi_lanes_optab> | |
2658 ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode> | |
2659 : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode> | |
2659 : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>) | |
2660 (operands[0], operands[1])); | |
2661 DONE; | |
2662 } | |
2663 ) | |
2664 | |
2665 ;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO. | |
;; Sign/zero extension is selected by <su>, half selection by <perm_hilo>.
2666 (define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>" | |
2667 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
2668 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")] | |
2669 UNPACK))] | |
2670 "TARGET_SVE" | |
2671 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>" | |
2672 ) | |
2673 | |
2674 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF. | |
2675 ;; First unpack the source without conversion, then float-convert the | |
2676 ;; unpacked source. | |
;; The FCVT step reuses the aarch64_sve_extend<mode><Vwide>2 pattern
;; defined above, under an all-true wide predicate.
2677 (define_expand "vec_unpacks_<perm_hilo>_<mode>" | |
2678 [(match_operand:<VWIDE> 0 "register_operand") | |
2679 (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")] | |
2680 UNPACK_UNSIGNED)] | |
2681 "TARGET_SVE" | |
2682 { | |
2683 /* Use ZIP to do the unpack, since we don't care about the upper halves | |
2684 and since it has the nice property of not needing any subregs. | |
2685 If using UUNPK* turns out to be preferable, we could model it as | |
2686 a ZIP whose first operand is zero. */ | |
2687 rtx temp = gen_reg_rtx (<MODE>mode); | |
2688 emit_insn ((<hi_lanes_optab> | |
2689 ? gen_aarch64_sve_zip2<mode> | |
2690 : gen_aarch64_sve_zip1<mode>) | |
2691 (temp, operands[1], operands[1])); | |
2692 rtx ptrue = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode)); | |
2693 emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0], | |
2694 ptrue, temp)); | |
2695 DONE; | |
2696 } | |
2697 ) | |
2698 | |
2699 ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI | |
2700 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the | |
2701 ;; unpacked VNx4SI to VNx2DF. | |
;; The conversion reuses the named aarch64_sve_<optab>vnx4sivnx2df2
;; pattern above, under an all-true VNx2BI predicate.
2702 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si" | |
2703 [(match_operand:VNx2DF 0 "register_operand") | |
2704 (FLOATUORS:VNx2DF | |
2705 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")] | |
2706 UNPACK_UNSIGNED))] | |
2707 "TARGET_SVE" | |
2708 { | |
2709 /* Use ZIP to do the unpack, since we don't care about the upper halves | |
2710 and since it has the nice property of not needing any subregs. | |
2711 If using UUNPK* turns out to be preferable, we could model it as | |
2712 a ZIP whose first operand is zero. */ | |
2713 rtx temp = gen_reg_rtx (VNx4SImode); | |
2714 emit_insn ((<hi_lanes_optab> | |
2715 ? gen_aarch64_sve_zip2vnx4si | |
2716 : gen_aarch64_sve_zip1vnx4si) | |
2717 (temp, operands[1], operands[1])); | |
2718 rtx ptrue = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode)); | |
2719 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0], | |
2720 ptrue, temp)); | |
2721 DONE; | |
2722 } | |
2723 ) | |
2724 | |
2725 ;; Predicate pack. Use UZP1 on the narrower type, which discards | |
2726 ;; the high part of each wide element. | |
;; Operands 1 and 2 supply the two wide halves; the result interleaves
;; their narrowed elements in concatenation order.
2727 (define_insn "vec_pack_trunc_<Vwide>" | |
2728 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa") | |
2729 (unspec:PRED_BHS | |
2730 [(match_operand:<VWIDE> 1 "register_operand" "Upa") | |
2731 (match_operand:<VWIDE> 2 "register_operand" "Upa")] | |
2732 UNSPEC_PACK))] | |
2733 "TARGET_SVE" | |
2734 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
2735 ) | |
2736 | |
2737 ;; Integer pack. Use UZP1 on the narrower type, which discards | |
2738 ;; the high part of each wide element. | |
;; Same scheme as the predicate pack above, but on vector registers.
2739 (define_insn "vec_pack_trunc_<Vwide>" | |
2740 [(set (match_operand:SVE_BHSI 0 "register_operand" "=w") | |
2741 (unspec:SVE_BHSI | |
2742 [(match_operand:<VWIDE> 1 "register_operand" "w") | |
2743 (match_operand:<VWIDE> 2 "register_operand" "w")] | |
2744 UNSPEC_PACK))] | |
2745 "TARGET_SVE" | |
2746 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" | |
2747 ) | |
2748 | |
2749 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack | |
2750 ;; the results into a single vector. | |
;; Operand 3 is an all-true wide predicate and operands 4 and 5 are
;; scratch narrow vectors, all created in the preparation code; the
;; final UZP1 interleaves the two converted halves.
2751 (define_expand "vec_pack_trunc_<Vwide>" | |
2752 [(set (match_dup 4) | |
2753 (unspec:SVE_HSF | |
2754 [(match_dup 3) | |
2755 (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")] | |
2756 UNSPEC_FLOAT_CONVERT)] | |
2757 UNSPEC_MERGE_PTRUE)) | |
2758 (set (match_dup 5) | |
2759 (unspec:SVE_HSF | |
2760 [(match_dup 3) | |
2761 (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")] | |
2762 UNSPEC_FLOAT_CONVERT)] | |
2763 UNSPEC_MERGE_PTRUE)) | |
2764 (set (match_operand:SVE_HSF 0 "register_operand") | |
2765 (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] | |
2766 "TARGET_SVE" | |
2767 { | |
2768 operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode)); | |
2769 operands[4] = gen_reg_rtx (<MODE>mode); | |
2770 operands[5] = gen_reg_rtx (<MODE>mode); | |
2771 } | |
2772 ) | |
2773 | |
2774 ;; Convert two vectors of DF to SI and pack the results into a single vector. | |
;; Same scheme as the float pack above: operand 3 is an all-true VNx2BI
;; predicate and operands 4 and 5 are scratch VNx4SI vectors, all created
;; in the preparation code.
2775 (define_expand "vec_pack_<su>fix_trunc_vnx2df" | |
2776 [(set (match_dup 4) | |
2777 (unspec:VNx4SI | |
2778 [(match_dup 3) | |
2779 (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))] | |
2780 UNSPEC_MERGE_PTRUE)) | |
2781 (set (match_dup 5) | |
2782 (unspec:VNx4SI | |
2783 [(match_dup 3) | |
2784 (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))] | |
2785 UNSPEC_MERGE_PTRUE)) | |
2786 (set (match_operand:VNx4SI 0 "register_operand") | |
2787 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] | |
2788 "TARGET_SVE" | |
2789 { | |
2790 operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode)); | |
2791 operands[4] = gen_reg_rtx (VNx4SImode); | |
2792 operands[5] = gen_reg_rtx (VNx4SImode); | |
2793 } | |
2794 ) | |
2795 | |
2796 ;; Predicated floating-point operations with select. | |
;; Operand 4 supplies the value of inactive lanes (a register or zero);
;; the *cond_..._0/_2/_3/_z/_any patterns below match the specific cases.
2797 (define_expand "cond_<optab><mode>" | |
2798 [(set (match_operand:SVE_F 0 "register_operand") | |
2799 (unspec:SVE_F | |
2800 [(match_operand:<VPRED> 1 "register_operand") | |
2801 (unspec:SVE_F | |
2802 [(match_operand:SVE_F 2 "register_operand") | |
2803 (match_operand:SVE_F 3 "register_operand")] | |
2804 SVE_COND_FP_BINARY) | |
2805 (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] | |
2806 UNSPEC_SEL))] | |
2807 "TARGET_SVE" | |
2808 ) | |
2809 | |
2810 ;; Predicated floating-point operations with select matching output. | |
;; Inactive lanes keep the old value of the destination (match_dup 0),
;; so operand 0 is an in/out operand ("+w").  The second alternative
;; uses the reversed-operand instruction so that %0 can be tied to
;; operand 3 instead of operand 2.
2811 (define_insn "*cond_<optab><mode>_0" | |
2812 [(set (match_operand:SVE_F 0 "register_operand" "+w, w, ?&w") | |
2813 (unspec:SVE_F | |
2814 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2815 (unspec:SVE_F | |
2816 [(match_operand:SVE_F 2 "register_operand" "0, w, w") | |
2817 (match_operand:SVE_F 3 "register_operand" "w, 0, w")] | |
2818 SVE_COND_FP_BINARY) | |
2819 (match_dup 0)] | |
2820 UNSPEC_SEL))] | |
2821 "TARGET_SVE" | |
2822 "@ | |
2823 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2824 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
2825 movprfx\t%0, %1/m, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2826 [(set_attr "movprfx" "*,*,yes")] | |
2827 ) | |
2828 | |
2829 ;; Predicated floating-point operations with select matching first operand. | |
;; Inactive lanes take operand 2 (match_dup 2), which is exactly the
;; merging behavior of the destructive instruction when %0 is tied to %2;
;; the second alternative frees the tie via an unpredicated MOVPRFX.
2830 (define_insn "*cond_<optab><mode>_2" | |
2831 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") | |
2832 (unspec:SVE_F | |
2833 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2834 (unspec:SVE_F | |
2835 [(match_operand:SVE_F 2 "register_operand" "0, w") | |
2836 (match_operand:SVE_F 3 "register_operand" "w, w")] | |
2837 SVE_COND_FP_BINARY) | |
2838 (match_dup 2)] | |
2839 UNSPEC_SEL))] | |
2840 "TARGET_SVE" | |
2841 "@ | |
2842 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> | |
2843 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2844 [(set_attr "movprfx" "*,yes")] | |
2845 ) | |
2846 | |
2847 ;; Predicated floating-point operations with select matching second operand. | |
;; Inactive lanes take operand 3 (match_dup 3), so the reversed-operand
;; instruction is used with %0 tied to %3; the second alternative frees
;; the tie via an unpredicated MOVPRFX.
2848 (define_insn "*cond_<optab><mode>_3" | |
2849 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") | |
2850 (unspec:SVE_F | |
2851 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2852 (unspec:SVE_F | |
2853 [(match_operand:SVE_F 2 "register_operand" "w, w") | |
2854 (match_operand:SVE_F 3 "register_operand" "0, w")] | |
2855 SVE_COND_FP_BINARY) | |
2856 (match_dup 3)] | |
2857 UNSPEC_SEL))] | |
2858 "TARGET_SVE" | |
2859 "@ | |
2860 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> | |
2861 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" | |
2862 [(set_attr "movprfx" "*,yes")] | |
2863 ) | |
2864 | |
2865 ;; Predicated floating-point operations with select matching zero. | |
;; Inactive lanes are zeroed by the zeroing MOVPRFX (%1/z); the
;; destination must not overlap the inputs ("=&w") because the MOVPRFX
;; writes it before they are read.
2866 (define_insn "*cond_<optab><mode>_z" | |
2867 [(set (match_operand:SVE_F 0 "register_operand" "=&w") | |
2868 (unspec:SVE_F | |
2869 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2870 (unspec:SVE_F | |
2871 [(match_operand:SVE_F 2 "register_operand" "w") | |
2872 (match_operand:SVE_F 3 "register_operand" "w")] | |
2873 SVE_COND_FP_BINARY) | |
2874 (match_operand:SVE_F 4 "aarch64_simd_imm_zero")] | |
2875 UNSPEC_SEL))] | |
2876 "TARGET_SVE" | |
2877 "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" | |
2878 [(set_attr "movprfx" "yes")] | |
2879 ) | |
2880 | |
2881 ;; Synthetic predication of floating-point operations with select unmatched. | |
;; When the fallback value (operand 4) matches neither the destination
;; nor either input, no single insn (even with MOVPRFX) works.  After
;; reload, split into a SEL that merges operand 2 with operand 4, then
;; the predicated operation on the result, merging with itself so that
;; inactive lanes keep the operand-4 values.
2882 (define_insn_and_split "*cond_<optab><mode>_any" | |
2883 [(set (match_operand:SVE_F 0 "register_operand" "=&w") | |
2884 (unspec:SVE_F | |
2885 [(match_operand:<VPRED> 1 "register_operand" "Upl") | |
2886 (unspec:SVE_F | |
2887 [(match_operand:SVE_F 2 "register_operand" "w") | |
2888 (match_operand:SVE_F 3 "register_operand" "w")] | |
2889 SVE_COND_FP_BINARY) | |
2890 (match_operand:SVE_F 4 "register_operand" "w")] | |
2891 UNSPEC_SEL))] | |
2892 "TARGET_SVE" | |
2893 "#" | |
2894 "&& reload_completed | |
2895 && !(rtx_equal_p (operands[0], operands[4]) | |
2896 || rtx_equal_p (operands[2], operands[4]) | |
2897 || rtx_equal_p (operands[3], operands[4]))" | |
2898 ; Not matchable by any one insn or movprfx insn. We need a separate select. | |
2899 [(set (match_dup 0) | |
2900 (unspec:SVE_F [(match_dup 1) (match_dup 2) (match_dup 4)] UNSPEC_SEL)) | |
2901 (set (match_dup 0) | |
2902 (unspec:SVE_F | |
2903 [(match_dup 1) | |
2904 (unspec:SVE_F [(match_dup 0) (match_dup 3)] SVE_COND_FP_BINARY) | |
2905 (match_dup 0)] | |
2906 UNSPEC_SEL))] | |
2907 ) | |
2908 | |
2909 ;; Predicated floating-point ternary operations with select. | |
;; Operands 2 and 3 are the multiplicands and operand 4 the addend;
;; operand 5 supplies the value of inactive lanes.
2910 (define_expand "cond_<optab><mode>" | |
2911 [(set (match_operand:SVE_F 0 "register_operand") | |
2912 (unspec:SVE_F | |
2913 [(match_operand:<VPRED> 1 "register_operand") | |
2914 (unspec:SVE_F | |
2915 [(match_operand:SVE_F 2 "register_operand") | |
2916 (match_operand:SVE_F 3 "register_operand") | |
2917 (match_operand:SVE_F 4 "register_operand")] | |
2918 SVE_COND_FP_TERNARY) | |
2919 (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")] | |
2920 UNSPEC_SEL))] | |
2921 "TARGET_SVE" | |
2922 { | |
2923 /* Swap the multiplication operands if the fallback value is the | |
2924 second of the two. */ | |
2925 if (rtx_equal_p (operands[3], operands[5])) | |
2926 std::swap (operands[2], operands[3]); | |
2927 }) | |
2928 | |
2929 ;; Predicated floating-point ternary operations using the FMAD-like form. | |
;; Inactive lanes take the first multiplicand (match_dup 2), which is the
;; register the FMAD-like form overwrites (%0 tied to operand 2); the
;; second alternative frees the tie via MOVPRFX.
2930 (define_insn "*cond_<optab><mode>_2" | |
2931 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") | |
2932 (unspec:SVE_F | |
2933 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2934 (unspec:SVE_F | |
2935 [(match_operand:SVE_F 2 "register_operand" "0, w") | |
2936 (match_operand:SVE_F 3 "register_operand" "w, w") | |
2937 (match_operand:SVE_F 4 "register_operand" "w, w")] | |
2938 SVE_COND_FP_TERNARY) | |
2939 (match_dup 2)] | |
2940 UNSPEC_SEL))] | |
2941 "TARGET_SVE" | |
2942 "@ | |
2943 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> | |
2944 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" | |
2945 [(set_attr "movprfx" "*,yes")] | |
2946 ) | |
2947 | |
2948 ;; Predicated floating-point ternary operations using the FMLA-like form. | |
;; Inactive lanes take the addend (match_dup 4), which is the register
;; the FMLA-like form accumulates into (%0 tied to operand 4).  In the
;; second alternative, MOVPRFX copies the addend %4 into %0, so the
;; FMLA-like mnemonic must be used there too: the previous <sve_fmad_op>
;; would have computed %0 * %2 + %3 (destination as multiplicand) instead
;; of %4 + %2 * %3.
2949 (define_insn "*cond_<optab><mode>_4" | |
2950 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") | |
2951 (unspec:SVE_F | |
2952 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") | |
2953 (unspec:SVE_F | |
2954 [(match_operand:SVE_F 2 "register_operand" "w, w") | |
2955 (match_operand:SVE_F 3 "register_operand" "w, w") | |
2956 (match_operand:SVE_F 4 "register_operand" "0, w")] | |
2957 SVE_COND_FP_TERNARY) | |
2958 (match_dup 4)] | |
2959 UNSPEC_SEL))] | |
2960 "TARGET_SVE" | |
2961 "@ | |
2962 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
2963 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" | |
2964 [(set_attr "movprfx" "*,yes")] | |
2965 ) | |
2966 | |
2967 ;; Predicated floating-point ternary operations in which the value for | |
2968 ;; inactive lanes is distinct from the other inputs. | |
;; Alternative 1 (operand 5 == zero) uses a zeroing MOVPRFX, alternative
;; 2 (operand 5 tied to %0) a merging MOVPRFX; the remaining register
;; case is split after reload into a SEL of the addend with the fallback
;; value followed by the predicated FMLA-like operation merging with
;; itself.
2969 (define_insn_and_split "*cond_<optab><mode>_any" | |
2970 [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w") | |
2971 (unspec:SVE_F | |
2972 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") | |
2973 (unspec:SVE_F | |
2974 [(match_operand:SVE_F 2 "register_operand" "w, w, w") | |
2975 (match_operand:SVE_F 3 "register_operand" "w, w, w") | |
2976 (match_operand:SVE_F 4 "register_operand" "w, w, w")] | |
2977 SVE_COND_FP_TERNARY) | |
2978 (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")] | |
2979 UNSPEC_SEL))] | |
2980 "TARGET_SVE | |
2981 && !rtx_equal_p (operands[2], operands[5]) | |
2982 && !rtx_equal_p (operands[3], operands[5]) | |
2983 && !rtx_equal_p (operands[4], operands[5])" | |
2984 "@ | |
2985 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
2986 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> | |
2987 #" | |
2988 "&& reload_completed | |
2989 && !CONSTANT_P (operands[5]) | |
2990 && !rtx_equal_p (operands[0], operands[5])" | |
2991 [(set (match_dup 0) | |
2992 (unspec:SVE_F [(match_dup 1) (match_dup 4) (match_dup 5)] UNSPEC_SEL)) | |
2993 (set (match_dup 0) | |
2994 (unspec:SVE_F | |
2995 [(match_dup 1) | |
2996 (unspec:SVE_F [(match_dup 2) (match_dup 3) (match_dup 0)] | |
2997 SVE_COND_FP_TERNARY) | |
2998 (match_dup 0)] | |
2999 UNSPEC_SEL))] | |
3000 "" | |
3001 [(set_attr "movprfx" "yes")] | |
3002 ) | |
3003 | |
3004 ;; Shift an SVE vector left and insert a scalar into element 0. | |
;; INSR is destructive, so operand 1 is tied to the destination in both
;; alternatives; the scalar comes either from a general register (rZ,
;; allowing the zero register) or from an FP/SIMD register (w).
3005 (define_insn "vec_shl_insert_<mode>" | |
3006 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") | |
3007 (unspec:SVE_ALL | |
3008 [(match_operand:SVE_ALL 1 "register_operand" "0, 0") | |
3009 (match_operand:<VEL> 2 "register_operand" "rZ, w")] | |
3010 UNSPEC_INSR))] | |
3011 "TARGET_SVE" | |
3012 "@ | |
3013 insr\t%0.<Vetype>, %<vwcore>2 | |
3014 insr\t%0.<Vetype>, %<Vetype>2" | |
3015 ) | |