comparison gcc/config/aarch64/aarch64-sve-builtins-base.cc @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
1 /* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics)
2 Copyright (C) 2018-2020 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "tree.h"
25 #include "rtl.h"
26 #include "tm_p.h"
27 #include "memmodel.h"
28 #include "insn-codes.h"
29 #include "optabs.h"
30 #include "recog.h"
31 #include "expr.h"
32 #include "basic-block.h"
33 #include "function.h"
34 #include "fold-const.h"
35 #include "gimple.h"
36 #include "gimple-iterator.h"
37 #include "gimplify.h"
38 #include "explow.h"
39 #include "emit-rtl.h"
40 #include "tree-vector-builder.h"
41 #include "rtx-vector-builder.h"
42 #include "vec-perm-indices.h"
43 #include "aarch64-sve-builtins.h"
44 #include "aarch64-sve-builtins-shapes.h"
45 #include "aarch64-sve-builtins-base.h"
46 #include "aarch64-sve-builtins-functions.h"
47
48 using namespace aarch64_sve;
49
50 namespace {
51
52 /* Return the UNSPEC_CMLA* unspec for rotation amount ROT. */
53 static int
54 unspec_cmla (int rot)
55 {
56 switch (rot)
57 {
58 case 0: return UNSPEC_CMLA;
59 case 90: return UNSPEC_CMLA90;
60 case 180: return UNSPEC_CMLA180;
61 case 270: return UNSPEC_CMLA270;
62 default: gcc_unreachable ();
63 }
64 }
65
66 /* Return the UNSPEC_FCMLA* unspec for rotation amount ROT. */
67 static int
68 unspec_fcmla (int rot)
69 {
70 switch (rot)
71 {
72 case 0: return UNSPEC_FCMLA;
73 case 90: return UNSPEC_FCMLA90;
74 case 180: return UNSPEC_FCMLA180;
75 case 270: return UNSPEC_FCMLA270;
76 default: gcc_unreachable ();
77 }
78 }
79
80 /* Return the UNSPEC_COND_FCMLA* unspec for rotation amount ROT. */
81 static int
82 unspec_cond_fcmla (int rot)
83 {
84 switch (rot)
85 {
86 case 0: return UNSPEC_COND_FCMLA;
87 case 90: return UNSPEC_COND_FCMLA90;
88 case 180: return UNSPEC_COND_FCMLA180;
89 case 270: return UNSPEC_COND_FCMLA270;
90 default: gcc_unreachable ();
91 }
92 }
93
94 /* Expand a call to svmad, or svmla after reordering its operands.
95 Make _m forms merge with argument MERGE_ARGNO. */
96 static rtx
97 expand_mad (function_expander &e,
98 unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
99 {
100 if (e.pred == PRED_x)
101 {
102 insn_code icode;
103 if (e.type_suffix (0).integer_p)
104 icode = code_for_aarch64_pred_fma (e.vector_mode (0));
105 else
106 icode = code_for_aarch64_pred (UNSPEC_COND_FMLA, e.vector_mode (0));
107 return e.use_pred_x_insn (icode);
108 }
109
110 insn_code icode = e.direct_optab_handler (cond_fma_optab);
111 return e.use_cond_insn (icode, merge_argno);
112 }
113
114 /* Expand a call to svmla_lane or svmls_lane using floating-point unspec
115 UNSPEC. */
116 static rtx
117 expand_mla_mls_lane (function_expander &e, int unspec)
118 {
119 /* Put the operands in the normal (fma ...) order, with the accumulator
120 last. This fits naturally since that's also the unprinted operand
121 in the asm output. */
122 e.rotate_inputs_left (0, 4);
123 insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
124 return e.use_exact_insn (icode);
125 }
126
127 /* Expand a call to svmsb, or svmls after reordering its operands.
128 Make _m forms merge with argument MERGE_ARGNO. */
129 static rtx
130 expand_msb (function_expander &e,
131 unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
132 {
133 if (e.pred == PRED_x)
134 {
135 insn_code icode;
136 if (e.type_suffix (0).integer_p)
137 icode = code_for_aarch64_pred_fnma (e.vector_mode (0));
138 else
139 icode = code_for_aarch64_pred (UNSPEC_COND_FMLS, e.vector_mode (0));
140 return e.use_pred_x_insn (icode);
141 }
142
143 insn_code icode = e.direct_optab_handler (cond_fnma_optab);
144 return e.use_cond_insn (icode, merge_argno);
145 }
146
147 class svabd_impl : public function_base
148 {
149 public:
150 rtx
151 expand (function_expander &e) const OVERRIDE
152 {
153 /* The integer operations are represented as the subtraction of the
154 minimum from the maximum, with the signedness of the instruction
155 keyed off the signedness of the maximum operation. */
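/* For example, with this representation svabd_s32_x (pg, a, b) expands
   to smax (a, b) - smin (a, b), while svabd_u32_x uses umax and umin
   instead.  */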
156 rtx_code max_code = e.type_suffix (0).unsigned_p ? UMAX : SMAX;
157 insn_code icode;
158 if (e.pred == PRED_x)
159 {
160 if (e.type_suffix (0).integer_p)
161 icode = code_for_aarch64_pred_abd (max_code, e.vector_mode (0));
162 else
163 icode = code_for_aarch64_pred_abd (e.vector_mode (0));
164 return e.use_pred_x_insn (icode);
165 }
166
167 if (e.type_suffix (0).integer_p)
168 icode = code_for_aarch64_cond_abd (max_code, e.vector_mode (0));
169 else
170 icode = code_for_aarch64_cond_abd (e.vector_mode (0));
171 return e.use_cond_insn (icode);
172 }
173 };
174
175 /* Implements svacge, svacgt, svacle and svaclt. */
176 class svac_impl : public function_base
177 {
178 public:
179 CONSTEXPR svac_impl (int unspec) : m_unspec (unspec) {}
180
181 rtx
182 expand (function_expander &e) const OVERRIDE
183 {
184 e.add_ptrue_hint (0, e.gp_mode (0));
185 insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0));
186 return e.use_exact_insn (icode);
187 }
188
189 /* The unspec code for the underlying comparison. */
190 int m_unspec;
191 };
192
193 class svadda_impl : public function_base
194 {
195 public:
196 rtx
197 expand (function_expander &e) const OVERRIDE
198 {
199 /* Put the predicate last, as required by mask_fold_left_plus_optab. */
200 e.rotate_inputs_left (0, 3);
201 machine_mode mode = e.vector_mode (0);
202 insn_code icode = direct_optab_handler (mask_fold_left_plus_optab, mode);
203 return e.use_exact_insn (icode);
204 }
205 };
206
207 /* Implements svadr[bhwd]. */
208 class svadr_bhwd_impl : public function_base
209 {
210 public:
211 CONSTEXPR svadr_bhwd_impl (unsigned int shift) : m_shift (shift) {}
212
213 rtx
214 expand (function_expander &e) const OVERRIDE
215 {
216 machine_mode mode = GET_MODE (e.args[0]);
217 if (m_shift == 0)
218 return e.use_exact_insn (code_for_aarch64_adr (mode));
219
220 /* Turn the access size into an extra shift argument. */
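/* For example, svadrh indices count halfwords, so the displacement is
   shifted left by one bit to convert it to a byte offset before the ADR
   addition.  */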
221 rtx shift = gen_int_mode (m_shift, GET_MODE_INNER (mode));
222 e.args.quick_push (expand_vector_broadcast (mode, shift));
223 return e.use_exact_insn (code_for_aarch64_adr_shift (mode));
224 }
225
226 /* The number of bits to shift the vector displacement left. */
227 unsigned int m_shift;
228 };
229
230 class svbic_impl : public function_base
231 {
232 public:
233 rtx
234 expand (function_expander &e) const OVERRIDE
235 {
236 /* Convert svbic of a constant into svand of its inverse. */
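/* For example, svbic_n_u32_x (pg, x, 3) is handled as
   svand_n_u32_x (pg, x, ~3), i.e. an AND with 0xfffffffc.  */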
237 if (CONST_INT_P (e.args[2]))
238 {
239 machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
240 e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode);
241 return e.map_to_rtx_codes (AND, AND, -1);
242 }
243
244 if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
245 {
246 gcc_assert (e.pred == PRED_z);
247 return e.use_exact_insn (CODE_FOR_aarch64_pred_bicvnx16bi_z);
248 }
249
250 if (e.pred == PRED_x)
251 return e.use_unpred_insn (code_for_aarch64_bic (e.vector_mode (0)));
252
253 return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0)));
254 }
255 };
256
257 /* Implements svbrkn, svbrkpa and svbrkpb. */
258 class svbrk_binary_impl : public function_base
259 {
260 public:
261 CONSTEXPR svbrk_binary_impl (int unspec) : m_unspec (unspec) {}
262
263 rtx
264 expand (function_expander &e) const OVERRIDE
265 {
266 return e.use_exact_insn (code_for_aarch64_brk (m_unspec));
267 }
268
269 /* The unspec code associated with the operation. */
270 int m_unspec;
271 };
272
273 /* Implements svbrka and svbrkb. */
274 class svbrk_unary_impl : public function_base
275 {
276 public:
277 CONSTEXPR svbrk_unary_impl (int unspec) : m_unspec (unspec) {}
278
279 rtx
280 expand (function_expander &e) const OVERRIDE
281 {
282 return e.use_cond_insn (code_for_aarch64_brk (m_unspec));
283 }
284
285 /* The unspec code associated with the operation. */
286 int m_unspec;
287 };
288
289 class svcadd_impl : public function_base
290 {
291 public:
292 rtx
293 expand (function_expander &e) const OVERRIDE
294 {
295 /* Convert the rotation amount into a specific unspec. */
296 int rot = INTVAL (e.args.pop ());
297 if (rot == 90)
298 return e.map_to_unspecs (UNSPEC_CADD90, UNSPEC_CADD90,
299 UNSPEC_COND_FCADD90);
300 if (rot == 270)
301 return e.map_to_unspecs (UNSPEC_CADD270, UNSPEC_CADD270,
302 UNSPEC_COND_FCADD270);
303 gcc_unreachable ();
304 }
305 };
306
307 /* Implements svclasta and svclastb. */
308 class svclast_impl : public quiet<function_base>
309 {
310 public:
311 CONSTEXPR svclast_impl (int unspec) : m_unspec (unspec) {}
312
313 rtx
314 expand (function_expander &e) const OVERRIDE
315 {
316 /* Match the fold_extract_optab order. */
317 std::swap (e.args[0], e.args[1]);
318 machine_mode mode = e.vector_mode (0);
319 insn_code icode;
320 if (e.mode_suffix_id == MODE_n)
321 icode = code_for_fold_extract (m_unspec, mode);
322 else
323 icode = code_for_aarch64_fold_extract_vector (m_unspec, mode);
324 return e.use_exact_insn (icode);
325 }
326
327 /* The unspec code associated with the operation. */
328 int m_unspec;
329 };
330
331 class svcmla_impl : public function_base
332 {
333 public:
334 rtx
335 expand (function_expander &e) const OVERRIDE
336 {
337 /* Convert the rotation amount into a specific unspec. */
338 int rot = INTVAL (e.args.pop ());
339 if (e.type_suffix (0).float_p)
340 {
341 /* Make the operand order the same as the one used by the fma optabs,
342 with the accumulator last. */
343 e.rotate_inputs_left (1, 4);
344 return e.map_to_unspecs (-1, -1, unspec_cond_fcmla (rot), 3);
345 }
346 else
347 {
348 int cmla = unspec_cmla (rot);
349 return e.map_to_unspecs (cmla, cmla, -1);
350 }
351 }
352 };
353
354 class svcmla_lane_impl : public function_base
355 {
356 public:
357 rtx
358 expand (function_expander &e) const OVERRIDE
359 {
360 /* Convert the rotation amount into a specific unspec. */
361 int rot = INTVAL (e.args.pop ());
362 machine_mode mode = e.vector_mode (0);
363 if (e.type_suffix (0).float_p)
364 {
365 /* Make the operand order the same as the one used by the fma optabs,
366 with the accumulator last. */
367 e.rotate_inputs_left (0, 4);
368 insn_code icode = code_for_aarch64_lane (unspec_fcmla (rot), mode);
369 return e.use_exact_insn (icode);
370 }
371 else
372 {
373 insn_code icode = code_for_aarch64_lane (unspec_cmla (rot), mode);
374 return e.use_exact_insn (icode);
375 }
376 }
377 };
378
379 /* Implements svcmp<cc> (except svcmpuo, which is handled separately). */
380 class svcmp_impl : public function_base
381 {
382 public:
383 CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp)
384 : m_code (code), m_unspec_for_fp (unspec_for_fp) {}
385
386 gimple *
387 fold (gimple_folder &f) const OVERRIDE
388 {
389 tree pg = gimple_call_arg (f.call, 0);
390 tree rhs1 = gimple_call_arg (f.call, 1);
391 tree rhs2 = gimple_call_arg (f.call, 2);
392
393 /* Convert a ptrue-predicated integer comparison into the corresponding
394 gimple-level operation. */
395 if (integer_all_onesp (pg)
396 && f.type_suffix (0).element_bytes == 1
397 && f.type_suffix (0).integer_p)
398 {
399 gimple_seq stmts = NULL;
400 rhs2 = f.force_vector (stmts, TREE_TYPE (rhs1), rhs2);
401 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
402 return gimple_build_assign (f.lhs, m_code, rhs1, rhs2);
403 }
404
405 return NULL;
406 }
407
408 rtx
409 expand (function_expander &e) const OVERRIDE
410 {
411 machine_mode mode = e.vector_mode (0);
412
413 /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
414 operand. */
415 e.add_ptrue_hint (0, e.gp_mode (0));
416
417 if (e.type_suffix (0).integer_p)
418 {
419 bool unsigned_p = e.type_suffix (0).unsigned_p;
420 rtx_code code = get_rtx_code (m_code, unsigned_p);
421 return e.use_exact_insn (code_for_aarch64_pred_cmp (code, mode));
422 }
423
424 insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode);
425 return e.use_exact_insn (icode);
426 }
427
428 /* The tree code associated with the comparison. */
429 tree_code m_code;
430
431 /* The unspec code to use for floating-point comparisons. */
432 int m_unspec_for_fp;
433 };
434
435 /* Implements svcmp<cc>_wide. */
436 class svcmp_wide_impl : public function_base
437 {
438 public:
439 CONSTEXPR svcmp_wide_impl (tree_code code, int unspec_for_sint,
440 int unspec_for_uint)
441 : m_code (code), m_unspec_for_sint (unspec_for_sint),
442 m_unspec_for_uint (unspec_for_uint) {}
443
444 rtx
445 expand (function_expander &e) const OVERRIDE
446 {
447 machine_mode mode = e.vector_mode (0);
448 bool unsigned_p = e.type_suffix (0).unsigned_p;
449 rtx_code code = get_rtx_code (m_code, unsigned_p);
450
451 /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
452 operand. */
453 e.add_ptrue_hint (0, e.gp_mode (0));
454
455 /* If the argument is a constant that the unwidened comparisons
456 can handle directly, use them instead. */
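/* For example, svcmplt_wide_n_s8 (pg, x, 11) can use the normal byte
   CMPLT immediate form instead of the widening comparison.  */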
457 insn_code icode = code_for_aarch64_pred_cmp (code, mode);
458 rtx op2 = unwrap_const_vec_duplicate (e.args[3]);
459 if (CONSTANT_P (op2)
460 && insn_data[icode].operand[4].predicate (op2, DImode))
461 {
462 e.args[3] = op2;
463 return e.use_exact_insn (icode);
464 }
465
466 int unspec = (unsigned_p ? m_unspec_for_uint : m_unspec_for_sint);
467 return e.use_exact_insn (code_for_aarch64_pred_cmp_wide (unspec, mode));
468 }
469
470 /* The tree code associated with the comparison. */
471 tree_code m_code;
472
473 /* The unspec codes for signed and unsigned wide comparisons
474 respectively. */
475 int m_unspec_for_sint;
476 int m_unspec_for_uint;
477 };
478
479 class svcmpuo_impl : public quiet<function_base>
480 {
481 public:
482 rtx
483 expand (function_expander &e) const OVERRIDE
484 {
485 e.add_ptrue_hint (0, e.gp_mode (0));
486 return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0)));
487 }
488 };
489
490 class svcnot_impl : public function_base
491 {
492 public:
493 rtx
494 expand (function_expander &e) const OVERRIDE
495 {
496 machine_mode mode = e.vector_mode (0);
497 if (e.pred == PRED_x)
498 {
499 /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
500 a ptrue hint. */
501 e.add_ptrue_hint (0, e.gp_mode (0));
502 return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
503 }
504
505 return e.use_cond_insn (code_for_cond_cnot (mode), 0);
506 }
507 };
508
509 /* Implements svcnt[bhwd], which count the number of elements
510 in a particular vector mode. */
511 class svcnt_bhwd_impl : public function_base
512 {
513 public:
514 CONSTEXPR svcnt_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {}
515
516 gimple *
517 fold (gimple_folder &f) const OVERRIDE
518 {
519 tree count = build_int_cstu (TREE_TYPE (f.lhs),
520 GET_MODE_NUNITS (m_ref_mode));
521 return gimple_build_assign (f.lhs, count);
522 }
523
524 rtx
525 expand (function_expander &) const OVERRIDE
526 {
527 return gen_int_mode (GET_MODE_NUNITS (m_ref_mode), DImode);
528 }
529
530 /* The mode of the vector associated with the [bhwd] suffix. */
531 machine_mode m_ref_mode;
532 };
533
534 /* Implements svcnt[bhwd]_pat. */
535 class svcnt_bhwd_pat_impl : public svcnt_bhwd_impl
536 {
537 public:
538 CONSTEXPR svcnt_bhwd_pat_impl (machine_mode ref_mode)
539 : svcnt_bhwd_impl (ref_mode) {}
540
541 gimple *
542 fold (gimple_folder &f) const OVERRIDE
543 {
544 tree pattern_arg = gimple_call_arg (f.call, 0);
545 aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg);
546
547 if (pattern == AARCH64_SV_ALL)
548 /* svcnt[bhwd]_pat (SV_ALL) == svcnt[bhwd] (). */
549 return svcnt_bhwd_impl::fold (f);
550
551 /* See whether we can count the number of elements in the pattern
552 at compile time. */
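/* For example, svcntb_pat (SV_VL4) always folds to 4, since even the
   minimum vector length provides at least four byte elements.  */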
553 unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
554 HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq);
555 if (value >= 0)
556 {
557 tree count = build_int_cstu (TREE_TYPE (f.lhs), value);
558 return gimple_build_assign (f.lhs, count);
559 }
560
561 return NULL;
562 }
563
564 rtx
565 expand (function_expander &e) const OVERRIDE
566 {
567 unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
568 e.args.quick_push (gen_int_mode (elements_per_vq, DImode));
569 e.args.quick_push (const1_rtx);
570 return e.use_exact_insn (CODE_FOR_aarch64_sve_cnt_pat);
571 }
572 };
573
574 class svcntp_impl : public function_base
575 {
576 public:
577 rtx
578 expand (function_expander &e) const OVERRIDE
579 {
580 machine_mode mode = e.vector_mode (0);
581 e.add_ptrue_hint (0, mode);
582 return e.use_exact_insn (code_for_aarch64_pred_cntp (mode));
583 }
584 };
585
586 /* Implements svcreate2, svcreate3 and svcreate4. */
587 class svcreate_impl : public quiet<multi_vector_function>
588 {
589 public:
590 CONSTEXPR svcreate_impl (unsigned int vectors_per_tuple)
591 : quiet<multi_vector_function> (vectors_per_tuple) {}
592
593 gimple *
594 fold (gimple_folder &f) const OVERRIDE
595 {
596 unsigned int nargs = gimple_call_num_args (f.call);
597 tree lhs_type = TREE_TYPE (f.lhs);
598
599 /* Replace the call with a clobber of the result (to prevent it from
600 becoming upwards exposed) followed by stores into each individual
601 vector of the tuple.
602
603 The fold routines expect the replacement statement to have the
604 same lhs as the original call, so return the clobber statement
605 rather than the final vector store. */
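/* For svcreate2 (a, b), for instance, the replacement sequence is a
   clobber of the tuple result followed by stores of a and b into
   vectors 0 and 1 of the tuple.  */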
606 gassign *clobber = gimple_build_assign (f.lhs, build_clobber (lhs_type));
607
608 for (unsigned int i = nargs; i-- > 0; )
609 {
610 tree rhs_vector = gimple_call_arg (f.call, i);
611 tree field = tuple_type_field (TREE_TYPE (f.lhs));
612 tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
613 unshare_expr (f.lhs), field, NULL_TREE);
614 tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
615 lhs_array, size_int (i),
616 NULL_TREE, NULL_TREE);
617 gassign *assign = gimple_build_assign (lhs_vector, rhs_vector);
618 gsi_insert_after (f.gsi, assign, GSI_SAME_STMT);
619 }
620 return clobber;
621 }
622
623 rtx
624 expand (function_expander &e) const OVERRIDE
625 {
626 rtx lhs_tuple = e.get_nonoverlapping_reg_target ();
627
628 /* Record that LHS_TUPLE is dead before the first store. */
629 emit_clobber (lhs_tuple);
630 for (unsigned int i = 0; i < e.args.length (); ++i)
631 {
632 /* Use an lvalue subreg to refer to vector I in LHS_TUPLE. */
633 rtx lhs_vector = simplify_gen_subreg (GET_MODE (e.args[i]),
634 lhs_tuple, GET_MODE (lhs_tuple),
635 i * BYTES_PER_SVE_VECTOR);
636 emit_move_insn (lhs_vector, e.args[i]);
637 }
638 return lhs_tuple;
639 }
640 };
641
642 class svcvt_impl : public function_base
643 {
644 public:
645 rtx
646 expand (function_expander &e) const OVERRIDE
647 {
648 machine_mode mode0 = e.vector_mode (0);
649 machine_mode mode1 = e.vector_mode (1);
650 insn_code icode;
651 /* All this complication comes from the need to select four things
652 simultaneously:
653
654 (1) the kind of conversion (int<-float, float<-int, float<-float)
655 (2) signed vs. unsigned integers, where relevant
656 (3) the predication mode, which must be the wider of the predication
657 modes for MODE0 and MODE1
658 (4) the predication type (m, x or z)
659
660 The only supported int<->float conversions for which the integer is
661 narrower than the float are SI<->DF. It's therefore more convenient
662 to handle (3) by defining two patterns for int<->float conversions:
663 one in which the integer is at least as wide as the float and so
664 determines the predication mode, and another single SI<->DF pattern
665 in which the float's mode determines the predication mode (which is
666 always VNx2BI in that case).
667
668 The names of the patterns follow the optab convention of giving
669 the source mode before the destination mode. */
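/* For example, svcvt_f64_s32 is the SI<->DF case: the integer is
   narrower than the float, so it uses the "extend" pattern and the
   predication mode is VNx2BI.  svcvt_f32_s32 instead uses the
   "nonextend" pattern, with the integer mode determining the
   predication mode.  */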
670 if (e.type_suffix (1).integer_p)
671 {
672 int unspec = (e.type_suffix (1).unsigned_p
673 ? UNSPEC_COND_UCVTF
674 : UNSPEC_COND_SCVTF);
675 if (e.type_suffix (0).element_bytes <= e.type_suffix (1).element_bytes)
676 icode = (e.pred == PRED_x
677 ? code_for_aarch64_sve_nonextend (unspec, mode1, mode0)
678 : code_for_cond_nonextend (unspec, mode1, mode0));
679 else
680 icode = (e.pred == PRED_x
681 ? code_for_aarch64_sve_extend (unspec, mode1, mode0)
682 : code_for_cond_extend (unspec, mode1, mode0));
683 }
684 else
685 {
686 int unspec = (!e.type_suffix (0).integer_p ? UNSPEC_COND_FCVT
687 : e.type_suffix (0).unsigned_p ? UNSPEC_COND_FCVTZU
688 : UNSPEC_COND_FCVTZS);
689 if (e.type_suffix (0).element_bytes >= e.type_suffix (1).element_bytes)
690 icode = (e.pred == PRED_x
691 ? code_for_aarch64_sve_nontrunc (unspec, mode1, mode0)
692 : code_for_cond_nontrunc (unspec, mode1, mode0));
693 else
694 icode = (e.pred == PRED_x
695 ? code_for_aarch64_sve_trunc (unspec, mode1, mode0)
696 : code_for_cond_trunc (unspec, mode1, mode0));
697 }
698
699 if (e.pred == PRED_x)
700 return e.use_pred_x_insn (icode);
701 return e.use_cond_insn (icode);
702 }
703 };
704
705 class svdot_impl : public function_base
706 {
707 public:
708 rtx
709 expand (function_expander &e) const OVERRIDE
710 {
711 /* In the optab, the multiplication operands come before the accumulator
712 operand. The optab is keyed off the multiplication mode. */
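/* For example, svdot_s32 (acc, x, y) on svint8_t inputs becomes a query
   of sdot_prod_optab keyed off the VNx16QI mode of x, with the operands
   reordered to (x, y, acc).  */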
713 e.rotate_inputs_left (0, 3);
714 insn_code icode
715 = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab,
716 0, GET_MODE (e.args[0]));
717 return e.use_unpred_insn (icode);
718 }
719 };
720
721 class svdotprod_lane_impl : public unspec_based_function_base
722 {
723 public:
724 CONSTEXPR svdotprod_lane_impl (int unspec_for_sint,
725 int unspec_for_uint,
726 int unspec_for_float)
727 : unspec_based_function_base (unspec_for_sint,
728 unspec_for_uint,
729 unspec_for_float) {}
730
731 rtx
732 expand (function_expander &e) const OVERRIDE
733 {
734 /* Use the same ordering as the dot_prod_optab, with the
735 accumulator last. */
736 e.rotate_inputs_left (0, 4);
737 int unspec = unspec_for (e);
738 machine_mode mode = e.vector_mode (0);
739 return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode));
740 }
741 };
742
743 class svdup_impl : public quiet<function_base>
744 {
745 public:
746 gimple *
747 fold (gimple_folder &f) const OVERRIDE
748 {
749 tree vec_type = TREE_TYPE (f.lhs);
750 tree rhs = gimple_call_arg (f.call, f.pred == PRED_none ? 0 : 1);
751
752 if (f.pred == PRED_none || f.pred == PRED_x)
753 {
754 if (CONSTANT_CLASS_P (rhs))
755 {
756 if (f.type_suffix (0).bool_p)
757 return (tree_to_shwi (rhs)
758 ? f.fold_to_ptrue ()
759 : f.fold_to_pfalse ());
760
761 tree rhs_vector = build_vector_from_val (vec_type, rhs);
762 return gimple_build_assign (f.lhs, rhs_vector);
763 }
764
765 /* Avoid folding _b to a VEC_DUPLICATE_EXPR, since to do that we
766 would need to introduce an extra and unwanted conversion to
767 the truth vector element type. */
768 if (!f.type_suffix (0).bool_p)
769 return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs);
770 }
771
772 /* svdup_z (pg, x) == VEC_COND_EXPR <pg, VEC_DUPLICATE_EXPR <x>, 0>. */
773 if (f.pred == PRED_z)
774 {
775 gimple_seq stmts = NULL;
776 tree pred = f.convert_pred (stmts, vec_type, 0);
777 rhs = f.force_vector (stmts, vec_type, rhs);
778 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
779 return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, rhs,
780 build_zero_cst (vec_type));
781 }
782
783 return NULL;
784 }
785
786 rtx
787 expand (function_expander &e) const OVERRIDE
788 {
789 if (e.pred == PRED_none || e.pred == PRED_x)
790 /* There's no benefit to using predicated instructions for _x here. */
791 return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab));
792
793 /* Model predicated svdups as a SEL in which the "true" value is
794 the duplicate of the function argument and the "false" value
795 is the value of inactive lanes. */
796 insn_code icode;
797 machine_mode mode = e.vector_mode (0);
798 if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ()))
799 /* Duplicate the constant to fill a vector. The pattern optimizes
800 various cases involving constant operands, falling back to SEL
801 if necessary. */
802 icode = code_for_vcond_mask (mode, mode);
803 else
804 /* Use the pattern for selecting between a duplicated scalar
805 variable and a vector fallback. */
806 icode = code_for_aarch64_sel_dup (mode);
807 return e.use_vcond_mask_insn (icode);
808 }
809 };
810
811 class svdup_lane_impl : public quiet<function_base>
812 {
813 public:
814 rtx
815 expand (function_expander &e) const OVERRIDE
816 {
817 /* The native DUP lane has an index range of 64 bytes. */
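/* For 32-bit elements, for instance, that covers indices 0-15; larger
   indices fall through to the TBL expansion below.  */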
818 machine_mode mode = e.vector_mode (0);
819 if (CONST_INT_P (e.args[1])
820 && IN_RANGE (INTVAL (e.args[1]) * GET_MODE_UNIT_SIZE (mode), 0, 63))
821 return e.use_exact_insn (code_for_aarch64_sve_dup_lane (mode));
822
823 /* Treat svdup_lane as if it were svtbl_n. */
824 return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0)));
825 }
826 };
827
828 class svdupq_impl : public quiet<function_base>
829 {
830 public:
831 gimple *
832 fold (gimple_folder &f) const OVERRIDE
833 {
834 tree vec_type = TREE_TYPE (f.lhs);
835 unsigned int nargs = gimple_call_num_args (f.call);
836 /* For predicates, pad out each argument so that we have one element
837 per bit. */
838 unsigned int factor = (f.type_suffix (0).bool_p
839 ? f.type_suffix (0).element_bytes : 1);
840 tree_vector_builder builder (vec_type, nargs * factor, 1);
841 for (unsigned int i = 0; i < nargs; ++i)
842 {
843 tree elt = gimple_call_arg (f.call, i);
844 if (!CONSTANT_CLASS_P (elt))
845 return NULL;
846 builder.quick_push (elt);
847 for (unsigned int j = 1; j < factor; ++j)
848 builder.quick_push (build_zero_cst (TREE_TYPE (vec_type)));
849 }
850 return gimple_build_assign (f.lhs, builder.build ());
851 }
852
853 rtx
854 expand (function_expander &e) const OVERRIDE
855 {
856 machine_mode mode = e.vector_mode (0);
857 unsigned int elements_per_vq = e.args.length ();
858 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
859 {
860 /* Construct a vector of integers so that we can compare them against
861 zero below. Zero vs. nonzero is the only distinction that
862 matters. */
863 mode = aarch64_sve_int_mode (mode);
864 for (unsigned int i = 0; i < elements_per_vq; ++i)
865 e.args[i] = simplify_gen_unary (ZERO_EXTEND, GET_MODE_INNER (mode),
866 e.args[i], QImode);
867 }
868
869 /* Get the 128-bit Advanced SIMD vector for this data size. */
870 scalar_mode element_mode = GET_MODE_INNER (mode);
871 machine_mode vq_mode = aarch64_vq_mode (element_mode).require ();
872 gcc_assert (known_eq (elements_per_vq, GET_MODE_NUNITS (vq_mode)));
873
874 /* Put the arguments into a 128-bit Advanced SIMD vector. We want
875 argument N to go into architectural lane N, whereas Advanced SIMD
876 vectors are loaded memory lsb to register lsb. We therefore need
877 to reverse the elements for big-endian targets. */
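/* For example, for a call such as svdupq_n_s32 (a, b, c, d), argument a
   must end up in architectural lane 0, so on big-endian targets the
   initializer below is built in the order { d, c, b, a }.  */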
878 rtx vq_reg = gen_reg_rtx (vq_mode);
879 rtvec vec = rtvec_alloc (elements_per_vq);
880 for (unsigned int i = 0; i < elements_per_vq; ++i)
881 {
882 unsigned int argno = BYTES_BIG_ENDIAN ? elements_per_vq - i - 1 : i;
883 RTVEC_ELT (vec, i) = e.args[argno];
884 }
885 aarch64_expand_vector_init (vq_reg, gen_rtx_PARALLEL (vq_mode, vec));
886
887 /* If the result is a boolean, compare the data vector against zero. */
888 if (mode != e.vector_mode (0))
889 {
890 rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg);
891 return aarch64_convert_sve_data_to_pred (e.possible_target,
892 e.vector_mode (0), data_dupq);
893 }
894
895 return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg);
896 }
897 };
898
899 class svdupq_lane_impl : public quiet<function_base>
900 {
901 public:
902 rtx
903 expand (function_expander &e) const OVERRIDE
904 {
905 machine_mode mode = e.vector_mode (0);
906 rtx index = e.args[1];
907 if (CONST_INT_P (index) && IN_RANGE (INTVAL (index), 0, 3))
908 {
909 /* Use the .Q form of DUP, which is the native instruction for
910 this function. */
911 insn_code icode = code_for_aarch64_sve_dupq_lane (mode);
912 unsigned int num_indices = e.elements_per_vq (0);
913 rtx indices = aarch64_gen_stepped_int_parallel
914 (num_indices, INTVAL (index) * num_indices, 1);
915
916 e.add_output_operand (icode);
917 e.add_input_operand (icode, e.args[0]);
918 e.add_fixed_operand (indices);
919 return e.generate_insn (icode);
920 }
921
922 /* Build a .D TBL index for the pairs of doublewords that we want to
923 duplicate. */
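/* For a quadword index I the selector repeats { 2*I, 2*I + 1 }; index 1,
   for example, broadcasts doublewords 2 and 3 of the input vector.  */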
924 if (CONST_INT_P (index))
925 {
926 /* The index vector is a constant. */
927 rtx_vector_builder builder (VNx2DImode, 2, 1);
928 builder.quick_push (gen_int_mode (INTVAL (index) * 2, DImode));
929 builder.quick_push (gen_int_mode (INTVAL (index) * 2 + 1, DImode));
930 index = builder.build ();
931 }
932 else
933 {
934 /* Duplicate INDEX * 2 to fill a DImode vector. The ACLE spec
935 explicitly allows the top of the index to be dropped. */
936 index = force_reg (DImode, simplify_gen_binary (ASHIFT, DImode,
937 index, const1_rtx));
938 index = expand_vector_broadcast (VNx2DImode, index);
939
940 /* Get an alternating 0, 1 predicate. */
941 rtx_vector_builder builder (VNx2BImode, 2, 1);
942 builder.quick_push (const0_rtx);
943 builder.quick_push (constm1_rtx);
944 rtx pg = force_reg (VNx2BImode, builder.build ());
945
946 /* Add one to the odd elements of the index. */
947 rtx one = force_reg (VNx2DImode, CONST1_RTX (VNx2DImode));
948 rtx target = gen_reg_rtx (VNx2DImode);
949 emit_insn (gen_cond_addvnx2di (target, pg, index, one, index));
950 index = target;
951 }
952
953 e.args[0] = gen_lowpart (VNx2DImode, e.args[0]);
954 e.args[1] = index;
955 return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di);
956 }
957 };
958
959 /* Implements svextb, svexth and svextw. */
960 class svext_bhw_impl : public function_base
961 {
962 public:
963 CONSTEXPR svext_bhw_impl (scalar_int_mode from_mode)
964 : m_from_mode (from_mode) {}
965
966 rtx
967 expand (function_expander &e) const OVERRIDE
968 {
969 if (e.type_suffix (0).unsigned_p)
970 {
971 /* Convert to an AND. The widest we go is 0xffffffff, which fits
972 in a CONST_INT. */
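/* For example, svextb_u32 becomes an AND with 0xff in each element and
   svextw_u64 an AND with 0xffffffff.  */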
973 e.args.quick_push (GEN_INT (GET_MODE_MASK (m_from_mode)));
974 if (e.pred == PRED_m)
975 /* We now have arguments "(inactive, pg, op, mask)". Convert this
976 to "(pg, op, mask, inactive)" so that the order matches svand_m
977 with an extra argument on the end. Take the inactive elements
978 from this extra argument. */
979 e.rotate_inputs_left (0, 4);
980 return e.map_to_rtx_codes (AND, AND, -1, 3);
981 }
982
983 machine_mode wide_mode = e.vector_mode (0);
984 poly_uint64 nunits = GET_MODE_NUNITS (wide_mode);
985 machine_mode narrow_mode
986 = aarch64_sve_data_mode (m_from_mode, nunits).require ();
987 if (e.pred == PRED_x)
988 {
989 insn_code icode = code_for_aarch64_pred_sxt (wide_mode, narrow_mode);
990 return e.use_pred_x_insn (icode);
991 }
992
993 insn_code icode = code_for_aarch64_cond_sxt (wide_mode, narrow_mode);
994 return e.use_cond_insn (icode);
995 }
996
997 /* The element mode that we're extending from. */
998 scalar_int_mode m_from_mode;
999 };
1000
1001 /* Implements svget2, svget3 and svget4. */
1002 class svget_impl : public quiet<multi_vector_function>
1003 {
1004 public:
1005 CONSTEXPR svget_impl (unsigned int vectors_per_tuple)
1006 : quiet<multi_vector_function> (vectors_per_tuple) {}
1007
1008 gimple *
1009 fold (gimple_folder &f) const OVERRIDE
1010 {
1011 /* Fold into a normal gimple component access. */
1012 tree rhs_tuple = gimple_call_arg (f.call, 0);
1013 tree index = gimple_call_arg (f.call, 1);
1014 tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
1015 tree rhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
1016 rhs_tuple, field, NULL_TREE);
1017 tree rhs_vector = build4 (ARRAY_REF, TREE_TYPE (f.lhs),
1018 rhs_array, index, NULL_TREE, NULL_TREE);
1019 return gimple_build_assign (f.lhs, rhs_vector);
1020 }
1021
1022 rtx
1023 expand (function_expander &e) const OVERRIDE
1024 {
1025 /* Fold the access into a subreg rvalue. */
1026 return simplify_gen_subreg (e.vector_mode (0), e.args[0],
1027 GET_MODE (e.args[0]),
1028 INTVAL (e.args[1]) * BYTES_PER_SVE_VECTOR);
1029 }
1030 };
1031
1032 class svindex_impl : public function_base
1033 {
1034 public:
1035 rtx
1036 expand (function_expander &e) const OVERRIDE
1037 {
1038 return e.use_exact_insn (e.direct_optab_handler (vec_series_optab));
1039 }
1040 };
1041
1042 class svinsr_impl : public quiet<function_base>
1043 {
1044 public:
1045 gimple *
1046 fold (gimple_folder &f) const OVERRIDE
1047 {
1048 gcall *new_call = gimple_build_call_internal (IFN_VEC_SHL_INSERT, 2,
1049 gimple_call_arg (f.call, 0),
1050 gimple_call_arg (f.call, 1));
1051 gimple_call_set_lhs (new_call, f.lhs);
1052 return new_call;
1053 }
1054
1055 rtx
1056 expand (function_expander &e) const OVERRIDE
1057 {
1058 insn_code icode = direct_optab_handler (vec_shl_insert_optab,
1059 e.vector_mode (0));
1060 return e.use_exact_insn (icode);
1061 }
1062 };
1063
1064 /* Implements svlasta and svlastb. */
1065 class svlast_impl : public quiet<function_base>
1066 {
1067 public:
1068 CONSTEXPR svlast_impl (int unspec) : m_unspec (unspec) {}
1069
1070 rtx
1071 expand (function_expander &e) const OVERRIDE
1072 {
1073 return e.use_exact_insn (code_for_extract (m_unspec, e.vector_mode (0)));
1074 }
1075
1076 /* The unspec code associated with the operation. */
1077 int m_unspec;
1078 };
1079
1080 class svld1_impl : public full_width_access
1081 {
1082 public:
1083 unsigned int
1084 call_properties (const function_instance &) const OVERRIDE
1085 {
1086 return CP_READ_MEMORY;
1087 }
1088
1089 gimple *
1090 fold (gimple_folder &f) const OVERRIDE
1091 {
1092 tree vectype = f.vector_type (0);
1093
1094 /* Get the predicate and base pointer. */
1095 gimple_seq stmts = NULL;
1096 tree pred = f.convert_pred (stmts, vectype, 0);
1097 tree base = f.fold_contiguous_base (stmts, vectype);
1098 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
1099
1100 tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
1101 gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
1102 base, cookie, pred);
1103 gimple_call_set_lhs (new_call, f.lhs);
1104 return new_call;
1105 }
1106
1107 rtx
1108 expand (function_expander &e) const OVERRIDE
1109 {
1110 insn_code icode = convert_optab_handler (maskload_optab,
1111 e.vector_mode (0), e.gp_mode (0));
1112 return e.use_contiguous_load_insn (icode);
1113 }
1114 };
1115
1116 /* Implements extending contiguous forms of svld1. */
1117 class svld1_extend_impl : public extending_load
1118 {
1119 public:
1120 CONSTEXPR svld1_extend_impl (type_suffix_index memory_type)
1121 : extending_load (memory_type) {}
1122
1123 rtx
1124 expand (function_expander &e) const OVERRIDE
1125 {
1126 insn_code icode = code_for_aarch64_load (extend_rtx_code (),
1127 e.vector_mode (0),
1128 e.memory_vector_mode ());
1129 return e.use_contiguous_load_insn (icode);
1130 }
1131 };
1132
1133 class svld1_gather_impl : public full_width_access
1134 {
1135 public:
1136 unsigned int
1137 call_properties (const function_instance &) const OVERRIDE
1138 {
1139 return CP_READ_MEMORY;
1140 }
1141
1142 rtx
1143 expand (function_expander &e) const OVERRIDE
1144 {
1145 e.prepare_gather_address_operands (1);
1146 /* Put the predicate last, as required by mask_gather_load_optab. */
1147 e.rotate_inputs_left (0, 5);
1148 machine_mode mem_mode = e.memory_vector_mode ();
1149 machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
1150 insn_code icode = convert_optab_handler (mask_gather_load_optab,
1151 mem_mode, int_mode);
1152 return e.use_exact_insn (icode);
1153 }
1154 };
1155
1156 /* Implements extending forms of svld1_gather. */
1157 class svld1_gather_extend_impl : public extending_load
1158 {
1159 public:
1160 CONSTEXPR svld1_gather_extend_impl (type_suffix_index memory_type)
1161 : extending_load (memory_type) {}
1162
1163 rtx
1164 expand (function_expander &e) const OVERRIDE
1165 {
1166 e.prepare_gather_address_operands (1);
1167 /* Put the predicate last, since the extending gathers use the same
1168 operand order as mask_gather_load_optab. */
1169 e.rotate_inputs_left (0, 5);
1170 /* Add a constant predicate for the extension rtx. */
1171 e.args.quick_push (CONSTM1_RTX (VNx16BImode));
1172 insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (),
1173 e.vector_mode (0),
1174 e.memory_vector_mode ());
1175 return e.use_exact_insn (icode);
1176 }
1177 };
1178
1179 class load_replicate : public function_base
1180 {
1181 public:
1182 unsigned int
1183 call_properties (const function_instance &) const OVERRIDE
1184 {
1185 return CP_READ_MEMORY;
1186 }
1187
1188 tree
1189 memory_scalar_type (const function_instance &fi) const OVERRIDE
1190 {
1191 return fi.scalar_type (0);
1192 }
1193 };
1194
1195 class svld1rq_impl : public load_replicate
1196 {
1197 public:
1198 machine_mode
1199 memory_vector_mode (const function_instance &fi) const OVERRIDE
1200 {
1201 return aarch64_vq_mode (GET_MODE_INNER (fi.vector_mode (0))).require ();
1202 }
1203
1204 rtx
1205 expand (function_expander &e) const OVERRIDE
1206 {
1207 insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
1208 return e.use_contiguous_load_insn (icode);
1209 }
1210 };
1211
1212 class svld1ro_impl : public load_replicate
1213 {
1214 public:
1215 machine_mode
1216 memory_vector_mode (const function_instance &) const OVERRIDE
1217 {
1218 return OImode;
1219 }
1220
1221 rtx
1222 expand (function_expander &e) const OVERRIDE
1223 {
1224 insn_code icode = code_for_aarch64_sve_ld1ro (e.vector_mode (0));
1225 return e.use_contiguous_load_insn (icode);
1226 }
1227 };
1228
1229 /* Implements svld2, svld3 and svld4. */
1230 class svld234_impl : public full_width_access
1231 {
1232 public:
1233 CONSTEXPR svld234_impl (unsigned int vectors_per_tuple)
1234 : full_width_access (vectors_per_tuple) {}
1235
1236 unsigned int
1237 call_properties (const function_instance &) const OVERRIDE
1238 {
1239 return CP_READ_MEMORY;
1240 }
1241
1242 gimple *
1243 fold (gimple_folder &f) const OVERRIDE
1244 {
1245 tree tuple_type = TREE_TYPE (f.lhs);
1246 tree vectype = f.vector_type (0);
1247
1248 /* Get the predicate and base pointer. */
1249 gimple_seq stmts = NULL;
1250 tree pred = f.convert_pred (stmts, vectype, 0);
1251 tree base = f.fold_contiguous_base (stmts, vectype);
1252 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
1253
1254 /* Emit two statements: a clobber of the lhs, so that it isn't
1255 upwards exposed, and then the load itself.
1256
1257 The fold routines expect the replacement statement to have the
1258 same lhs as the original call, so return the clobber statement
1259 rather than the load. */
1260 gimple *clobber = gimple_build_assign (f.lhs, build_clobber (tuple_type));
1261
1262 /* View the loaded data as an array of vectors. */
1263 tree field = tuple_type_field (tuple_type);
1264 tree lhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field),
1265 unshare_expr (f.lhs));
1266
1267 /* Emit the load itself. */
1268 tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
1269 gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
1270 base, cookie, pred);
1271 gimple_call_set_lhs (new_call, lhs_array);
1272 gsi_insert_after (f.gsi, new_call, GSI_SAME_STMT);
1273
1274 return clobber;
1275 }
1276
1277 rtx
1278 expand (function_expander &e) const OVERRIDE
1279 {
1280 machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr));
1281 insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab,
1282 tuple_mode, e.vector_mode (0));
1283 return e.use_contiguous_load_insn (icode);
1284 }
1285 };
1286
1287 class svldff1_gather_impl : public full_width_access
1288 {
1289 public:
1290 unsigned int
1291 call_properties (const function_instance &) const OVERRIDE
1292 {
1293 return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
1294 }
1295
1296 rtx
1297 expand (function_expander &e) const OVERRIDE
1298 {
1299 /* See the block comment in aarch64-sve.md for details about the
1300 FFR handling. */
1301 emit_insn (gen_aarch64_update_ffr_for_load ());
1302
1303 e.prepare_gather_address_operands (1);
1304 /* Put the predicate last, since ldff1_gather uses the same operand
1305 order as mask_gather_load_optab. */
1306 e.rotate_inputs_left (0, 5);
1307 machine_mode mem_mode = e.memory_vector_mode ();
1308 return e.use_exact_insn (code_for_aarch64_ldff1_gather (mem_mode));
1309 }
1310 };
1311
1312 /* Implements extending forms of svldff1_gather. */
1313 class svldff1_gather_extend : public extending_load
1314 {
1315 public:
1316 CONSTEXPR svldff1_gather_extend (type_suffix_index memory_type)
1317 : extending_load (memory_type) {}
1318
1319 rtx
1320 expand (function_expander &e) const OVERRIDE
1321 {
1322 /* See the block comment in aarch64-sve.md for details about the
1323 FFR handling. */
1324 emit_insn (gen_aarch64_update_ffr_for_load ());
1325
1326 e.prepare_gather_address_operands (1);
1327 /* Put the predicate last, since ldff1_gather uses the same operand
1328 order as mask_gather_load_optab. */
1329 e.rotate_inputs_left (0, 5);
1330 /* Add a constant predicate for the extension rtx. */
1331 e.args.quick_push (CONSTM1_RTX (VNx16BImode));
1332 insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (),
1333 e.vector_mode (0),
1334 e.memory_vector_mode ());
1335 return e.use_exact_insn (icode);
1336 }
1337 };
1338
1339 class svldnt1_impl : public full_width_access
1340 {
1341 public:
1342 unsigned int
1343 call_properties (const function_instance &) const OVERRIDE
1344 {
1345 return CP_READ_MEMORY;
1346 }
1347
1348 rtx
1349 expand (function_expander &e) const OVERRIDE
1350 {
1351 insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0));
1352 return e.use_contiguous_load_insn (icode);
1353 }
1354 };
1355
1356 /* Implements svldff1 and svldnf1. */
1357 class svldxf1_impl : public full_width_access
1358 {
1359 public:
1360 CONSTEXPR svldxf1_impl (int unspec) : m_unspec (unspec) {}
1361
1362 unsigned int
1363 call_properties (const function_instance &) const OVERRIDE
1364 {
1365 return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
1366 }
1367
1368 rtx
1369 expand (function_expander &e) const OVERRIDE
1370 {
1371 /* See the block comment in aarch64-sve.md for details about the
1372 FFR handling. */
1373 emit_insn (gen_aarch64_update_ffr_for_load ());
1374
1375 machine_mode mode = e.vector_mode (0);
1376 return e.use_contiguous_load_insn (code_for_aarch64_ldf1 (m_unspec, mode));
1377 }
1378
1379 /* The unspec associated with the load. */
1380 int m_unspec;
1381 };
1382
1383 /* Implements extending contiguous forms of svldff1 and svldnf1. */
1384 class svldxf1_extend_impl : public extending_load
1385 {
1386 public:
1387 CONSTEXPR svldxf1_extend_impl (type_suffix_index memory_type, int unspec)
1388 : extending_load (memory_type), m_unspec (unspec) {}
1389
1390 unsigned int
1391 call_properties (const function_instance &) const OVERRIDE
1392 {
1393 return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
1394 }
1395
1396 rtx
1397 expand (function_expander &e) const OVERRIDE
1398 {
1399 /* See the block comment in aarch64-sve.md for details about the
1400 FFR handling. */
1401 emit_insn (gen_aarch64_update_ffr_for_load ());
1402
1403 insn_code icode = code_for_aarch64_ldf1 (m_unspec, extend_rtx_code (),
1404 e.vector_mode (0),
1405 e.memory_vector_mode ());
1406 return e.use_contiguous_load_insn (icode);
1407 }
1408
1409 /* The unspec associated with the load. */
1410 int m_unspec;
1411 };
1412
1413 class svlen_impl : public quiet<function_base>
1414 {
1415 public:
1416 gimple *
1417 fold (gimple_folder &f) const OVERRIDE
1418 {
1419 /* The argument only exists for its type. */
1420 tree rhs_type = TREE_TYPE (gimple_call_arg (f.call, 0));
1421 tree count = build_int_cstu (TREE_TYPE (f.lhs),
1422 TYPE_VECTOR_SUBPARTS (rhs_type));
1423 return gimple_build_assign (f.lhs, count);
1424 }
1425
1426 rtx
1427 expand (function_expander &e) const OVERRIDE
1428 {
1429 /* The argument only exists for its type. */
1430 return gen_int_mode (GET_MODE_NUNITS (e.vector_mode (0)), DImode);
1431 }
1432 };
1433
1434 class svmad_impl : public function_base
1435 {
1436 public:
1437 rtx
1438 expand (function_expander &e) const OVERRIDE
1439 {
1440 return expand_mad (e);
1441 }
1442 };
1443
1444 class svmla_impl : public function_base
1445 {
1446 public:
1447 rtx
1448 expand (function_expander &e) const OVERRIDE
1449 {
1450 /* Put the accumulator at the end (argument 3), but keep it as the
1451 merge input for _m functions. */
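/* For example, svmla_m (pg, acc, x, y) computes acc + x * y, which is
   svmad_m (pg, x, y, acc) once acc is moved to the end.  */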
1452 e.rotate_inputs_left (1, 4);
1453 return expand_mad (e, 3);
1454 }
1455 };
1456
1457 class svmla_lane_impl : public function_base
1458 {
1459 public:
1460 rtx
1461 expand (function_expander &e) const OVERRIDE
1462 {
1463 if (e.type_suffix (0).integer_p)
1464 {
1465 machine_mode mode = e.vector_mode (0);
1466 return e.use_exact_insn (code_for_aarch64_sve_add_mul_lane (mode));
1467 }
1468 return expand_mla_mls_lane (e, UNSPEC_FMLA);
1469 }
1470 };
1471
1472 class svmls_impl : public function_base
1473 {
1474 public:
1475 rtx
1476 expand (function_expander &e) const OVERRIDE
1477 {
1478 /* Put the accumulator at the end (argument 3), but keep it as the
1479 merge input for _m functions. */
1480 e.rotate_inputs_left (1, 4);
1481 return expand_msb (e, 3);
1482 }
1483 };
1484
1485 class svmov_impl : public function_base
1486 {
1487 public:
1488 gimple *
1489 fold (gimple_folder &f) const OVERRIDE
1490 {
1491 return gimple_build_assign (f.lhs, BIT_AND_EXPR,
1492 gimple_call_arg (f.call, 0),
1493 gimple_call_arg (f.call, 1));
1494 }
1495
1496 rtx
1497 expand (function_expander &e) const OVERRIDE
1498 {
1499 /* The canonical form for the assembler alias "MOV Pa.B, Pb/Z, Pc.B"
1500 is "AND Pa.B, Pb/Z, Pc.B, Pc.B". */
1501 gcc_assert (e.pred == PRED_z);
1502 e.args.quick_push (e.args[1]);
1503 return e.use_exact_insn (CODE_FOR_aarch64_pred_andvnx16bi_z);
1504 }
1505 };
1506
1507 class svmls_lane_impl : public function_base
1508 {
1509 public:
1510 rtx
1511 expand (function_expander &e) const OVERRIDE
1512 {
1513 if (e.type_suffix (0).integer_p)
1514 {
1515 machine_mode mode = e.vector_mode (0);
1516 return e.use_exact_insn (code_for_aarch64_sve_sub_mul_lane (mode));
1517 }
1518 return expand_mla_mls_lane (e, UNSPEC_FMLS);
1519 }
1520 };
1521
1522 class svmmla_impl : public function_base
1523 {
1524 public:
1525 rtx
1526 expand (function_expander &e) const OVERRIDE
1527 {
1528 insn_code icode;
1529 if (e.type_suffix (0).integer_p)
1530 {
1531 if (e.type_suffix (0).unsigned_p)
1532 icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0));
1533 else
1534 icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0));
1535 }
1536 else
1537 icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0));
1538 return e.use_exact_insn (icode);
1539 }
1540 };
1541
1542 class svmsb_impl : public function_base
1543 {
1544 public:
1545 rtx
1546 expand (function_expander &e) const OVERRIDE
1547 {
1548 return expand_msb (e);
1549 }
1550 };
1551
1552 class svnand_impl : public function_base
1553 {
1554 public:
1555 rtx
1556 expand (function_expander &e) const OVERRIDE
1557 {
1558 gcc_assert (e.pred == PRED_z);
1559 return e.use_exact_insn (CODE_FOR_aarch64_pred_nandvnx16bi_z);
1560 }
1561 };
1562
1563 class svnor_impl : public function_base
1564 {
1565 public:
1566 rtx
1567 expand (function_expander &e) const OVERRIDE
1568 {
1569 gcc_assert (e.pred == PRED_z);
1570 return e.use_exact_insn (CODE_FOR_aarch64_pred_norvnx16bi_z);
1571 }
1572 };
1573
1574 class svnot_impl : public rtx_code_function
1575 {
1576 public:
1577 CONSTEXPR svnot_impl () : rtx_code_function (NOT, NOT, -1) {}
1578
1579 rtx
1580 expand (function_expander &e) const OVERRIDE
1581 {
1582 if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
1583 {
1584 /* The canonical form for the assembler alias "NOT Pa.B, Pb/Z, Pc.B"
1585 is "EOR Pa.B, Pb/Z, Pb.B, Pc.B". */
1586 gcc_assert (e.pred == PRED_z);
1587 e.args.quick_insert (1, e.args[0]);
1588 return e.use_exact_insn (CODE_FOR_aarch64_pred_xorvnx16bi_z);
1589 }
1590 return rtx_code_function::expand (e);
1591 }
1592 };
1593
1594 class svorn_impl : public function_base
1595 {
1596 public:
1597 rtx
1598 expand (function_expander &e) const OVERRIDE
1599 {
1600 gcc_assert (e.pred == PRED_z);
1601 return e.use_exact_insn (CODE_FOR_aarch64_pred_ornvnx16bi_z);
1602 }
1603 };
1604
1605 class svpfalse_impl : public function_base
1606 {
1607 public:
1608 gimple *
1609 fold (gimple_folder &f) const OVERRIDE
1610 {
1611 return f.fold_to_pfalse ();
1612 }
1613
1614 rtx
1615 expand (function_expander &) const OVERRIDE
1616 {
1617 return CONST0_RTX (VNx16BImode);
1618 }
1619 };
1620
1621 /* Implements svpfirst and svpnext, which share the same .md patterns. */
1622 class svpfirst_svpnext_impl : public function_base
1623 {
1624 public:
1625 CONSTEXPR svpfirst_svpnext_impl (int unspec) : m_unspec (unspec) {}
1626
1627 rtx
1628 expand (function_expander &e) const OVERRIDE
1629 {
1630 machine_mode mode = e.vector_mode (0);
1631 e.add_ptrue_hint (0, mode);
1632 return e.use_exact_insn (code_for_aarch64_sve (m_unspec, mode));
1633 }
1634
1635 /* The unspec associated with the operation. */
1636 int m_unspec;
1637 };
1638
1639 /* Implements contiguous forms of svprf[bhwd]. */
1640 class svprf_bhwd_impl : public function_base
1641 {
1642 public:
1643 CONSTEXPR svprf_bhwd_impl (machine_mode mode) : m_mode (mode) {}
1644
1645 unsigned int
1646 call_properties (const function_instance &) const OVERRIDE
1647 {
1648 return CP_PREFETCH_MEMORY;
1649 }
1650
1651 rtx
1652 expand (function_expander &e) const OVERRIDE
1653 {
1654 e.prepare_prefetch_operands ();
1655 insn_code icode = code_for_aarch64_sve_prefetch (m_mode);
1656 return e.use_contiguous_prefetch_insn (icode);
1657 }
1658
1659 /* The mode that we'd use to hold one vector of prefetched data. */
1660 machine_mode m_mode;
1661 };
1662
1663 /* Implements svprf[bhwd]_gather. */
1664 class svprf_bhwd_gather_impl : public function_base
1665 {
1666 public:
1667 CONSTEXPR svprf_bhwd_gather_impl (machine_mode mode) : m_mode (mode) {}
1668
1669 unsigned int
1670 call_properties (const function_instance &) const OVERRIDE
1671 {
1672 return CP_PREFETCH_MEMORY;
1673 }
1674
1675 machine_mode
1676 memory_vector_mode (const function_instance &) const OVERRIDE
1677 {
1678 return m_mode;
1679 }
1680
1681 rtx
1682 expand (function_expander &e) const OVERRIDE
1683 {
1684 e.prepare_prefetch_operands ();
1685 e.prepare_gather_address_operands (1);
1686
1687 /* Insert a zero operand to identify the mode of the memory being
1688 accessed. This goes between the gather operands and prefetch
1689 operands created above. */
1690 e.args.quick_insert (5, CONST0_RTX (m_mode));
1691
1692 machine_mode reg_mode = GET_MODE (e.args[2]);
1693 insn_code icode = code_for_aarch64_sve_gather_prefetch (m_mode, reg_mode);
1694 return e.use_exact_insn (icode);
1695 }
1696
1697 /* The mode that we'd use to hold one vector of prefetched data. */
1698 machine_mode m_mode;
1699 };
1700
1701 /* Implements svptest_any, svptest_first and svptest_last. */
1702 class svptest_impl : public function_base
1703 {
1704 public:
1705 CONSTEXPR svptest_impl (rtx_code compare) : m_compare (compare) {}
1706
1707 rtx
1708 expand (function_expander &e) const OVERRIDE
1709 {
1710 /* See whether GP is an exact ptrue for some predicate mode;
1711 i.e. whether converting the GP to that mode will not drop
1712 set bits and will leave all significant bits set. */
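/* For example, the result of svptrue_b32 () is an exact ptrue for
   VNx4BI, so the PTEST below can be marked SVE_KNOWN_PTRUE.  */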
1713 machine_mode wide_mode;
1714 int hint;
1715 if (aarch64_ptrue_all_mode (e.args[0]).exists (&wide_mode))
1716 hint = SVE_KNOWN_PTRUE;
1717 else
1718 {
1719 hint = SVE_MAYBE_NOT_PTRUE;
1720 wide_mode = VNx16BImode;
1721 }
1722
1723 /* Generate the PTEST itself. */
1724 rtx pg = force_reg (VNx16BImode, e.args[0]);
1725 rtx wide_pg = gen_lowpart (wide_mode, pg);
1726 rtx hint_rtx = gen_int_mode (hint, DImode);
1727 rtx op = force_reg (wide_mode, gen_lowpart (wide_mode, e.args[1]));
1728 emit_insn (gen_aarch64_ptestvnx16bi (pg, wide_pg, hint_rtx, op));
1729
1730 /* Get the location of the boolean result. We can provide SImode and
1731 DImode values directly; rely on generic code to convert others. */
1732 rtx target = e.possible_target;
1733 if (!target
1734 || !REG_P (target)
1735 || (GET_MODE (target) != SImode && GET_MODE (target) != DImode))
1736 target = gen_reg_rtx (DImode);
1737
1738 /* Generate a CSET to convert the CC result of the PTEST to a boolean. */
1739 rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
1740 rtx compare = gen_rtx_fmt_ee (m_compare, GET_MODE (target),
1741 cc_reg, const0_rtx);
1742 emit_insn (gen_rtx_SET (target, compare));
1743 return target;
1744 }
1745
1746 /* The comparison code associated with the ptest condition. */
1747 rtx_code m_compare;
1748 };
1749
1750 class svptrue_impl : public function_base
1751 {
1752 public:
1753 gimple *
1754 fold (gimple_folder &f) const OVERRIDE
1755 {
1756 return f.fold_to_ptrue ();
1757 }
1758
1759 rtx
1760 expand (function_expander &e) const OVERRIDE
1761 {
1762 return aarch64_ptrue_all (e.type_suffix (0).element_bytes);
1763 }
1764 };
1765
1766 class svptrue_pat_impl : public function_base
1767 {
1768 public:
1769 gimple *
1770 fold (gimple_folder &f) const OVERRIDE
1771 {
1772 tree pattern_arg = gimple_call_arg (f.call, 0);
1773 aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg);
1774
1775 if (pattern == AARCH64_SV_ALL)
1776 /* svptrue_pat_bN (SV_ALL) == svptrue_bN (). */
1777 return f.fold_to_ptrue ();
1778
1779 /* See whether we can count the number of elements in the pattern
1780 at compile time. If so, construct a predicate with that number
1781 of 1s followed by all 0s. */
1782 int nelts_per_vq = f.elements_per_vq (0);
1783 HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, nelts_per_vq);
1784 if (value >= 0)
1785 return f.fold_to_vl_pred (value);
1786
1787 return NULL;
1788 }
1789
1790 rtx
1791 expand (function_expander &e) const OVERRIDE
1792 {
1793 /* In rtl, the predicate is represented as the constant:
1794
1795 (const:VNx16BI (unspec:VNx16BI [(const_int PATTERN)
1796 (const_vector:VNxnnBI [zeros])]
1797 UNSPEC_PTRUE))
1798
1799 where nn determines the element size. */
1800 rtvec vec = gen_rtvec (2, e.args[0], CONST0_RTX (e.vector_mode (0)));
1801 return gen_rtx_CONST (VNx16BImode,
1802 gen_rtx_UNSPEC (VNx16BImode, vec, UNSPEC_PTRUE));
1803 }
1804 };
1805
1806 /* Implements svqdec[bhwd]{,_pat} and svqinc[bhwd]{,_pat}. */
1807 class svqdec_svqinc_bhwd_impl : public function_base
1808 {
1809 public:
1810 CONSTEXPR svqdec_svqinc_bhwd_impl (rtx_code code_for_sint,
1811 rtx_code code_for_uint,
1812 scalar_int_mode elem_mode)
1813 : m_code_for_sint (code_for_sint),
1814 m_code_for_uint (code_for_uint),
1815 m_elem_mode (elem_mode)
1816 {}
1817
1818 rtx
1819 expand (function_expander &e) const OVERRIDE
1820 {
1821 /* Treat non-_pat functions in the same way as _pat functions with
1822 an SV_ALL argument. */
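/* For example, svqincb_n_u64 (x, 2) is handled exactly like
   svqincb_pat_n_u64 (x, SV_ALL, 2).  */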
1823 if (e.args.length () == 2)
1824 e.args.quick_insert (1, gen_int_mode (AARCH64_SV_ALL, DImode));
1825
1826 /* Insert the number of elements per 128-bit block as a fake argument,
1827 between the pattern and the multiplier. Arguments 1, 2 and 3 then
1828 correspond exactly with the 3 UNSPEC_SVE_CNT_PAT operands; see
1829 aarch64_sve_cnt_pat for details. */
1830 unsigned int elements_per_vq = 128 / GET_MODE_BITSIZE (m_elem_mode);
1831 e.args.quick_insert (2, gen_int_mode (elements_per_vq, DImode));
1832
1833 rtx_code code = (e.type_suffix (0).unsigned_p
1834 ? m_code_for_uint
1835 : m_code_for_sint);
1836
1837 /* Choose between operating on integer scalars or integer vectors. */
1838 machine_mode mode = e.vector_mode (0);
1839 if (e.mode_suffix_id == MODE_n)
1840 mode = GET_MODE_INNER (mode);
1841 return e.use_exact_insn (code_for_aarch64_sve_pat (code, mode));
1842 }
1843
1844 /* The saturating addition or subtraction codes to use for signed and
1845 unsigned values respectively. */
1846 rtx_code m_code_for_sint;
1847 rtx_code m_code_for_uint;
1848
1849 /* The integer mode associated with the [bhwd] suffix. */
1850 scalar_int_mode m_elem_mode;
1851 };
1852
1853 /* Implements svqdec[bhwd]{,_pat}. */
1854 class svqdec_bhwd_impl : public svqdec_svqinc_bhwd_impl
1855 {
1856 public:
1857 CONSTEXPR svqdec_bhwd_impl (scalar_int_mode elem_mode)
1858 : svqdec_svqinc_bhwd_impl (SS_MINUS, US_MINUS, elem_mode) {}
1859 };
1860
1861 /* Implements svqinc[bhwd]{,_pat}. */
1862 class svqinc_bhwd_impl : public svqdec_svqinc_bhwd_impl
1863 {
1864 public:
1865 CONSTEXPR svqinc_bhwd_impl (scalar_int_mode elem_mode)
1866 : svqdec_svqinc_bhwd_impl (SS_PLUS, US_PLUS, elem_mode) {}
1867 };
1868
1869 /* Implements svqdecp and svqincp. */
1870 class svqdecp_svqincp_impl : public function_base
1871 {
1872 public:
1873 CONSTEXPR svqdecp_svqincp_impl (rtx_code code_for_sint,
1874 rtx_code code_for_uint)
1875 : m_code_for_sint (code_for_sint),
1876 m_code_for_uint (code_for_uint)
1877 {}
1878
1879 rtx
1880 expand (function_expander &e) const OVERRIDE
1881 {
1882 rtx_code code = (e.type_suffix (0).unsigned_p
1883 ? m_code_for_uint
1884 : m_code_for_sint);
1885 insn_code icode;
1886 if (e.mode_suffix_id == MODE_n)
1887 {
1888 /* Increment or decrement a scalar (whose mode is given by the first
1889 type suffix) by the number of active elements in a predicate
1890 (whose mode is given by the second type suffix). */
1891 machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
1892 icode = code_for_aarch64_sve_cntp (code, mode, e.vector_mode (1));
1893 }
1894 else
1895 /* Increment a vector by the number of active elements in a predicate,
1896 with the vector mode determining the predicate mode. */
1897 icode = code_for_aarch64_sve_cntp (code, e.vector_mode (0));
1898 return e.use_exact_insn (icode);
1899 }
1900
1901 /* The saturating addition or subtraction codes to use for signed and
1902 unsigned values respectively. */
1903 rtx_code m_code_for_sint;
1904 rtx_code m_code_for_uint;
1905 };
1906
1907 class svrdffr_impl : public function_base
1908 {
1909 public:
1910 unsigned int
1911 call_properties (const function_instance &) const OVERRIDE
1912 {
1913 return CP_READ_FFR;
1914 }
1915
1916 rtx
1917 expand (function_expander &e) const OVERRIDE
1918 {
1919 /* See the block comment in aarch64-sve.md for details about the
1920 FFR handling. */
1921 emit_insn (gen_aarch64_copy_ffr_to_ffrt ());
1922 rtx result = e.use_exact_insn (e.pred == PRED_z
1923 ? CODE_FOR_aarch64_rdffr_z
1924 : CODE_FOR_aarch64_rdffr);
1925 emit_insn (gen_aarch64_update_ffrt ());
1926 return result;
1927 }
1928 };
1929
1930 class svreinterpret_impl : public quiet<function_base>
1931 {
1932 public:
1933 gimple *
1934 fold (gimple_folder &f) const OVERRIDE
1935 {
1936 /* Punt to rtl if the effect of the reinterpret on registers does not
1937 conform to GCC's endianness model. */
1938 if (!targetm.can_change_mode_class (f.vector_mode (0),
1939 f.vector_mode (1), FP_REGS))
1940 return NULL;
1941
1942 /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR
1943 reinterpretation. */
1944 tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (f.lhs),
1945 gimple_call_arg (f.call, 0));
1946 return gimple_build_assign (f.lhs, VIEW_CONVERT_EXPR, rhs);
1947 }
1948
1949 rtx
1950 expand (function_expander &e) const OVERRIDE
1951 {
1952 machine_mode mode = e.vector_mode (0);
1953 return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode));
1954 }
1955 };
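/* Illustrative example (editorial addition, not part of the original
   source): a call such as svreinterpret_s32_f32 (x) normally folds to
   lhs = VIEW_CONVERT_EXPR <svint32_t> (x); the rtl fallback through
   aarch64_sve_reinterpret is only used when the register-level
   reinterpretation does not match GCC's endianness model.  */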
1956
1957 class svrev_impl : public permute
1958 {
1959 public:
1960 gimple *
1961 fold (gimple_folder &f) const OVERRIDE
1962 {
1963 /* Punt for now on _b16 and wider; we'd need more complex evpc logic
1964 to rerecognize the result. */
1965 if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8)
1966 return NULL;
1967
1968 /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */
1969 poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
1970 vec_perm_builder builder (nelts, 1, 3);
1971 for (int i = 0; i < 3; ++i)
1972 builder.quick_push (nelts - i - 1);
1973 return fold_permute (f, builder);
1974 }
1975
1976 rtx
1977 expand (function_expander &e) const OVERRIDE
1978 {
1979 return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0)));
1980 }
1981 };
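/* Illustrative example (editorial addition, not part of the original
   source): for a vector with NELTS == 4, the three indices pushed above
   are { 3, 2, 1 }; vec_perm_builder treats them as one stepped pattern
   and extrapolates the full selector { 3, 2, 1, 0 }, i.e. a complete
   reversal.  */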
1982
1983 class svsel_impl : public quiet<function_base>
1984 {
1985 public:
1986 gimple *
1987 fold (gimple_folder &f) const OVERRIDE
1988 {
1989 /* svsel corresponds exactly to VEC_COND_EXPR. */
1990 gimple_seq stmts = NULL;
1991 tree pred = f.convert_pred (stmts, f.vector_type (0), 0);
1992 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
1993 return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred,
1994 gimple_call_arg (f.call, 1),
1995 gimple_call_arg (f.call, 2));
1996 }
1997
1998 rtx
1999 expand (function_expander &e) const OVERRIDE
2000 {
2001 /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond). */
2002 e.rotate_inputs_left (0, 3);
2003 insn_code icode = convert_optab_handler (vcond_mask_optab,
2004 e.vector_mode (0),
2005 e.gp_mode (0));
2006 return e.use_exact_insn (icode);
2007 }
2008 };
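/* Illustrative example (editorial addition, not part of the original
   source): svsel_s32 (pg, a, b) folds to lhs = VEC_COND_EXPR <pg', a, b>,
   where pg' is the predicate converted to match the data vectors'
   element width; at expansion time the same call becomes
   vcond_mask (a, b, pg) after the rotation above.  */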
2009
2010 /* Implements svset2, svset3 and svset4. */
2011 class svset_impl : public quiet<multi_vector_function>
2012 {
2013 public:
2014 CONSTEXPR svset_impl (unsigned int vectors_per_tuple)
2015 : quiet<multi_vector_function> (vectors_per_tuple) {}
2016
2017 gimple *
2018 fold (gimple_folder &f) const OVERRIDE
2019 {
2020 tree rhs_tuple = gimple_call_arg (f.call, 0);
2021 tree index = gimple_call_arg (f.call, 1);
2022 tree rhs_vector = gimple_call_arg (f.call, 2);
2023
2024 /* Replace the call with two statements: a copy of the full tuple
2025 to the call result, followed by an update of the individual vector.
2026
2027 The fold routines expect the replacement statement to have the
2028 same lhs as the original call, so return the copy statement
2029 rather than the field update. */
2030 gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple);
2031
2032 /* Get a reference to the individual vector. */
2033 tree field = tuple_type_field (TREE_TYPE (f.lhs));
2034 tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
2035 f.lhs, field, NULL_TREE);
2036 tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
2037 lhs_array, index, NULL_TREE, NULL_TREE);
2038 gassign *update = gimple_build_assign (lhs_vector, rhs_vector);
2039 gsi_insert_after (f.gsi, update, GSI_SAME_STMT);
2040
2041 return copy;
2042 }
2043
2044 rtx
2045 expand (function_expander &e) const OVERRIDE
2046 {
2047 rtx rhs_tuple = e.args[0];
2048 unsigned int index = INTVAL (e.args[1]);
2049 rtx rhs_vector = e.args[2];
2050
2051 /* First copy the full tuple to the target register. */
2052 rtx lhs_tuple = e.get_nonoverlapping_reg_target ();
2053 emit_move_insn (lhs_tuple, rhs_tuple);
2054
2055 /* ...then update the individual vector. */
2056 rtx lhs_vector = simplify_gen_subreg (GET_MODE (rhs_vector),
2057 lhs_tuple, GET_MODE (lhs_tuple),
2058 index * BYTES_PER_SVE_VECTOR);
2059 emit_move_insn (lhs_vector, rhs_vector);
2060 return lhs_vector;
2061 }
2062 };
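/* Illustrative example (editorial addition, not part of the original
   source): svset2_s32 (tuple, 1, v) is folded to roughly:

     lhs = tuple;              // the copy statement returned above
     lhs.<field>[1] = v;       // the ARRAY_REF update inserted after it

   where <field> is a placeholder (ours, not GCC's) for the array member
   returned by tuple_type_field.  */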
2063
2064 class svsetffr_impl : public function_base
2065 {
2066 public:
2067 unsigned int
2068 call_properties (const function_instance &) const OVERRIDE
2069 {
2070 return CP_WRITE_FFR;
2071 }
2072
2073 rtx
2074 expand (function_expander &e) const OVERRIDE
2075 {
2076 e.args.quick_push (CONSTM1_RTX (VNx16BImode));
2077 return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
2078 }
2079 };
2080
2081 class svst1_impl : public full_width_access
2082 {
2083 public:
2084 unsigned int
2085 call_properties (const function_instance &) const OVERRIDE
2086 {
2087 return CP_WRITE_MEMORY;
2088 }
2089
2090 gimple *
2091 fold (gimple_folder &f) const OVERRIDE
2092 {
2093 tree vectype = f.vector_type (0);
2094
2095 /* Get the predicate and base pointer. */
2096 gimple_seq stmts = NULL;
2097 tree pred = f.convert_pred (stmts, vectype, 0);
2098 tree base = f.fold_contiguous_base (stmts, vectype);
2099 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
2100
2101 tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
2102 tree rhs = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 1);
2103 return gimple_build_call_internal (IFN_MASK_STORE, 4,
2104 base, cookie, pred, rhs);
2105 }
2106
2107 rtx
2108 expand (function_expander &e) const OVERRIDE
2109 {
2110 insn_code icode = convert_optab_handler (maskstore_optab,
2111 e.vector_mode (0), e.gp_mode (0));
2112 return e.use_contiguous_store_insn (icode);
2113 }
2114 };
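/* Illustrative example (editorial addition, not part of the original
   source): svst1_s32 (pg, base, data) folds to an internal call of
   roughly the form .MASK_STORE (base, cookie, pg, data), where the
   cookie encodes the access type and alignment via load_store_cookie.  */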
2115
2116 class svst1_scatter_impl : public full_width_access
2117 {
2118 public:
2119 unsigned int
2120 call_properties (const function_instance &) const OVERRIDE
2121 {
2122 return CP_WRITE_MEMORY;
2123 }
2124
2125 rtx
2126 expand (function_expander &e) const OVERRIDE
2127 {
2128 e.prepare_gather_address_operands (1);
2129 /* Put the predicate last, as required by mask_scatter_store_optab. */
2130 e.rotate_inputs_left (0, 6);
2131 machine_mode mem_mode = e.memory_vector_mode ();
2132 machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
2133 insn_code icode = convert_optab_handler (mask_scatter_store_optab,
2134 mem_mode, int_mode);
2135 return e.use_exact_insn (icode);
2136 }
2137 };
2138
2139 /* Implements truncating forms of svst1_scatter. */
2140 class svst1_scatter_truncate_impl : public truncating_store
2141 {
2142 public:
2143 CONSTEXPR svst1_scatter_truncate_impl (scalar_int_mode to_mode)
2144 : truncating_store (to_mode) {}
2145
2146 rtx
2147 expand (function_expander &e) const OVERRIDE
2148 {
2149 e.prepare_gather_address_operands (1);
2150 /* Put the predicate last, since the truncating scatters use the same
2151 operand order as mask_scatter_store_optab. */
2152 e.rotate_inputs_left (0, 6);
2153 insn_code icode = code_for_aarch64_scatter_store_trunc
2154 (e.memory_vector_mode (), e.vector_mode (0));
2155 return e.use_exact_insn (icode);
2156 }
2157 };
2158
2159 /* Implements truncating contiguous forms of svst1. */
2160 class svst1_truncate_impl : public truncating_store
2161 {
2162 public:
2163 CONSTEXPR svst1_truncate_impl (scalar_int_mode to_mode)
2164 : truncating_store (to_mode) {}
2165
2166 rtx
2167 expand (function_expander &e) const OVERRIDE
2168 {
2169 insn_code icode = code_for_aarch64_store_trunc (e.memory_vector_mode (),
2170 e.vector_mode (0));
2171 return e.use_contiguous_store_insn (icode);
2172 }
2173 };
2174
2175 /* Implements svst2, svst3 and svst4. */
2176 class svst234_impl : public full_width_access
2177 {
2178 public:
2179 CONSTEXPR svst234_impl (unsigned int vectors_per_tuple)
2180 : full_width_access (vectors_per_tuple) {}
2181
2182 unsigned int
2183 call_properties (const function_instance &) const OVERRIDE
2184 {
2185 return CP_WRITE_MEMORY;
2186 }
2187
2188 gimple *
2189 fold (gimple_folder &f) const OVERRIDE
2190 {
2191 tree vectype = f.vector_type (0);
2192
2193 /* Get the predicate and base pointer. */
2194 gimple_seq stmts = NULL;
2195 tree pred = f.convert_pred (stmts, vectype, 0);
2196 tree base = f.fold_contiguous_base (stmts, vectype);
2197 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
2198
2199 /* View the stored data as an array of vectors. */
2200 unsigned int num_args = gimple_call_num_args (f.call);
2201 tree rhs_tuple = gimple_call_arg (f.call, num_args - 1);
2202 tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
2203 tree rhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), rhs_tuple);
2204
2205 tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
2206 return gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
2207 base, cookie, pred, rhs_array);
2208 }
2209
2210 rtx
2211 expand (function_expander &e) const OVERRIDE
2212 {
2213 machine_mode tuple_mode = GET_MODE (e.args.last ());
2214 insn_code icode = convert_optab_handler (vec_mask_store_lanes_optab,
2215 tuple_mode, e.vector_mode (0));
2216 return e.use_contiguous_store_insn (icode);
2217 }
2218 };
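/* Illustrative example (editorial addition, not part of the original
   source): svst2_s32 (pg, base, tuple) views the two-vector tuple as an
   array of vectors and folds to roughly
   .MASK_STORE_LANES (base, cookie, pg, tuple-as-array), the store-lanes
   counterpart of the svst1 fold above.  */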
2219
2220 class svstnt1_impl : public full_width_access
2221 {
2222 public:
2223 unsigned int
2224 call_properties (const function_instance &) const OVERRIDE
2225 {
2226 return CP_WRITE_MEMORY;
2227 }
2228
2229 rtx
2230 expand (function_expander &e) const OVERRIDE
2231 {
2232 insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0));
2233 return e.use_contiguous_store_insn (icode);
2234 }
2235 };
2236
2237 class svsub_impl : public rtx_code_function
2238 {
2239 public:
2240 CONSTEXPR svsub_impl ()
2241 : rtx_code_function (MINUS, MINUS, UNSPEC_COND_FSUB) {}
2242
2243 rtx
2244 expand (function_expander &e) const OVERRIDE
2245 {
2246 /* Canonicalize subtractions of constants to additions. */
2247 machine_mode mode = e.vector_mode (0);
2248 if (e.try_negating_argument (2, mode))
2249 return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD);
2250
2251 return rtx_code_function::expand (e);
2252 }
2253 };
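/* Illustrative example (editorial addition, not part of the original
   source): svsub_n_s32_x (pg, x, 1) is rewritten as an addition of -1
   (when the constant can be negated), so that the immediate forms of the
   predicated ADD patterns can be used; other subtractions fall back to
   the MINUS/UNSPEC_COND_FSUB handling inherited from rtx_code_function.  */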
2254
2255 class svtbl_impl : public permute
2256 {
2257 public:
2258 rtx
2259 expand (function_expander &e) const OVERRIDE
2260 {
2261 return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0)));
2262 }
2263 };
2264
2265 /* Implements svtrn1 and svtrn2. */
2266 class svtrn_impl : public binary_permute
2267 {
2268 public:
2269 CONSTEXPR svtrn_impl (int base)
2270 : binary_permute (base ? UNSPEC_TRN2 : UNSPEC_TRN1), m_base (base) {}
2271
2272 gimple *
2273 fold (gimple_folder &f) const OVERRIDE
2274 {
2275 /* svtrn1: { 0, nelts, 2, nelts + 2, 4, nelts + 4, ... }
2276 svtrn2: as for svtrn1, but with 1 added to each index. */
2277 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
2278 vec_perm_builder builder (nelts, 2, 3);
2279 for (unsigned int i = 0; i < 3; ++i)
2280 {
2281 builder.quick_push (m_base + i * 2);
2282 builder.quick_push (m_base + i * 2 + nelts);
2283 }
2284 return fold_permute (f, builder);
2285 }
2286
2287 /* 0 for svtrn1, 1 for svtrn2. */
2288 unsigned int m_base;
2289 };
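/* Illustrative example (editorial addition, not part of the original
   source): with NELTS == 8, the three index pairs pushed above encode
   the selectors { 0, 8, 2, 10, 4, 12, ... } for svtrn1 and
   { 1, 9, 3, 11, 5, 13, ... } for svtrn2; vec_perm_builder extends the
   two interleaved patterns to the full vector length.  */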
2290
2291 /* Base class for svundef{,2,3,4}. */
2292 class svundef_impl : public quiet<multi_vector_function>
2293 {
2294 public:
2295 CONSTEXPR svundef_impl (unsigned int vectors_per_tuple)
2296 : quiet<multi_vector_function> (vectors_per_tuple) {}
2297
2298 gimple *
2299 fold (gimple_folder &f) const OVERRIDE
2300 {
2301 /* Don't fold svundef at the gimple level. There's no exact
2302 correspondence for SSA_NAMEs, and we explicitly don't want
2303 to generate a specific value (like an all-zeros vector). */
2304 if (vectors_per_tuple () == 1)
2305 return NULL;
2306 return gimple_build_assign (f.lhs, build_clobber (TREE_TYPE (f.lhs)));
2307 }
2308
2309 rtx
2310 expand (function_expander &e) const OVERRIDE
2311 {
2312 rtx target = e.get_reg_target ();
2313 emit_clobber (copy_rtx (target));
2314 return target;
2315 }
2316 };
2317
2318 /* Implements svunpklo and svunpkhi. */
2319 class svunpk_impl : public quiet<function_base>
2320 {
2321 public:
2322 CONSTEXPR svunpk_impl (bool high_p) : m_high_p (high_p) {}
2323
2324 gimple *
2325 fold (gimple_folder &f) const OVERRIDE
2326 {
2327 /* Don't fold the predicate ops, since every bit of the svbool_t
2328 result is significant. */
2329 if (f.type_suffix_ids[0] == TYPE_SUFFIX_b)
2330 return NULL;
2331
2332 /* The first half in memory is VEC_UNPACK_LO_EXPR for little-endian
2333 and VEC_UNPACK_HI_EXPR for big-endian. */
2334 bool high_p = BYTES_BIG_ENDIAN ? !m_high_p : m_high_p;
2335 tree_code code = high_p ? VEC_UNPACK_HI_EXPR : VEC_UNPACK_LO_EXPR;
2336 return gimple_build_assign (f.lhs, code, gimple_call_arg (f.call, 0));
2337 }
2338
2339 rtx
2340 expand (function_expander &e) const OVERRIDE
2341 {
2342 machine_mode mode = GET_MODE (e.args[0]);
2343 unsigned int unpacku = m_high_p ? UNSPEC_UNPACKUHI : UNSPEC_UNPACKULO;
2344 unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO;
2345 insn_code icode;
2346 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
2347 icode = code_for_aarch64_sve_punpk (unpacku, mode);
2348 else
2349 {
2350 int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks;
2351 icode = code_for_aarch64_sve_unpk (unspec, unspec, mode);
2352 }
2353 return e.use_exact_insn (icode);
2354 }
2355
2356 /* True for svunpkhi, false for svunpklo. */
2357 bool m_high_p;
2358 };
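/* Illustrative example (editorial addition, not part of the original
   source): svunpklo_s32 widens the low-numbered half of an svint16_t;
   because GCC's element numbering differs on big-endian, this folds to
   VEC_UNPACK_LO_EXPR on little-endian targets but VEC_UNPACK_HI_EXPR on
   big-endian ones, while the svbool_t forms are deliberately left to the
   punpk patterns.  */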
2359
2360 /* Also implements svsudot. */
2361 class svusdot_impl : public function_base
2362 {
2363 public:
2364 CONSTEXPR svusdot_impl (bool su) : m_su (su) {}
2365
2366 rtx
2367 expand (function_expander &e) const OVERRIDE
2368 {
2369 /* The non-lane version of the ACLE function svsudot is implemented
2370 through the USDOT instruction, but with the second and third
2371 inputs swapped.  */
2372 if (m_su)
2373 e.rotate_inputs_left (1, 2);
2374 /* The ACLE function has the same order requirements as for svdot.
2375 While there's no requirement for the RTL pattern to have the same sort
2376 of order as that for <sur>dot_prod, it's easier to read.
2377 Hence we do the same rotation on arguments as svdot_impl does. */
2378 e.rotate_inputs_left (0, 3);
2379 machine_mode mode = e.vector_mode (0);
2380 insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode);
2381 return e.use_exact_insn (icode);
2382 }
2383
2384 private:
2385 bool m_su;
2386 };
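/* Illustrative example (editorial addition, not part of the original
   source): svsudot (acc, s, u) is expanded exactly as svusdot (acc, u, s)
   would be, since both map to the single USDOT pattern; the second
   rotation merely puts the accumulator last so that the operand order
   mirrors the dot_prod patterns.  */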
2387
2388 /* Implements svuzp1 and svuzp2. */
2389 class svuzp_impl : public binary_permute
2390 {
2391 public:
2392 CONSTEXPR svuzp_impl (unsigned int base)
2393 : binary_permute (base ? UNSPEC_UZP2 : UNSPEC_UZP1), m_base (base) {}
2394
2395 gimple *
2396 fold (gimple_folder &f) const OVERRIDE
2397 {
2398 /* svuzp1: { 0, 2, 4, 6, ... }
2399 svuzp2: { 1, 3, 5, 7, ... }. */
2400 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
2401 vec_perm_builder builder (nelts, 1, 3);
2402 for (unsigned int i = 0; i < 3; ++i)
2403 builder.quick_push (m_base + i * 2);
2404 return fold_permute (f, builder);
2405 }
2406
2407 /* 0 for svuzp1, 1 for svuzp2. */
2408 unsigned int m_base;
2409 };
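/* Illustrative note (editorial addition, not part of the original
   source): unlike svtrn and svzip, the selector here is a single stepped
   series, so the builder uses one pattern of three elements
   (e.g. { 0, 2, 4 } for svuzp1) and vec_perm_indices extrapolates the
   rest of the even- or odd-indexed sequence.  */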
2410
2411 /* A function_base for svwhilele and svwhilelt functions. */
2412 class svwhilelx_impl : public while_comparison
2413 {
2414 public:
2415 CONSTEXPR svwhilelx_impl (int unspec_for_sint, int unspec_for_uint, bool eq_p)
2416 : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p)
2417 {}
2418
2419 /* Try to fold a call by treating its arguments as constants of type T. */
2420 template<typename T>
2421 gimple *
2422 fold_type (gimple_folder &f) const
2423 {
2424 /* Only handle cases in which both operands are constant. */
2425 T arg0, arg1;
2426 if (!poly_int_tree_p (gimple_call_arg (f.call, 0), &arg0)
2427 || !poly_int_tree_p (gimple_call_arg (f.call, 1), &arg1))
2428 return NULL;
2429
2430 /* Check whether the result is known to be all-false. */
2431 if (m_eq_p ? known_gt (arg0, arg1) : known_ge (arg0, arg1))
2432 return f.fold_to_pfalse ();
2433
2434 /* Punt if we can't tell at compile time whether the result
2435 is all-false. */
2436 if (m_eq_p ? maybe_gt (arg0, arg1) : maybe_ge (arg0, arg1))
2437 return NULL;
2438
2439 /* At this point we know the result has at least one set element. */
2440 poly_uint64 diff = arg1 - arg0;
2441 poly_uint64 nelts = GET_MODE_NUNITS (f.vector_mode (0));
2442
2443 /* Canonicalize the svwhilele form to the svwhilelt form. Subtract
2444 from NELTS rather than adding to DIFF, to prevent overflow. */
2445 if (m_eq_p)
2446 nelts -= 1;
2447
2448 /* Check whether the result is known to be all-true. */
2449 if (known_ge (diff, nelts))
2450 return f.fold_to_ptrue ();
2451
2452 /* Punt if DIFF might not be the actual number of set elements
2453 in the result. Conditional equality is fine. */
2454 if (maybe_gt (diff, nelts))
2455 return NULL;
2456
2457 /* At this point we know that the predicate will have DIFF set elements
2458 for svwhilelt and DIFF + 1 set elements for svwhilele (which stops
2459 after rather than before ARG1 is reached). See if we can create
2460 the predicate at compile time. */
2461 unsigned HOST_WIDE_INT vl;
2462 if (diff.is_constant (&vl))
2463 /* Overflow is no longer possible after the checks above. */
2464 return f.fold_to_vl_pred (m_eq_p ? vl + 1 : vl);
2465
2466 return NULL;
2467 }
2468
2469 gimple *
2470 fold (gimple_folder &f) const OVERRIDE
2471 {
2472 if (f.type_suffix (1).unsigned_p)
2473 return fold_type<poly_uint64> (f);
2474 else
2475 return fold_type<poly_int64> (f);
2476 }
2477
2478 /* True for svwhilele, false for svwhilelt. */
2479 bool m_eq_p;
2480 };
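/* Illustrative example (editorial addition, not part of the original
   source): with constant bounds the fold above can resolve the call
   completely.  svwhilelt_b32 (0, 3) has DIFF == 3, which fits even the
   minimum vector length, so it folds to a constant predicate with the
   first three elements set; svwhilelt_b32 (5, 2) folds to an all-false
   predicate; and svwhilelt_b8 (0, 100) is left for rtl because whether
   every element is active depends on the runtime vector length.  */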
2481
2482 class svwrffr_impl : public function_base
2483 {
2484 public:
2485 unsigned int
2486 call_properties (const function_instance &) const OVERRIDE
2487 {
2488 return CP_WRITE_FFR;
2489 }
2490
2491 rtx
2492 expand (function_expander &e) const OVERRIDE
2493 {
2494 return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
2495 }
2496 };
2497
2498 /* Implements svzip1 and svzip2. */
2499 class svzip_impl : public binary_permute
2500 {
2501 public:
2502 CONSTEXPR svzip_impl (unsigned int base)
2503 : binary_permute (base ? UNSPEC_ZIP2 : UNSPEC_ZIP1), m_base (base) {}
2504
2505 gimple *
2506 fold (gimple_folder &f) const OVERRIDE
2507 {
2508 /* svzip1: { 0, nelts, 1, nelts + 1, 2, nelts + 2, ... }
2509 svzip2: as for svzip1, but with nelts / 2 added to each index. */
2510 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
2511 poly_uint64 base = m_base * exact_div (nelts, 2);
2512 vec_perm_builder builder (nelts, 2, 3);
2513 for (unsigned int i = 0; i < 3; ++i)
2514 {
2515 builder.quick_push (base + i);
2516 builder.quick_push (base + i + nelts);
2517 }
2518 return fold_permute (f, builder);
2519 }
2520
2521 /* 0 for svzip1, 1 for svzip2. */
2522 unsigned int m_base;
2523 };
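/* Illustrative example (editorial addition, not part of the original
   source): with NELTS == 8, svzip1 encodes the selector
   { 0, 8, 1, 9, 2, 10, ... } and svzip2, whose base starts at NELTS / 2,
   encodes { 4, 12, 5, 13, 6, 14, ... }; as with svtrn, only the first
   three pairs are pushed and vec_perm_builder extends the series.  */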
2524
2525 } /* end anonymous namespace */
2526
2527 namespace aarch64_sve {
2528
2529 FUNCTION (svabd, svabd_impl,)
2530 FUNCTION (svabs, quiet<rtx_code_function>, (ABS, ABS, UNSPEC_COND_FABS))
2531 FUNCTION (svacge, svac_impl, (UNSPEC_COND_FCMGE))
2532 FUNCTION (svacgt, svac_impl, (UNSPEC_COND_FCMGT))
2533 FUNCTION (svacle, svac_impl, (UNSPEC_COND_FCMLE))
2534 FUNCTION (svaclt, svac_impl, (UNSPEC_COND_FCMLT))
2535 FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD))
2536 FUNCTION (svadda, svadda_impl,)
2537 FUNCTION (svaddv, reduction, (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV))
2538 FUNCTION (svadrb, svadr_bhwd_impl, (0))
2539 FUNCTION (svadrd, svadr_bhwd_impl, (3))
2540 FUNCTION (svadrh, svadr_bhwd_impl, (1))
2541 FUNCTION (svadrw, svadr_bhwd_impl, (2))
2542 FUNCTION (svand, rtx_code_function, (AND, AND))
2543 FUNCTION (svandv, reduction, (UNSPEC_ANDV))
2544 FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
2545 FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE))
2546 FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1))
2547 FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf))
2548 FUNCTION (svbfdot_lane, fixed_insn_function,
2549 (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf))
2550 FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf))
2551 FUNCTION (svbfmlalb_lane, fixed_insn_function,
2552 (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf))
2553 FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf))
2554 FUNCTION (svbfmlalt_lane, fixed_insn_function,
2555 (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf))
2556 FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf))
2557 FUNCTION (svbic, svbic_impl,)
2558 FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA))
2559 FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB))
2560 FUNCTION (svbrkn, svbrk_binary_impl, (UNSPEC_BRKN))
2561 FUNCTION (svbrkpa, svbrk_binary_impl, (UNSPEC_BRKPA))
2562 FUNCTION (svbrkpb, svbrk_binary_impl, (UNSPEC_BRKPB))
2563 FUNCTION (svcadd, svcadd_impl,)
2564 FUNCTION (svclasta, svclast_impl, (UNSPEC_CLASTA))
2565 FUNCTION (svclastb, svclast_impl, (UNSPEC_CLASTB))
2566 FUNCTION (svcls, unary_count, (CLRSB))
2567 FUNCTION (svclz, unary_count, (CLZ))
2568 FUNCTION (svcmla, svcmla_impl,)
2569 FUNCTION (svcmla_lane, svcmla_lane_impl,)
2570 FUNCTION (svcmpeq, svcmp_impl, (EQ_EXPR, UNSPEC_COND_FCMEQ))
2571 FUNCTION (svcmpeq_wide, svcmp_wide_impl, (EQ_EXPR, UNSPEC_COND_CMPEQ_WIDE,
2572 UNSPEC_COND_CMPEQ_WIDE))
2573 FUNCTION (svcmpge, svcmp_impl, (GE_EXPR, UNSPEC_COND_FCMGE))
2574 FUNCTION (svcmpge_wide, svcmp_wide_impl, (GE_EXPR, UNSPEC_COND_CMPGE_WIDE,
2575 UNSPEC_COND_CMPHS_WIDE))
2576 FUNCTION (svcmpgt, svcmp_impl, (GT_EXPR, UNSPEC_COND_FCMGT))
2577 FUNCTION (svcmpgt_wide, svcmp_wide_impl, (GT_EXPR, UNSPEC_COND_CMPGT_WIDE,
2578 UNSPEC_COND_CMPHI_WIDE))
2579 FUNCTION (svcmple, svcmp_impl, (LE_EXPR, UNSPEC_COND_FCMLE))
2580 FUNCTION (svcmple_wide, svcmp_wide_impl, (LE_EXPR, UNSPEC_COND_CMPLE_WIDE,
2581 UNSPEC_COND_CMPLS_WIDE))
2582 FUNCTION (svcmplt, svcmp_impl, (LT_EXPR, UNSPEC_COND_FCMLT))
2583 FUNCTION (svcmplt_wide, svcmp_wide_impl, (LT_EXPR, UNSPEC_COND_CMPLT_WIDE,
2584 UNSPEC_COND_CMPLO_WIDE))
2585 FUNCTION (svcmpne, svcmp_impl, (NE_EXPR, UNSPEC_COND_FCMNE))
2586 FUNCTION (svcmpne_wide, svcmp_wide_impl, (NE_EXPR, UNSPEC_COND_CMPNE_WIDE,
2587 UNSPEC_COND_CMPNE_WIDE))
2588 FUNCTION (svcmpuo, svcmpuo_impl,)
2589 FUNCTION (svcnot, svcnot_impl,)
2590 FUNCTION (svcnt, unary_count, (POPCOUNT))
2591 FUNCTION (svcntb, svcnt_bhwd_impl, (VNx16QImode))
2592 FUNCTION (svcntb_pat, svcnt_bhwd_pat_impl, (VNx16QImode))
2593 FUNCTION (svcntd, svcnt_bhwd_impl, (VNx2DImode))
2594 FUNCTION (svcntd_pat, svcnt_bhwd_pat_impl, (VNx2DImode))
2595 FUNCTION (svcnth, svcnt_bhwd_impl, (VNx8HImode))
2596 FUNCTION (svcnth_pat, svcnt_bhwd_pat_impl, (VNx8HImode))
2597 FUNCTION (svcntp, svcntp_impl,)
2598 FUNCTION (svcntw, svcnt_bhwd_impl, (VNx4SImode))
2599 FUNCTION (svcntw_pat, svcnt_bhwd_pat_impl, (VNx4SImode))
2600 FUNCTION (svcompact, QUIET_CODE_FOR_MODE0 (aarch64_sve_compact),)
2601 FUNCTION (svcreate2, svcreate_impl, (2))
2602 FUNCTION (svcreate3, svcreate_impl, (3))
2603 FUNCTION (svcreate4, svcreate_impl, (4))
2604 FUNCTION (svcvt, svcvt_impl,)
2605 FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),)
2606 FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
2607 FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
2608 FUNCTION (svdot, svdot_impl,)
2609 FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1))
2610 FUNCTION (svdup, svdup_impl,)
2611 FUNCTION (svdup_lane, svdup_lane_impl,)
2612 FUNCTION (svdupq, svdupq_impl,)
2613 FUNCTION (svdupq_lane, svdupq_lane_impl,)
2614 FUNCTION (sveor, rtx_code_function, (XOR, XOR, -1))
2615 FUNCTION (sveorv, reduction, (UNSPEC_XORV))
2616 FUNCTION (svexpa, unspec_based_function, (-1, -1, UNSPEC_FEXPA))
2617 FUNCTION (svext, QUIET_CODE_FOR_MODE0 (aarch64_sve_ext),)
2618 FUNCTION (svextb, svext_bhw_impl, (QImode))
2619 FUNCTION (svexth, svext_bhw_impl, (HImode))
2620 FUNCTION (svextw, svext_bhw_impl, (SImode))
2621 FUNCTION (svget2, svget_impl, (2))
2622 FUNCTION (svget3, svget_impl, (3))
2623 FUNCTION (svget4, svget_impl, (4))
2624 FUNCTION (svindex, svindex_impl,)
2625 FUNCTION (svinsr, svinsr_impl,)
2626 FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA))
2627 FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB))
2628 FUNCTION (svld1, svld1_impl,)
2629 FUNCTION (svld1_gather, svld1_gather_impl,)
2630 FUNCTION (svld1ro, svld1ro_impl,)
2631 FUNCTION (svld1rq, svld1rq_impl,)
2632 FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8))
2633 FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8))
2634 FUNCTION (svld1sh, svld1_extend_impl, (TYPE_SUFFIX_s16))
2635 FUNCTION (svld1sh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s16))
2636 FUNCTION (svld1sw, svld1_extend_impl, (TYPE_SUFFIX_s32))
2637 FUNCTION (svld1sw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s32))
2638 FUNCTION (svld1ub, svld1_extend_impl, (TYPE_SUFFIX_u8))
2639 FUNCTION (svld1ub_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u8))
2640 FUNCTION (svld1uh, svld1_extend_impl, (TYPE_SUFFIX_u16))
2641 FUNCTION (svld1uh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u16))
2642 FUNCTION (svld1uw, svld1_extend_impl, (TYPE_SUFFIX_u32))
2643 FUNCTION (svld1uw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u32))
2644 FUNCTION (svld2, svld234_impl, (2))
2645 FUNCTION (svld3, svld234_impl, (3))
2646 FUNCTION (svld4, svld234_impl, (4))
2647 FUNCTION (svldff1, svldxf1_impl, (UNSPEC_LDFF1))
2648 FUNCTION (svldff1_gather, svldff1_gather_impl,)
2649 FUNCTION (svldff1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDFF1))
2650 FUNCTION (svldff1sb_gather, svldff1_gather_extend, (TYPE_SUFFIX_s8))
2651 FUNCTION (svldff1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDFF1))
2652 FUNCTION (svldff1sh_gather, svldff1_gather_extend, (TYPE_SUFFIX_s16))
2653 FUNCTION (svldff1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDFF1))
2654 FUNCTION (svldff1sw_gather, svldff1_gather_extend, (TYPE_SUFFIX_s32))
2655 FUNCTION (svldff1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDFF1))
2656 FUNCTION (svldff1ub_gather, svldff1_gather_extend, (TYPE_SUFFIX_u8))
2657 FUNCTION (svldff1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDFF1))
2658 FUNCTION (svldff1uh_gather, svldff1_gather_extend, (TYPE_SUFFIX_u16))
2659 FUNCTION (svldff1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDFF1))
2660 FUNCTION (svldff1uw_gather, svldff1_gather_extend, (TYPE_SUFFIX_u32))
2661 FUNCTION (svldnf1, svldxf1_impl, (UNSPEC_LDNF1))
2662 FUNCTION (svldnf1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDNF1))
2663 FUNCTION (svldnf1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDNF1))
2664 FUNCTION (svldnf1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDNF1))
2665 FUNCTION (svldnf1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDNF1))
2666 FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1))
2667 FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1))
2668 FUNCTION (svldnt1, svldnt1_impl,)
2669 FUNCTION (svlen, svlen_impl,)
2670 FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT))
2671 FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE))
2672 FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
2673 FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
2674 FUNCTION (svmad, svmad_impl,)
2675 FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX))
2676 FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM))
2677 FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV))
2678 FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV))
2679 FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN))
2680 FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM))
2681 FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV))
2682 FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV))
2683 FUNCTION (svmla, svmla_impl,)
2684 FUNCTION (svmla_lane, svmla_lane_impl,)
2685 FUNCTION (svmls, svmls_impl,)
2686 FUNCTION (svmls_lane, svmls_lane_impl,)
2687 FUNCTION (svmmla, svmmla_impl,)
2688 FUNCTION (svmov, svmov_impl,)
2689 FUNCTION (svmsb, svmsb_impl,)
2690 FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL))
2691 FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),)
2692 FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART,
2693 UNSPEC_UMUL_HIGHPART, -1))
2694 FUNCTION (svmulx, unspec_based_function, (-1, -1, UNSPEC_COND_FMULX))
2695 FUNCTION (svnand, svnand_impl,)
2696 FUNCTION (svneg, quiet<rtx_code_function>, (NEG, NEG, UNSPEC_COND_FNEG))
2697 FUNCTION (svnmad, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLA))
2698 FUNCTION (svnmla, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLA))
2699 FUNCTION (svnmls, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLS))
2700 FUNCTION (svnmsb, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLS))
2701 FUNCTION (svnor, svnor_impl,)
2702 FUNCTION (svnot, svnot_impl,)
2703 FUNCTION (svorn, svorn_impl,)
2704 FUNCTION (svorr, rtx_code_function, (IOR, IOR))
2705 FUNCTION (svorv, reduction, (UNSPEC_IORV))
2706 FUNCTION (svpfalse, svpfalse_impl,)
2707 FUNCTION (svpfirst, svpfirst_svpnext_impl, (UNSPEC_PFIRST))
2708 FUNCTION (svpnext, svpfirst_svpnext_impl, (UNSPEC_PNEXT))
2709 FUNCTION (svprfb, svprf_bhwd_impl, (VNx16QImode))
2710 FUNCTION (svprfb_gather, svprf_bhwd_gather_impl, (VNx16QImode))
2711 FUNCTION (svprfd, svprf_bhwd_impl, (VNx2DImode))
2712 FUNCTION (svprfd_gather, svprf_bhwd_gather_impl, (VNx2DImode))
2713 FUNCTION (svprfh, svprf_bhwd_impl, (VNx8HImode))
2714 FUNCTION (svprfh_gather, svprf_bhwd_gather_impl, (VNx8HImode))
2715 FUNCTION (svprfw, svprf_bhwd_impl, (VNx4SImode))
2716 FUNCTION (svprfw_gather, svprf_bhwd_gather_impl, (VNx4SImode))
2717 FUNCTION (svptest_any, svptest_impl, (NE))
2718 FUNCTION (svptest_first, svptest_impl, (LT))
2719 FUNCTION (svptest_last, svptest_impl, (LTU))
2720 FUNCTION (svptrue, svptrue_impl,)
2721 FUNCTION (svptrue_pat, svptrue_pat_impl,)
2722 FUNCTION (svqadd, rtx_code_function, (SS_PLUS, US_PLUS, -1))
2723 FUNCTION (svqdecb, svqdec_bhwd_impl, (QImode))
2724 FUNCTION (svqdecb_pat, svqdec_bhwd_impl, (QImode))
2725 FUNCTION (svqdecd, svqdec_bhwd_impl, (DImode))
2726 FUNCTION (svqdecd_pat, svqdec_bhwd_impl, (DImode))
2727 FUNCTION (svqdech, svqdec_bhwd_impl, (HImode))
2728 FUNCTION (svqdech_pat, svqdec_bhwd_impl, (HImode))
2729 FUNCTION (svqdecp, svqdecp_svqincp_impl, (SS_MINUS, US_MINUS))
2730 FUNCTION (svqdecw, svqdec_bhwd_impl, (SImode))
2731 FUNCTION (svqdecw_pat, svqdec_bhwd_impl, (SImode))
2732 FUNCTION (svqincb, svqinc_bhwd_impl, (QImode))
2733 FUNCTION (svqincb_pat, svqinc_bhwd_impl, (QImode))
2734 FUNCTION (svqincd, svqinc_bhwd_impl, (DImode))
2735 FUNCTION (svqincd_pat, svqinc_bhwd_impl, (DImode))
2736 FUNCTION (svqinch, svqinc_bhwd_impl, (HImode))
2737 FUNCTION (svqinch_pat, svqinc_bhwd_impl, (HImode))
2738 FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, US_PLUS))
2739 FUNCTION (svqincw, svqinc_bhwd_impl, (SImode))
2740 FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode))
2741 FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1))
2742 FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1))
2743 FUNCTION (svrdffr, svrdffr_impl,)
2744 FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE))
2745 FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS))
2746 FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX))
2747 FUNCTION (svreinterpret, svreinterpret_impl,)
2748 FUNCTION (svrev, svrev_impl,)
2749 FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1))
2750 FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1))
2751 FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1))
2752 FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA))
2753 FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI))
2754 FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM))
2755 FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN))
2756 FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP))
2757 FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX))
2758 FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ))
2759 FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE))
2760 FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS))
2761 FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE))
2762 FUNCTION (svsel, svsel_impl,)
2763 FUNCTION (svset2, svset_impl, (2))
2764 FUNCTION (svset3, svset_impl, (3))
2765 FUNCTION (svset4, svset_impl, (4))
2766 FUNCTION (svsetffr, svsetffr_impl,)
2767 FUNCTION (svsplice, QUIET_CODE_FOR_MODE0 (aarch64_sve_splice),)
2768 FUNCTION (svsqrt, rtx_code_function, (SQRT, SQRT, UNSPEC_COND_FSQRT))
2769 FUNCTION (svst1, svst1_impl,)
2770 FUNCTION (svst1_scatter, svst1_scatter_impl,)
2771 FUNCTION (svst1b, svst1_truncate_impl, (QImode))
2772 FUNCTION (svst1b_scatter, svst1_scatter_truncate_impl, (QImode))
2773 FUNCTION (svst1h, svst1_truncate_impl, (HImode))
2774 FUNCTION (svst1h_scatter, svst1_scatter_truncate_impl, (HImode))
2775 FUNCTION (svst1w, svst1_truncate_impl, (SImode))
2776 FUNCTION (svst1w_scatter, svst1_scatter_truncate_impl, (SImode))
2777 FUNCTION (svst2, svst234_impl, (2))
2778 FUNCTION (svst3, svst234_impl, (3))
2779 FUNCTION (svst4, svst234_impl, (4))
2780 FUNCTION (svstnt1, svstnt1_impl,)
2781 FUNCTION (svsub, svsub_impl,)
2782 FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB))
2783 FUNCTION (svsudot, svusdot_impl, (true))
2784 FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1))
2785 FUNCTION (svtbl, svtbl_impl,)
2786 FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),)
2787 FUNCTION (svtrn1, svtrn_impl, (0))
2788 FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q,
2789 UNSPEC_TRN1Q))
2790 FUNCTION (svtrn2, svtrn_impl, (1))
2791 FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q,
2792 UNSPEC_TRN2Q))
2793 FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL))
2794 FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL))
2795 FUNCTION (svundef, svundef_impl, (1))
2796 FUNCTION (svundef2, svundef_impl, (2))
2797 FUNCTION (svundef3, svundef_impl, (3))
2798 FUNCTION (svundef4, svundef_impl, (4))
2799 FUNCTION (svunpkhi, svunpk_impl, (true))
2800 FUNCTION (svunpklo, svunpk_impl, (false))
2801 FUNCTION (svusdot, svusdot_impl, (false))
2802 FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1))
2803 FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1))
2804 FUNCTION (svuzp1, svuzp_impl, (0))
2805 FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q,
2806 UNSPEC_UZP1Q))
2807 FUNCTION (svuzp2, svuzp_impl, (1))
2808 FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q,
2809 UNSPEC_UZP2Q))
2810 FUNCTION (svwhilele, svwhilelx_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true))
2811 FUNCTION (svwhilelt, svwhilelx_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false))
2812 FUNCTION (svwrffr, svwrffr_impl,)
2813 FUNCTION (svzip1, svzip_impl, (0))
2814 FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q,
2815 UNSPEC_ZIP1Q))
2816 FUNCTION (svzip2, svzip_impl, (1))
2817 FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q,
2818 UNSPEC_ZIP2Q))
2819
2820 } /* end namespace aarch64_sve */