/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics)
   Copyright (C) 2018-2020 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "memmodel.h"
#include "insn-codes.h"
#include "optabs.h"
#include "recog.h"
#include "expr.h"
#include "basic-block.h"
#include "function.h"
#include "fold-const.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "explow.h"
#include "emit-rtl.h"
#include "tree-vector-builder.h"
#include "rtx-vector-builder.h"
#include "vec-perm-indices.h"
#include "aarch64-sve-builtins.h"
#include "aarch64-sve-builtins-shapes.h"
#include "aarch64-sve-builtins-base.h"
#include "aarch64-sve-builtins-functions.h"

using namespace aarch64_sve;

namespace {

/* Return the UNSPEC_CMLA* unspec for rotation amount ROT.  */
static int
unspec_cmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_CMLA;
    case 90: return UNSPEC_CMLA90;
    case 180: return UNSPEC_CMLA180;
    case 270: return UNSPEC_CMLA270;
    default: gcc_unreachable ();
    }
}

/* Return the UNSPEC_FCMLA* unspec for rotation amount ROT.  */
static int
unspec_fcmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_FCMLA;
    case 90: return UNSPEC_FCMLA90;
    case 180: return UNSPEC_FCMLA180;
    case 270: return UNSPEC_FCMLA270;
    default: gcc_unreachable ();
    }
}

/* Return the UNSPEC_COND_FCMLA* unspec for rotation amount ROT.  */
static int
unspec_cond_fcmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_COND_FCMLA;
    case 90: return UNSPEC_COND_FCMLA90;
    case 180: return UNSPEC_COND_FCMLA180;
    case 270: return UNSPEC_COND_FCMLA270;
    default: gcc_unreachable ();
    }
}

/* Expand a call to svmad, or svmla after reordering its operands.
   Make _m forms merge with argument MERGE_ARGNO.  */
static rtx
expand_mad (function_expander &e,
            unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
{
  if (e.pred == PRED_x)
    {
      insn_code icode;
      if (e.type_suffix (0).integer_p)
        icode = code_for_aarch64_pred_fma (e.vector_mode (0));
      else
        icode = code_for_aarch64_pred (UNSPEC_COND_FMLA, e.vector_mode (0));
      return e.use_pred_x_insn (icode);
    }

  insn_code icode = e.direct_optab_handler (cond_fma_optab);
  return e.use_cond_insn (icode, merge_argno);
}

/* Expand a call to svmla_lane or svmls_lane using floating-point unspec
   UNSPEC.  */
static rtx
expand_mla_mls_lane (function_expander &e, int unspec)
{
  /* Put the operands in the normal (fma ...) order, with the accumulator
     last.  This fits naturally since that's also the unprinted operand
     in the asm output.  */
  e.rotate_inputs_left (0, 4);
  insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
  return e.use_exact_insn (icode);
}

/* Expand a call to svmsb, or svmls after reordering its operands.
   Make _m forms merge with argument MERGE_ARGNO.  */
static rtx
expand_msb (function_expander &e,
            unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
{
  if (e.pred == PRED_x)
    {
      insn_code icode;
      if (e.type_suffix (0).integer_p)
        icode = code_for_aarch64_pred_fnma (e.vector_mode (0));
      else
        icode = code_for_aarch64_pred (UNSPEC_COND_FMLS, e.vector_mode (0));
      return e.use_pred_x_insn (icode);
    }

  insn_code icode = e.direct_optab_handler (cond_fnma_optab);
  return e.use_cond_insn (icode, merge_argno);
}

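/* Implements svabd (absolute difference).  */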
class svabd_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The integer operations are represented as the subtraction of the
       minimum from the maximum, with the signedness of the instruction
       keyed off the signedness of the maximum operation.  */
    rtx_code max_code = e.type_suffix (0).unsigned_p ? UMAX : SMAX;
    insn_code icode;
    if (e.pred == PRED_x)
      {
        if (e.type_suffix (0).integer_p)
          icode = code_for_aarch64_pred_abd (max_code, e.vector_mode (0));
        else
          icode = code_for_aarch64_pred_abd (e.vector_mode (0));
        return e.use_pred_x_insn (icode);
      }

    if (e.type_suffix (0).integer_p)
      icode = code_for_aarch64_cond_abd (max_code, e.vector_mode (0));
    else
      icode = code_for_aarch64_cond_abd (e.vector_mode (0));
    return e.use_cond_insn (icode);
  }
};

/* Implements svacge, svacgt, svacle and svaclt.  */
class svac_impl : public function_base
{
public:
  CONSTEXPR svac_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.add_ptrue_hint (0, e.gp_mode (0));
    insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The unspec code for the underlying comparison.  */
  int m_unspec;
};

class svadda_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Put the predicate last, as required by mask_fold_left_plus_optab.  */
    e.rotate_inputs_left (0, 3);
    machine_mode mode = e.vector_mode (0);
    insn_code icode = direct_optab_handler (mask_fold_left_plus_optab, mode);
    return e.use_exact_insn (icode);
  }
};

/* Implements svadr[bhwd].  */
class svadr_bhwd_impl : public function_base
{
public:
  CONSTEXPR svadr_bhwd_impl (unsigned int shift) : m_shift (shift) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = GET_MODE (e.args[0]);
    if (m_shift == 0)
      return e.use_exact_insn (code_for_aarch64_adr (mode));

    /* Turn the access size into an extra shift argument.  */
    rtx shift = gen_int_mode (m_shift, GET_MODE_INNER (mode));
    e.args.quick_push (expand_vector_broadcast (mode, shift));
    return e.use_exact_insn (code_for_aarch64_adr_shift (mode));
  }

  /* How many bits left to shift the vector displacement.  */
  unsigned int m_shift;
};

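/* Implements svbic (AND with an inverted operand).  */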
class svbic_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Convert svbic of a constant into svand of its inverse.  */
    if (CONST_INT_P (e.args[2]))
      {
        machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
        e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode);
        return e.map_to_rtx_codes (AND, AND, -1);
      }

    if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
      {
        gcc_assert (e.pred == PRED_z);
        return e.use_exact_insn (CODE_FOR_aarch64_pred_bicvnx16bi_z);
      }

    if (e.pred == PRED_x)
      return e.use_unpred_insn (code_for_aarch64_bic (e.vector_mode (0)));

    return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0)));
  }
};

/* Implements svbrkn, svbrkpa and svbrkpb.  */
class svbrk_binary_impl : public function_base
{
public:
  CONSTEXPR svbrk_binary_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (code_for_aarch64_brk (m_unspec));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

/* Implements svbrka and svbrkb.  */
class svbrk_unary_impl : public function_base
{
public:
  CONSTEXPR svbrk_unary_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_cond_insn (code_for_aarch64_brk (m_unspec));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

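/* Implements svcadd (complex addition with rotation).  */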
class svcadd_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Convert the rotation amount into a specific unspec.  */
    int rot = INTVAL (e.args.pop ());
    if (rot == 90)
      return e.map_to_unspecs (UNSPEC_CADD90, UNSPEC_CADD90,
                               UNSPEC_COND_FCADD90);
    if (rot == 270)
      return e.map_to_unspecs (UNSPEC_CADD270, UNSPEC_CADD270,
                               UNSPEC_COND_FCADD270);
    gcc_unreachable ();
  }
};

/* Implements svclasta and svclastb.  */
class svclast_impl : public quiet<function_base>
{
public:
  CONSTEXPR svclast_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Match the fold_extract_optab order.  */
    std::swap (e.args[0], e.args[1]);
    machine_mode mode = e.vector_mode (0);
    insn_code icode;
    if (e.mode_suffix_id == MODE_n)
      icode = code_for_fold_extract (m_unspec, mode);
    else
      icode = code_for_aarch64_fold_extract_vector (m_unspec, mode);
    return e.use_exact_insn (icode);
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

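/* Implements svcmla (complex multiply-add with rotation).  */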
class svcmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Convert the rotation amount into a specific unspec.  */
    int rot = INTVAL (e.args.pop ());
    if (e.type_suffix (0).float_p)
      {
        /* Make the operand order the same as the one used by the fma optabs,
           with the accumulator last.  */
        e.rotate_inputs_left (1, 4);
        return e.map_to_unspecs (-1, -1, unspec_cond_fcmla (rot), 3);
      }
    else
      {
        int cmla = unspec_cmla (rot);
        return e.map_to_unspecs (cmla, cmla, -1);
      }
  }
};

class svcmla_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Convert the rotation amount into a specific unspec.  */
    int rot = INTVAL (e.args.pop ());
    machine_mode mode = e.vector_mode (0);
    if (e.type_suffix (0).float_p)
      {
        /* Make the operand order the same as the one used by the fma optabs,
           with the accumulator last.  */
        e.rotate_inputs_left (0, 4);
        insn_code icode = code_for_aarch64_lane (unspec_fcmla (rot), mode);
        return e.use_exact_insn (icode);
      }
    else
      {
        insn_code icode = code_for_aarch64_lane (unspec_cmla (rot), mode);
        return e.use_exact_insn (icode);
      }
  }
};

/* Implements svcmp<cc> (except svcmpuo, which is handled separately).  */
class svcmp_impl : public function_base
{
public:
  CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp)
    : m_code (code), m_unspec_for_fp (unspec_for_fp) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree pg = gimple_call_arg (f.call, 0);
    tree rhs1 = gimple_call_arg (f.call, 1);
    tree rhs2 = gimple_call_arg (f.call, 2);

    /* Convert a ptrue-predicated integer comparison into the corresponding
       gimple-level operation.  */
    if (integer_all_onesp (pg)
        && f.type_suffix (0).element_bytes == 1
        && f.type_suffix (0).integer_p)
      {
        gimple_seq stmts = NULL;
        rhs2 = f.force_vector (stmts, TREE_TYPE (rhs1), rhs2);
        gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
        return gimple_build_assign (f.lhs, m_code, rhs1, rhs2);
      }

    return NULL;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);

    /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
       operand.  */
    e.add_ptrue_hint (0, e.gp_mode (0));

    if (e.type_suffix (0).integer_p)
      {
        bool unsigned_p = e.type_suffix (0).unsigned_p;
        rtx_code code = get_rtx_code (m_code, unsigned_p);
        return e.use_exact_insn (code_for_aarch64_pred_cmp (code, mode));
      }

    insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode);
    return e.use_exact_insn (icode);
  }

  /* The tree code associated with the comparison.  */
  tree_code m_code;

  /* The unspec code to use for floating-point comparisons.  */
  int m_unspec_for_fp;
};

/* Implements svcmp<cc>_wide.  */
class svcmp_wide_impl : public function_base
{
public:
  CONSTEXPR svcmp_wide_impl (tree_code code, int unspec_for_sint,
                             int unspec_for_uint)
    : m_code (code), m_unspec_for_sint (unspec_for_sint),
      m_unspec_for_uint (unspec_for_uint) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    bool unsigned_p = e.type_suffix (0).unsigned_p;
    rtx_code code = get_rtx_code (m_code, unsigned_p);

    /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
       operand.  */
    e.add_ptrue_hint (0, e.gp_mode (0));

    /* If the argument is a constant that the unwidened comparisons
       can handle directly, use them instead.  */
    insn_code icode = code_for_aarch64_pred_cmp (code, mode);
    rtx op2 = unwrap_const_vec_duplicate (e.args[3]);
    if (CONSTANT_P (op2)
        && insn_data[icode].operand[4].predicate (op2, DImode))
      {
        e.args[3] = op2;
        return e.use_exact_insn (icode);
      }

    int unspec = (unsigned_p ? m_unspec_for_uint : m_unspec_for_sint);
    return e.use_exact_insn (code_for_aarch64_pred_cmp_wide (unspec, mode));
  }

  /* The tree code associated with the comparison.  */
  tree_code m_code;

  /* The unspec codes for signed and unsigned wide comparisons
     respectively.  */
  int m_unspec_for_sint;
  int m_unspec_for_uint;
};

class svcmpuo_impl : public quiet<function_base>
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.add_ptrue_hint (0, e.gp_mode (0));
    return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0)));
  }
};

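/* Implements svcnot (logical inverse of each active element).  */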
class svcnot_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    if (e.pred == PRED_x)
      {
        /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
           a ptrue hint.  */
        e.add_ptrue_hint (0, e.gp_mode (0));
        return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
      }

    return e.use_cond_insn (code_for_cond_cnot (mode), 0);
  }
};

/* Implements svcnt[bhwd], which count the number of elements
   in a particular vector mode.  */
class svcnt_bhwd_impl : public function_base
{
public:
  CONSTEXPR svcnt_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree count = build_int_cstu (TREE_TYPE (f.lhs),
                                 GET_MODE_NUNITS (m_ref_mode));
    return gimple_build_assign (f.lhs, count);
  }

  rtx
  expand (function_expander &) const OVERRIDE
  {
    return gen_int_mode (GET_MODE_NUNITS (m_ref_mode), DImode);
  }

  /* The mode of the vector associated with the [bhwd] suffix.  */
  machine_mode m_ref_mode;
};

/* Implements svcnt[bhwd]_pat.  */
class svcnt_bhwd_pat_impl : public svcnt_bhwd_impl
{
public:
  CONSTEXPR svcnt_bhwd_pat_impl (machine_mode ref_mode)
    : svcnt_bhwd_impl (ref_mode) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree pattern_arg = gimple_call_arg (f.call, 0);
    aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg);

    if (pattern == AARCH64_SV_ALL)
      /* svcnt[bhwd]_pat (SV_ALL) == svcnt[bhwd] ().  */
      return svcnt_bhwd_impl::fold (f);

    /* See whether we can count the number of elements in the pattern
       at compile time.  */
    unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
    HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq);
    if (value >= 0)
      {
        tree count = build_int_cstu (TREE_TYPE (f.lhs), value);
        return gimple_build_assign (f.lhs, count);
      }

    return NULL;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
    e.args.quick_push (gen_int_mode (elements_per_vq, DImode));
    e.args.quick_push (const1_rtx);
    return e.use_exact_insn (CODE_FOR_aarch64_sve_cnt_pat);
  }
};

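/* Implements svcntp (count the active elements in a predicate).  */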
class svcntp_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    e.add_ptrue_hint (0, mode);
    return e.use_exact_insn (code_for_aarch64_pred_cntp (mode));
  }
};

/* Implements svcreate2, svcreate3 and svcreate4.  */
class svcreate_impl : public quiet<multi_vector_function>
{
public:
  CONSTEXPR svcreate_impl (unsigned int vectors_per_tuple)
    : quiet<multi_vector_function> (vectors_per_tuple) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    unsigned int nargs = gimple_call_num_args (f.call);
    tree lhs_type = TREE_TYPE (f.lhs);

    /* Replace the call with a clobber of the result (to prevent it from
       becoming upwards exposed) followed by stores into each individual
       vector of tuple.

       The fold routines expect the replacement statement to have the
       same lhs as the original call, so return the clobber statement
       rather than the final vector store.  */
    gassign *clobber = gimple_build_assign (f.lhs, build_clobber (lhs_type));

    for (unsigned int i = nargs; i-- > 0; )
      {
        tree rhs_vector = gimple_call_arg (f.call, i);
        tree field = tuple_type_field (TREE_TYPE (f.lhs));
        tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
                                 unshare_expr (f.lhs), field, NULL_TREE);
        tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
                                  lhs_array, size_int (i),
                                  NULL_TREE, NULL_TREE);
        gassign *assign = gimple_build_assign (lhs_vector, rhs_vector);
        gsi_insert_after (f.gsi, assign, GSI_SAME_STMT);
      }
    return clobber;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    rtx lhs_tuple = e.get_nonoverlapping_reg_target ();

    /* Record that LHS_TUPLE is dead before the first store.  */
    emit_clobber (lhs_tuple);
    for (unsigned int i = 0; i < e.args.length (); ++i)
      {
        /* Use an lvalue subreg to refer to vector I in LHS_TUPLE.  */
        rtx lhs_vector = simplify_gen_subreg (GET_MODE (e.args[i]),
                                              lhs_tuple, GET_MODE (lhs_tuple),
                                              i * BYTES_PER_SVE_VECTOR);
        emit_move_insn (lhs_vector, e.args[i]);
      }
    return lhs_tuple;
  }
};

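/* Implements svcvt (conversions between vector element types).  */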
class svcvt_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode0 = e.vector_mode (0);
    machine_mode mode1 = e.vector_mode (1);
    insn_code icode;
    /* All this complication comes from the need to select four things
       simultaneously:

       (1) the kind of conversion (int<-float, float<-int, float<-float)
       (2) signed vs. unsigned integers, where relevant
       (3) the predication mode, which must be the wider of the predication
           modes for MODE0 and MODE1
       (4) the predication type (m, x or z)

       The only supported int<->float conversions for which the integer is
       narrower than the float are SI<->DF.  It's therefore more convenient
       to handle (3) by defining two patterns for int<->float conversions:
       one in which the integer is at least as wide as the float and so
       determines the predication mode, and another single SI<->DF pattern
       in which the float's mode determines the predication mode (which is
       always VNx2BI in that case).

       The names of the patterns follow the optab convention of giving
       the source mode before the destination mode.  */
    if (e.type_suffix (1).integer_p)
      {
        int unspec = (e.type_suffix (1).unsigned_p
                      ? UNSPEC_COND_UCVTF
                      : UNSPEC_COND_SCVTF);
        if (e.type_suffix (0).element_bytes <= e.type_suffix (1).element_bytes)
          icode = (e.pred == PRED_x
                   ? code_for_aarch64_sve_nonextend (unspec, mode1, mode0)
                   : code_for_cond_nonextend (unspec, mode1, mode0));
        else
          icode = (e.pred == PRED_x
                   ? code_for_aarch64_sve_extend (unspec, mode1, mode0)
                   : code_for_cond_extend (unspec, mode1, mode0));
      }
    else
      {
        int unspec = (!e.type_suffix (0).integer_p ? UNSPEC_COND_FCVT
                      : e.type_suffix (0).unsigned_p ? UNSPEC_COND_FCVTZU
                      : UNSPEC_COND_FCVTZS);
        if (e.type_suffix (0).element_bytes >= e.type_suffix (1).element_bytes)
          icode = (e.pred == PRED_x
                   ? code_for_aarch64_sve_nontrunc (unspec, mode1, mode0)
                   : code_for_cond_nontrunc (unspec, mode1, mode0));
        else
          icode = (e.pred == PRED_x
                   ? code_for_aarch64_sve_trunc (unspec, mode1, mode0)
                   : code_for_cond_trunc (unspec, mode1, mode0));
      }

    if (e.pred == PRED_x)
      return e.use_pred_x_insn (icode);
    return e.use_cond_insn (icode);
  }
};

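/* Implements svdot (dot product).  */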
class svdot_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* In the optab, the multiplication operands come before the accumulator
       operand.  The optab is keyed off the multiplication mode.  */
    e.rotate_inputs_left (0, 3);
    insn_code icode
      = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab,
                                         0, GET_MODE (e.args[0]));
    return e.use_unpred_insn (icode);
  }
};

class svdotprod_lane_impl : public unspec_based_function_base
{
public:
  CONSTEXPR svdotprod_lane_impl (int unspec_for_sint,
                                 int unspec_for_uint,
                                 int unspec_for_float)
    : unspec_based_function_base (unspec_for_sint,
                                  unspec_for_uint,
                                  unspec_for_float) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Use the same ordering as the dot_prod_optab, with the
       accumulator last.  */
    e.rotate_inputs_left (0, 4);
    int unspec = unspec_for (e);
    machine_mode mode = e.vector_mode (0);
    return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode));
  }
};

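/* Implements svdup (broadcast of a scalar to a vector or predicate).  */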
class svdup_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree vec_type = TREE_TYPE (f.lhs);
    tree rhs = gimple_call_arg (f.call, f.pred == PRED_none ? 0 : 1);

    if (f.pred == PRED_none || f.pred == PRED_x)
      {
        if (CONSTANT_CLASS_P (rhs))
          {
            if (f.type_suffix (0).bool_p)
              return (tree_to_shwi (rhs)
                      ? f.fold_to_ptrue ()
                      : f.fold_to_pfalse ());

            tree rhs_vector = build_vector_from_val (vec_type, rhs);
            return gimple_build_assign (f.lhs, rhs_vector);
          }

        /* Avoid folding _b to a VEC_DUPLICATE_EXPR, since to do that we
           would need to introduce an extra and unwanted conversion to
           the truth vector element type.  */
        if (!f.type_suffix (0).bool_p)
          return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs);
      }

    /* svdup_z (pg, x) == VEC_COND_EXPR <pg, VEC_DUPLICATE_EXPR <x>, 0>.  */
    if (f.pred == PRED_z)
      {
        gimple_seq stmts = NULL;
        tree pred = f.convert_pred (stmts, vec_type, 0);
        rhs = f.force_vector (stmts, vec_type, rhs);
        gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
        return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, rhs,
                                    build_zero_cst (vec_type));
      }

    return NULL;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.pred == PRED_none || e.pred == PRED_x)
      /* There's no benefit to using predicated instructions for _x here.  */
      return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab));

    /* Model predicated svdups as a SEL in which the "true" value is
       the duplicate of the function argument and the "false" value
       is the value of inactive lanes.  */
    insn_code icode;
    machine_mode mode = e.vector_mode (0);
    if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ()))
      /* Duplicate the constant to fill a vector.  The pattern optimizes
         various cases involving constant operands, falling back to SEL
         if necessary.  */
      icode = code_for_vcond_mask (mode, mode);
    else
      /* Use the pattern for selecting between a duplicated scalar
         variable and a vector fallback.  */
      icode = code_for_aarch64_sel_dup (mode);
    return e.use_vcond_mask_insn (icode);
  }
};

class svdup_lane_impl : public quiet<function_base>
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The native DUP lane has an index range of 64 bytes.  */
    machine_mode mode = e.vector_mode (0);
    if (CONST_INT_P (e.args[1])
        && IN_RANGE (INTVAL (e.args[1]) * GET_MODE_UNIT_SIZE (mode), 0, 63))
      return e.use_exact_insn (code_for_aarch64_sve_dup_lane (mode));

    /* Treat svdup_lane as if it were svtbl_n.  */
    return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0)));
  }
};

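/* Implements svdupq, which duplicates a 128-bit quadword of given
   element values across a vector.  */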
class svdupq_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree vec_type = TREE_TYPE (f.lhs);
    unsigned int nargs = gimple_call_num_args (f.call);
    /* For predicates, pad out each argument so that we have one element
       per bit.  */
    unsigned int factor = (f.type_suffix (0).bool_p
                           ? f.type_suffix (0).element_bytes : 1);
    tree_vector_builder builder (vec_type, nargs * factor, 1);
    for (unsigned int i = 0; i < nargs; ++i)
      {
        tree elt = gimple_call_arg (f.call, i);
        if (!CONSTANT_CLASS_P (elt))
          return NULL;
        builder.quick_push (elt);
        for (unsigned int j = 1; j < factor; ++j)
          builder.quick_push (build_zero_cst (TREE_TYPE (vec_type)));
      }
    return gimple_build_assign (f.lhs, builder.build ());
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    unsigned int elements_per_vq = e.args.length ();
    if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
      {
        /* Construct a vector of integers so that we can compare them against
           zero below.  Zero vs. nonzero is the only distinction that
           matters.  */
        mode = aarch64_sve_int_mode (mode);
        for (unsigned int i = 0; i < elements_per_vq; ++i)
          e.args[i] = simplify_gen_unary (ZERO_EXTEND, GET_MODE_INNER (mode),
                                          e.args[i], QImode);
      }

    /* Get the 128-bit Advanced SIMD vector for this data size.  */
    scalar_mode element_mode = GET_MODE_INNER (mode);
    machine_mode vq_mode = aarch64_vq_mode (element_mode).require ();
    gcc_assert (known_eq (elements_per_vq, GET_MODE_NUNITS (vq_mode)));

    /* Put the arguments into a 128-bit Advanced SIMD vector.  We want
       argument N to go into architectural lane N, whereas Advanced SIMD
       vectors are loaded memory lsb to register lsb.  We therefore need
       to reverse the elements for big-endian targets.  */
    rtx vq_reg = gen_reg_rtx (vq_mode);
    rtvec vec = rtvec_alloc (elements_per_vq);
    for (unsigned int i = 0; i < elements_per_vq; ++i)
      {
        unsigned int argno = BYTES_BIG_ENDIAN ? elements_per_vq - i - 1 : i;
        RTVEC_ELT (vec, i) = e.args[argno];
      }
    aarch64_expand_vector_init (vq_reg, gen_rtx_PARALLEL (vq_mode, vec));

    /* If the result is a boolean, compare the data vector against zero.  */
    if (mode != e.vector_mode (0))
      {
        rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg);
        return aarch64_convert_sve_data_to_pred (e.possible_target,
                                                 e.vector_mode (0), data_dupq);
      }

    return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg);
  }
};

class svdupq_lane_impl : public quiet<function_base>
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    rtx index = e.args[1];
    if (CONST_INT_P (index) && IN_RANGE (INTVAL (index), 0, 3))
      {
        /* Use the .Q form of DUP, which is the native instruction for
           this function.  */
        insn_code icode = code_for_aarch64_sve_dupq_lane (mode);
        unsigned int num_indices = e.elements_per_vq (0);
        rtx indices = aarch64_gen_stepped_int_parallel
          (num_indices, INTVAL (index) * num_indices, 1);

        e.add_output_operand (icode);
        e.add_input_operand (icode, e.args[0]);
        e.add_fixed_operand (indices);
        return e.generate_insn (icode);
      }

    /* Build a .D TBL index for the pairs of doublewords that we want to
       duplicate.  */
    if (CONST_INT_P (index))
      {
        /* The index vector is a constant.  */
        rtx_vector_builder builder (VNx2DImode, 2, 1);
        builder.quick_push (gen_int_mode (INTVAL (index) * 2, DImode));
        builder.quick_push (gen_int_mode (INTVAL (index) * 2 + 1, DImode));
        index = builder.build ();
      }
    else
      {
        /* Duplicate INDEX * 2 to fill a DImode vector.  The ACLE spec
           explicitly allows the top of the index to be dropped.  */
        index = force_reg (DImode, simplify_gen_binary (ASHIFT, DImode,
                                                        index, const1_rtx));
        index = expand_vector_broadcast (VNx2DImode, index);

        /* Get an alternating 0, 1 predicate.  */
        rtx_vector_builder builder (VNx2BImode, 2, 1);
        builder.quick_push (const0_rtx);
        builder.quick_push (constm1_rtx);
        rtx pg = force_reg (VNx2BImode, builder.build ());

        /* Add one to the odd elements of the index.  */
        rtx one = force_reg (VNx2DImode, CONST1_RTX (VNx2DImode));
        rtx target = gen_reg_rtx (VNx2DImode);
        emit_insn (gen_cond_addvnx2di (target, pg, index, one, index));
        index = target;
      }

    e.args[0] = gen_lowpart (VNx2DImode, e.args[0]);
    e.args[1] = index;
    return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di);
  }
};

/* Implements svextb, svexth and svextw.  */
class svext_bhw_impl : public function_base
{
public:
  CONSTEXPR svext_bhw_impl (scalar_int_mode from_mode)
    : m_from_mode (from_mode) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.type_suffix (0).unsigned_p)
      {
        /* Convert to an AND.  The widest we go is 0xffffffff, which fits
           in a CONST_INT.  */
        e.args.quick_push (GEN_INT (GET_MODE_MASK (m_from_mode)));
        if (e.pred == PRED_m)
          /* We now have arguments "(inactive, pg, op, mask)".  Convert this
             to "(pg, op, mask, inactive)" so that the order matches svand_m
             with an extra argument on the end.  Take the inactive elements
             from this extra argument.  */
          e.rotate_inputs_left (0, 4);
        return e.map_to_rtx_codes (AND, AND, -1, 3);
      }

    machine_mode wide_mode = e.vector_mode (0);
    poly_uint64 nunits = GET_MODE_NUNITS (wide_mode);
    machine_mode narrow_mode
      = aarch64_sve_data_mode (m_from_mode, nunits).require ();
    if (e.pred == PRED_x)
      {
        insn_code icode = code_for_aarch64_pred_sxt (wide_mode, narrow_mode);
        return e.use_pred_x_insn (icode);
      }

    insn_code icode = code_for_aarch64_cond_sxt (wide_mode, narrow_mode);
    return e.use_cond_insn (icode);
  }

  /* The element mode that we're extending from.  */
  scalar_int_mode m_from_mode;
};

/* Implements svget2, svget3 and svget4.  */
class svget_impl : public quiet<multi_vector_function>
{
public:
  CONSTEXPR svget_impl (unsigned int vectors_per_tuple)
    : quiet<multi_vector_function> (vectors_per_tuple) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* Fold into a normal gimple component access.  */
    tree rhs_tuple = gimple_call_arg (f.call, 0);
    tree index = gimple_call_arg (f.call, 1);
    tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
    tree rhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
                             rhs_tuple, field, NULL_TREE);
    tree rhs_vector = build4 (ARRAY_REF, TREE_TYPE (f.lhs),
                              rhs_array, index, NULL_TREE, NULL_TREE);
    return gimple_build_assign (f.lhs, rhs_vector);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Fold the access into a subreg rvalue.  */
    return simplify_gen_subreg (e.vector_mode (0), e.args[0],
                                GET_MODE (e.args[0]),
                                INTVAL (e.args[1]) * BYTES_PER_SVE_VECTOR);
  }
};

class svindex_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (e.direct_optab_handler (vec_series_optab));
  }
};

class svinsr_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    gcall *new_call = gimple_build_call_internal (IFN_VEC_SHL_INSERT, 2,
                                                  gimple_call_arg (f.call, 0),
                                                  gimple_call_arg (f.call, 1));
    gimple_call_set_lhs (new_call, f.lhs);
    return new_call;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = direct_optab_handler (vec_shl_insert_optab,
                                            e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

/* Implements svlasta and svlastb.  */
class svlast_impl : public quiet<function_base>
{
public:
  CONSTEXPR svlast_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (code_for_extract (m_unspec, e.vector_mode (0)));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

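/* Implements contiguous forms of svld1.  */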
class svld1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
                                                  base, cookie, pred);
    gimple_call_set_lhs (new_call, f.lhs);
    return new_call;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = convert_optab_handler (maskload_optab,
                                             e.vector_mode (0), e.gp_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

/* Implements extending contiguous forms of svld1.  */
class svld1_extend_impl : public extending_load
{
public:
  CONSTEXPR svld1_extend_impl (type_suffix_index memory_type)
    : extending_load (memory_type) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_load (extend_rtx_code (),
                                             e.vector_mode (0),
                                             e.memory_vector_mode ());
    return e.use_contiguous_load_insn (icode);
  }
};

class svld1_gather_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, as required by mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    machine_mode mem_mode = e.memory_vector_mode ();
    machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
    insn_code icode = convert_optab_handler (mask_gather_load_optab,
                                             mem_mode, int_mode);
    return e.use_exact_insn (icode);
  }
};

/* Implements extending forms of svld1_gather.  */
class svld1_gather_extend_impl : public extending_load
{
public:
  CONSTEXPR svld1_gather_extend_impl (type_suffix_index memory_type)
    : extending_load (memory_type) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since the extending gathers use the same
       operand order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    /* Add a constant predicate for the extension rtx.  */
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (),
                                                    e.vector_mode (0),
                                                    e.memory_vector_mode ());
    return e.use_exact_insn (icode);
  }
};

class load_replicate : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  tree
  memory_scalar_type (const function_instance &fi) const OVERRIDE
  {
    return fi.scalar_type (0);
  }
};

class svld1rq_impl : public load_replicate
{
public:
  machine_mode
  memory_vector_mode (const function_instance &fi) const OVERRIDE
  {
    return aarch64_vq_mode (GET_MODE_INNER (fi.vector_mode (0))).require ();
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

class svld1ro_impl : public load_replicate
{
public:
  machine_mode
  memory_vector_mode (const function_instance &) const OVERRIDE
  {
    return OImode;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_sve_ld1ro (e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

/* Implements svld2, svld3 and svld4.  */
class svld234_impl : public full_width_access
{
public:
  CONSTEXPR svld234_impl (unsigned int vectors_per_tuple)
    : full_width_access (vectors_per_tuple) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree tuple_type = TREE_TYPE (f.lhs);
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    /* Emit two statements: a clobber of the lhs, so that it isn't
       upwards exposed, and then the load itself.

       The fold routines expect the replacement statement to have the
       same lhs as the original call, so return the clobber statement
       rather than the load.  */
    gimple *clobber = gimple_build_assign (f.lhs, build_clobber (tuple_type));

    /* View the loaded data as an array of vectors.  */
    tree field = tuple_type_field (tuple_type);
    tree lhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field),
                             unshare_expr (f.lhs));

    /* Emit the load itself.  */
    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
                                                  base, cookie, pred);
    gimple_call_set_lhs (new_call, lhs_array);
    gsi_insert_after (f.gsi, new_call, GSI_SAME_STMT);

    return clobber;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr));
    insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab,
                                             tuple_mode, e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

class svldff1_gather_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since ldff1_gather uses the same operand
       order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    machine_mode mem_mode = e.memory_vector_mode ();
    return e.use_exact_insn (code_for_aarch64_ldff1_gather (mem_mode));
  }
};

/* Implements extending forms of svldff1_gather.  */
class svldff1_gather_extend : public extending_load
{
public:
  CONSTEXPR svldff1_gather_extend (type_suffix_index memory_type)
    : extending_load (memory_type) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since ldff1_gather uses the same operand
       order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    /* Add a constant predicate for the extension rtx.  */
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (),
                                                     e.vector_mode (0),
                                                     e.memory_vector_mode ());
    return e.use_exact_insn (icode);
  }
};

class svldnt1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

/* Implements svldff1 and svldnf1.  */
class svldxf1_impl : public full_width_access
{
public:
  CONSTEXPR svldxf1_impl (int unspec) : m_unspec (unspec) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    machine_mode mode = e.vector_mode (0);
    return e.use_contiguous_load_insn (code_for_aarch64_ldf1 (m_unspec, mode));
  }

  /* The unspec associated with the load.  */
  int m_unspec;
};

/* Implements extending contiguous forms of svldff1 and svldnf1.  */
class svldxf1_extend_impl : public extending_load
{
public:
  CONSTEXPR svldxf1_extend_impl (type_suffix_index memory_type, int unspec)
    : extending_load (memory_type), m_unspec (unspec) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    insn_code icode = code_for_aarch64_ldf1 (m_unspec, extend_rtx_code (),
                                             e.vector_mode (0),
                                             e.memory_vector_mode ());
    return e.use_contiguous_load_insn (icode);
  }

  /* The unspec associated with the load.  */
  int m_unspec;
};

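/* Implements svlen, which returns the number of elements in a vector.  */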
class svlen_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* The argument only exists for its type.  */
    tree rhs_type = TREE_TYPE (gimple_call_arg (f.call, 0));
    tree count = build_int_cstu (TREE_TYPE (f.lhs),
                                 TYPE_VECTOR_SUBPARTS (rhs_type));
    return gimple_build_assign (f.lhs, count);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The argument only exists for its type.  */
    return gen_int_mode (GET_MODE_NUNITS (e.vector_mode (0)), DImode);
  }
};

class svmad_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return expand_mad (e);
  }
};

class svmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Put the accumulator at the end (argument 3), but keep it as the
       merge input for _m functions.  */
    e.rotate_inputs_left (1, 4);
    return expand_mad (e, 3);
  }
};

class svmla_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.type_suffix (0).integer_p)
      {
        machine_mode mode = e.vector_mode (0);
        return e.use_exact_insn (code_for_aarch64_sve_add_mul_lane (mode));
      }
    return expand_mla_mls_lane (e, UNSPEC_FMLA);
  }
};

class svmls_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Put the accumulator at the end (argument 3), but keep it as the
       merge input for _m functions.  */
    e.rotate_inputs_left (1, 4);
    return expand_msb (e, 3);
  }
};

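/* Implements svmov (predicate move).  */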
class svmov_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    return gimple_build_assign (f.lhs, BIT_AND_EXPR,
                                gimple_call_arg (f.call, 0),
                                gimple_call_arg (f.call, 1));
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The canonical form for the assembler alias "MOV Pa.B, Pb/Z, Pc.B"
       is "AND Pa.B, Pb/Z, Pc.B, Pc.B".  */
    gcc_assert (e.pred == PRED_z);
    e.args.quick_push (e.args[1]);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_andvnx16bi_z);
  }
};

class svmls_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.type_suffix (0).integer_p)
      {
        machine_mode mode = e.vector_mode (0);
        return e.use_exact_insn (code_for_aarch64_sve_sub_mul_lane (mode));
      }
    return expand_mla_mls_lane (e, UNSPEC_FMLS);
  }
};

class svmmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode;
    if (e.type_suffix (0).integer_p)
      {
        if (e.type_suffix (0).unsigned_p)
          icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0));
        else
          icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0));
      }
    else
      icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

class svmsb_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return expand_msb (e);
  }
};

class svnand_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_nandvnx16bi_z);
  }
};

class svnor_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_norvnx16bi_z);
  }
};

class svnot_impl : public rtx_code_function
{
public:
  CONSTEXPR svnot_impl () : rtx_code_function (NOT, NOT, -1) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
      {
        /* The canonical form for the assembler alias "NOT Pa.B, Pb/Z, Pc.B"
           is "EOR Pa.B, Pb/Z, Pb.B, Pc.B".  */
        gcc_assert (e.pred == PRED_z);
        e.args.quick_insert (1, e.args[0]);
        return e.use_exact_insn (CODE_FOR_aarch64_pred_xorvnx16bi_z);
      }
    return rtx_code_function::expand (e);
  }
};

class svorn_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_ornvnx16bi_z);
  }
};

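/* Implements svpfalse, which returns an all-false predicate.  */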
class svpfalse_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    return f.fold_to_pfalse ();
  }

  rtx
  expand (function_expander &) const OVERRIDE
  {
    return CONST0_RTX (VNx16BImode);
  }
};

/* Implements svpfirst and svpnext, which share the same .md patterns.  */
class svpfirst_svpnext_impl : public function_base
{
public:
  CONSTEXPR svpfirst_svpnext_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    e.add_ptrue_hint (0, mode);
    return e.use_exact_insn (code_for_aarch64_sve (m_unspec, mode));
  }

  /* The unspec associated with the operation.  */
  int m_unspec;
};

/* Implements contiguous forms of svprf[bhwd].  */
class svprf_bhwd_impl : public function_base
{
public:
  CONSTEXPR svprf_bhwd_impl (machine_mode mode) : m_mode (mode) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_PREFETCH_MEMORY;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_prefetch_operands ();
    insn_code icode = code_for_aarch64_sve_prefetch (m_mode);
    return e.use_contiguous_prefetch_insn (icode);
  }

  /* The mode that we'd use to hold one vector of prefetched data.  */
  machine_mode m_mode;
};

/* Implements svprf[bhwd]_gather.  */
class svprf_bhwd_gather_impl : public function_base
{
public:
  CONSTEXPR svprf_bhwd_gather_impl (machine_mode mode) : m_mode (mode) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_PREFETCH_MEMORY;
  }

  machine_mode
  memory_vector_mode (const function_instance &) const OVERRIDE
  {
    return m_mode;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_prefetch_operands ();
    e.prepare_gather_address_operands (1);

    /* Insert a zero operand to identify the mode of the memory being
       accessed.  This goes between the gather operands and prefetch
       operands created above.  */
    e.args.quick_insert (5, CONST0_RTX (m_mode));

    machine_mode reg_mode = GET_MODE (e.args[2]);
    insn_code icode = code_for_aarch64_sve_gather_prefetch (m_mode, reg_mode);
    return e.use_exact_insn (icode);
  }

  /* The mode that we'd use to hold one vector of prefetched data.  */
  machine_mode m_mode;
};

/* Implements svptest_any, svptest_first and svptest_last.  */
class svptest_impl : public function_base
{
public:
  CONSTEXPR svptest_impl (rtx_code compare) : m_compare (compare) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See whether GP is an exact ptrue for some predicate mode;
       i.e. whether converting the GP to that mode will not drop
       set bits and will leave all significant bits set.  */
    machine_mode wide_mode;
    int hint;
    if (aarch64_ptrue_all_mode (e.args[0]).exists (&wide_mode))
      hint = SVE_KNOWN_PTRUE;
    else
      {
        hint = SVE_MAYBE_NOT_PTRUE;
        wide_mode = VNx16BImode;
      }

    /* Generate the PTEST itself.  */
    rtx pg = force_reg (VNx16BImode, e.args[0]);
    rtx wide_pg = gen_lowpart (wide_mode, pg);
    rtx hint_rtx = gen_int_mode (hint, DImode);
    rtx op = force_reg (wide_mode, gen_lowpart (wide_mode, e.args[1]));
    emit_insn (gen_aarch64_ptestvnx16bi (pg, wide_pg, hint_rtx, op));

    /* Get the location of the boolean result.  We can provide SImode and
       DImode values directly; rely on generic code to convert others.  */
    rtx target = e.possible_target;
    if (!target
        || !REG_P (target)
        || (GET_MODE (target) != SImode && GET_MODE (target) != DImode))
      target = gen_reg_rtx (DImode);

    /* Generate a CSET to convert the CC result of the PTEST to a boolean.  */
    rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
    rtx compare = gen_rtx_fmt_ee (m_compare, GET_MODE (target),
                                  cc_reg, const0_rtx);
    emit_insn (gen_rtx_SET (target, compare));
    return target;
  }

  /* The comparison code associated with the ptest condition.  */
  rtx_code m_compare;
};

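/* Implements svptrue.  */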
class svptrue_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    return f.fold_to_ptrue ();
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return aarch64_ptrue_all (e.type_suffix (0).element_bytes);
  }
};

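/* Implements svptrue_pat.  */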
class svptrue_pat_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree pattern_arg = gimple_call_arg (f.call, 0);
    aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg);

    if (pattern == AARCH64_SV_ALL)
      /* svptrue_pat_bN (SV_ALL) == svptrue_bN ().  */
      return f.fold_to_ptrue ();

    /* See whether we can count the number of elements in the pattern
       at compile time.  If so, construct a predicate with that number
       of 1s followed by all 0s.  */
    int nelts_per_vq = f.elements_per_vq (0);
    HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, nelts_per_vq);
    if (value >= 0)
      return f.fold_to_vl_pred (value);

    return NULL;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* In rtl, the predicate is represented as the constant:

         (const:V16BI (unspec:V16BI [(const_int PATTERN)
                                     (const_vector:VnnBI [zeros])]
                        UNSPEC_PTRUE))

       where nn determines the element size.  */
    rtvec vec = gen_rtvec (2, e.args[0], CONST0_RTX (e.vector_mode (0)));
    return gen_rtx_CONST (VNx16BImode,
                          gen_rtx_UNSPEC (VNx16BImode, vec, UNSPEC_PTRUE));
  }
};

/* Implements svqdec[bhwd]{,_pat} and svqinc[bhwd]{,_pat}.  */
class svqdec_svqinc_bhwd_impl : public function_base
{
public:
  CONSTEXPR svqdec_svqinc_bhwd_impl (rtx_code code_for_sint,
                                     rtx_code code_for_uint,
                                     scalar_int_mode elem_mode)
    : m_code_for_sint (code_for_sint),
      m_code_for_uint (code_for_uint),
      m_elem_mode (elem_mode)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Treat non-_pat functions in the same way as _pat functions with
       an SV_ALL argument.  */
    if (e.args.length () == 2)
      e.args.quick_insert (1, gen_int_mode (AARCH64_SV_ALL, DImode));

    /* Insert the number of elements per 128-bit block as a fake argument,
       between the pattern and the multiplier.  Arguments 1, 2 and 3 then
       correspond exactly with the 3 UNSPEC_SVE_CNT_PAT operands; see
       aarch64_sve_cnt_pat for details.  */
    unsigned int elements_per_vq = 128 / GET_MODE_BITSIZE (m_elem_mode);
    e.args.quick_insert (2, gen_int_mode (elements_per_vq, DImode));

    rtx_code code = (e.type_suffix (0).unsigned_p
                     ? m_code_for_uint
                     : m_code_for_sint);

    /* Choose between operating on integer scalars or integer vectors.  */
    machine_mode mode = e.vector_mode (0);
    if (e.mode_suffix_id == MODE_n)
      mode = GET_MODE_INNER (mode);
    return e.use_exact_insn (code_for_aarch64_sve_pat (code, mode));
  }

  /* The saturating addition or subtraction codes to use for signed and
     unsigned values respectively.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;

  /* The integer mode associated with the [bhwd] suffix.  */
  scalar_int_mode m_elem_mode;
};

/* Implements svqdec[bhwd]{,_pat}.  */
class svqdec_bhwd_impl : public svqdec_svqinc_bhwd_impl
{
public:
  CONSTEXPR svqdec_bhwd_impl (scalar_int_mode elem_mode)
    : svqdec_svqinc_bhwd_impl (SS_MINUS, US_MINUS, elem_mode) {}
};

/* Implements svqinc[bhwd]{,_pat}.  */
class svqinc_bhwd_impl : public svqdec_svqinc_bhwd_impl
{
public:
  CONSTEXPR svqinc_bhwd_impl (scalar_int_mode elem_mode)
    : svqdec_svqinc_bhwd_impl (SS_PLUS, US_PLUS, elem_mode) {}
};

/* Implements svqdecp and svqincp.  */
class svqdecp_svqincp_impl : public function_base
{
public:
  CONSTEXPR svqdecp_svqincp_impl (rtx_code code_for_sint,
                                  rtx_code code_for_uint)
    : m_code_for_sint (code_for_sint),
      m_code_for_uint (code_for_uint)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    rtx_code code = (e.type_suffix (0).unsigned_p
                     ? m_code_for_uint
                     : m_code_for_sint);
    insn_code icode;
    if (e.mode_suffix_id == MODE_n)
      {
        /* Increment or decrement a scalar (whose mode is given by the first
           type suffix) by the number of active elements in a predicate
           (whose mode is given by the second type suffix).  */
        machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
        icode = code_for_aarch64_sve_cntp (code, mode, e.vector_mode (1));
      }
    else
      /* Increment a vector by the number of active elements in a predicate,
         with the vector mode determining the predicate mode.  */
      icode = code_for_aarch64_sve_cntp (code, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The saturating addition or subtraction codes to use for signed and
     unsigned values respectively.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;
};

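/* Implements svrdffr.  */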
class svrdffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_copy_ffr_to_ffrt ());
    rtx result = e.use_exact_insn (e.pred == PRED_z
                                   ? CODE_FOR_aarch64_rdffr_z
                                   : CODE_FOR_aarch64_rdffr);
    emit_insn (gen_aarch64_update_ffrt ());
    return result;
  }
};

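/* Implements svreinterpret.  */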
class svreinterpret_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* Punt to rtl if the effect of the reinterpret on registers does not
       conform to GCC's endianness model.  */
    if (!targetm.can_change_mode_class (f.vector_mode (0),
                                        f.vector_mode (1), FP_REGS))
      return NULL;

    /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR
       reinterpretation.  */
    tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (f.lhs),
                       gimple_call_arg (f.call, 0));
    return gimple_build_assign (f.lhs, VIEW_CONVERT_EXPR, rhs);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode));
  }
};

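/* Implements svrev.  */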
class svrev_impl : public permute
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* Punt for now on _b16 and wider; we'd need more complex evpc logic
       to rerecognize the result.  */
    if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8)
      return NULL;

    /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }.  */
    poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 1, 3);
    for (int i = 0; i < 3; ++i)
      builder.quick_push (nelts - i - 1);
    return fold_permute (f, builder);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0)));
  }
};

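/* Implements svsel.  */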
class svsel_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* svsel corresponds exactly to VEC_COND_EXPR.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, f.vector_type (0), 0);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
    return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred,
                                gimple_call_arg (f.call, 1),
                                gimple_call_arg (f.call, 2));
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond).  */
    e.rotate_inputs_left (0, 3);
    insn_code icode = convert_optab_handler (vcond_mask_optab,
                                             e.vector_mode (0),
                                             e.gp_mode (0));
    return e.use_exact_insn (icode);
  }
};

/* Implements svset2, svset3 and svset4.  */
class svset_impl : public quiet<multi_vector_function>
{
public:
  CONSTEXPR svset_impl (unsigned int vectors_per_tuple)
    : quiet<multi_vector_function> (vectors_per_tuple) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree rhs_tuple = gimple_call_arg (f.call, 0);
    tree index = gimple_call_arg (f.call, 1);
    tree rhs_vector = gimple_call_arg (f.call, 2);

    /* Replace the call with two statements: a copy of the full tuple
       to the call result, followed by an update of the individual vector.

       The fold routines expect the replacement statement to have the
       same lhs as the original call, so return the copy statement
       rather than the field update.  */
    gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple);

    /* Get a reference to the individual vector.  */
    tree field = tuple_type_field (TREE_TYPE (f.lhs));
    tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
                             f.lhs, field, NULL_TREE);
    tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
                              lhs_array, index, NULL_TREE, NULL_TREE);
    gassign *update = gimple_build_assign (lhs_vector, rhs_vector);
    gsi_insert_after (f.gsi, update, GSI_SAME_STMT);

    return copy;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    rtx rhs_tuple = e.args[0];
    unsigned int index = INTVAL (e.args[1]);
    rtx rhs_vector = e.args[2];

    /* First copy the full tuple to the target register.  */
    rtx lhs_tuple = e.get_nonoverlapping_reg_target ();
    emit_move_insn (lhs_tuple, rhs_tuple);

    /* ...then update the individual vector.  */
    rtx lhs_vector = simplify_gen_subreg (GET_MODE (rhs_vector),
                                          lhs_tuple, GET_MODE (lhs_tuple),
                                          index * BYTES_PER_SVE_VECTOR);
    emit_move_insn (lhs_vector, rhs_vector);
    return lhs_vector;
  }
};

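/* Implements svsetffr.  */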
class svsetffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
  }
};

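/* Implements contiguous forms of svst1.  */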
class svst1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_WRITE_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    tree rhs = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 1);
    return gimple_build_call_internal (IFN_MASK_STORE, 4,
                                       base, cookie, pred, rhs);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = convert_optab_handler (maskstore_optab,
                                             e.vector_mode (0), e.gp_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};

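/* Implements svst1_scatter.  */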
class svst1_scatter_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_WRITE_MEMORY;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, as required by mask_scatter_store_optab.  */
    e.rotate_inputs_left (0, 6);
    machine_mode mem_mode = e.memory_vector_mode ();
    machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
    insn_code icode = convert_optab_handler (mask_scatter_store_optab,
                                             mem_mode, int_mode);
    return e.use_exact_insn (icode);
  }
};

/* Implements truncating forms of svst1_scatter.  */
class svst1_scatter_truncate_impl : public truncating_store
{
public:
  CONSTEXPR svst1_scatter_truncate_impl (scalar_int_mode to_mode)
    : truncating_store (to_mode) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since the truncating scatters use the same
       operand order as mask_scatter_store_optab.  */
    e.rotate_inputs_left (0, 6);
    insn_code icode = code_for_aarch64_scatter_store_trunc
      (e.memory_vector_mode (), e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

/* Implements truncating contiguous forms of svst1.  */
class svst1_truncate_impl : public truncating_store
{
public:
  CONSTEXPR svst1_truncate_impl (scalar_int_mode to_mode)
    : truncating_store (to_mode) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_store_trunc (e.memory_vector_mode (),
                                                    e.vector_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};

/* Implements svst2, svst3 and svst4.  */
class svst234_impl : public full_width_access
{
public:
  CONSTEXPR svst234_impl (unsigned int vectors_per_tuple)
    : full_width_access (vectors_per_tuple) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_WRITE_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    /* View the stored data as an array of vectors.  */
    unsigned int num_args = gimple_call_num_args (f.call);
    tree rhs_tuple = gimple_call_arg (f.call, num_args - 1);
    tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
    tree rhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), rhs_tuple);

    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    return gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
                                       base, cookie, pred, rhs_array);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode tuple_mode = GET_MODE (e.args.last ());
    insn_code icode = convert_optab_handler (vec_mask_store_lanes_optab,
                                             tuple_mode, e.vector_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};

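/* Implements svstnt1.  */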
class svstnt1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_WRITE_MEMORY;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};

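/* Implements svsub.  */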
class svsub_impl : public rtx_code_function
{
public:
  CONSTEXPR svsub_impl ()
    : rtx_code_function (MINUS, MINUS, UNSPEC_COND_FSUB) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Canonicalize subtractions of constants to additions.  */
    machine_mode mode = e.vector_mode (0);
    if (e.try_negating_argument (2, mode))
      return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD);

    return rtx_code_function::expand (e);
  }
};

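/* Implements svtbl.  */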
class svtbl_impl : public permute
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0)));
  }
};

/* Implements svtrn1 and svtrn2.  */
class svtrn_impl : public binary_permute
{
public:
  CONSTEXPR svtrn_impl (int base)
    : binary_permute (base ? UNSPEC_TRN2 : UNSPEC_TRN1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* svtrn1: { 0, nelts, 2, nelts + 2, 4, nelts + 4, ... }
       svtrn2: as for svtrn1, but with 1 added to each index.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 2, 3);
    for (unsigned int i = 0; i < 3; ++i)
      {
        builder.quick_push (m_base + i * 2);
        builder.quick_push (m_base + i * 2 + nelts);
      }
    return fold_permute (f, builder);
  }

  /* 0 for svtrn1, 1 for svtrn2.  */
  unsigned int m_base;
};

/* Base class for svundef{,2,3,4}.  */
class svundef_impl : public quiet<multi_vector_function>
{
public:
  CONSTEXPR svundef_impl (unsigned int vectors_per_tuple)
    : quiet<multi_vector_function> (vectors_per_tuple) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* Don't fold svundef at the gimple level.  There's no exact
       correspondence for SSA_NAMEs, and we explicitly don't want
       to generate a specific value (like an all-zeros vector).  */
    if (vectors_per_tuple () == 1)
      return NULL;
    return gimple_build_assign (f.lhs, build_clobber (TREE_TYPE (f.lhs)));
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    rtx target = e.get_reg_target ();
    emit_clobber (copy_rtx (target));
    return target;
  }
};

/* Implements svunpklo and svunpkhi.  */
class svunpk_impl : public quiet<function_base>
{
public:
  CONSTEXPR svunpk_impl (bool high_p) : m_high_p (high_p) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* Don't fold the predicate ops, since every bit of the svbool_t
       result is significant.  */
    if (f.type_suffix_ids[0] == TYPE_SUFFIX_b)
      return NULL;

    /* The first half in memory is VEC_UNPACK_LO_EXPR for little-endian
       and VEC_UNPACK_HI_EXPR for big-endian.  */
    bool high_p = BYTES_BIG_ENDIAN ? !m_high_p : m_high_p;
    tree_code code = high_p ? VEC_UNPACK_HI_EXPR : VEC_UNPACK_LO_EXPR;
    return gimple_build_assign (f.lhs, code, gimple_call_arg (f.call, 0));
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = GET_MODE (e.args[0]);
    unsigned int unpacku = m_high_p ? UNSPEC_UNPACKUHI : UNSPEC_UNPACKULO;
    unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO;
    insn_code icode;
    if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
      icode = code_for_aarch64_sve_punpk (unpacku, mode);
    else
      {
        int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks;
        icode = code_for_aarch64_sve_unpk (unspec, unspec, mode);
      }
    return e.use_exact_insn (icode);
  }

  /* True for svunpkhi, false for svunpklo.  */
  bool m_high_p;
};

/* Also implements svsudot.  */
class svusdot_impl : public function_base
{
public:
  CONSTEXPR svusdot_impl (bool su) : m_su (su) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The implementation of the ACLE function svsudot (for the non-lane
       version) is through the USDOT instruction but with the second and third
       inputs swapped.  */
    if (m_su)
      e.rotate_inputs_left (1, 2);
    /* The ACLE function has the same order requirements as for svdot.
       While there's no requirement for the RTL pattern to have the same sort
       of order as that for <sur>dot_prod, it's easier to read.
       Hence we do the same rotation on arguments as svdot_impl does.  */
    e.rotate_inputs_left (0, 3);
    machine_mode mode = e.vector_mode (0);
    insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode);
    return e.use_exact_insn (icode);
  }

private:
  bool m_su;
};

/* Implements svuzp1 and svuzp2.  */
class svuzp_impl : public binary_permute
{
public:
  CONSTEXPR svuzp_impl (unsigned int base)
    : binary_permute (base ? UNSPEC_UZP2 : UNSPEC_UZP1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* svuzp1: { 0, 2, 4, 6, ... }
       svuzp2: { 1, 3, 5, 7, ... }.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 1, 3);
    for (unsigned int i = 0; i < 3; ++i)
      builder.quick_push (m_base + i * 2);
    return fold_permute (f, builder);
  }

  /* 0 for svuzp1, 1 for svuzp2.  */
  unsigned int m_base;
};

/* A function_base for svwhilele and svwhilelt functions.  */
class svwhilelx_impl : public while_comparison
{
public:
  CONSTEXPR svwhilelx_impl (int unspec_for_sint, int unspec_for_uint, bool eq_p)
    : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p)
  {}

  /* Try to fold a call by treating its arguments as constants of type T.  */
  template<typename T>
  gimple *
  fold_type (gimple_folder &f) const
  {
    /* Only handle cases in which both operands are constant.  */
    T arg0, arg1;
    if (!poly_int_tree_p (gimple_call_arg (f.call, 0), &arg0)
        || !poly_int_tree_p (gimple_call_arg (f.call, 1), &arg1))
      return NULL;

    /* Check whether the result is known to be all-false.  */
    if (m_eq_p ? known_gt (arg0, arg1) : known_ge (arg0, arg1))
      return f.fold_to_pfalse ();

    /* Punt if we can't tell at compile time whether the result
       is all-false.  */
    if (m_eq_p ? maybe_gt (arg0, arg1) : maybe_ge (arg0, arg1))
      return NULL;

    /* At this point we know the result has at least one set element.  */
    poly_uint64 diff = arg1 - arg0;
    poly_uint64 nelts = GET_MODE_NUNITS (f.vector_mode (0));

    /* Canonicalize the svwhilele form to the svwhilelt form.  Subtract
       from NELTS rather than adding to DIFF, to prevent overflow.  */
    if (m_eq_p)
      nelts -= 1;

    /* Check whether the result is known to be all-true.  */
    if (known_ge (diff, nelts))
      return f.fold_to_ptrue ();

    /* Punt if DIFF might not be the actual number of set elements
       in the result.  Conditional equality is fine.  */
    if (maybe_gt (diff, nelts))
      return NULL;

    /* At this point we know that the predicate will have DIFF set elements
       for svwhilelt and DIFF + 1 set elements for svwhilele (which stops
       after rather than before ARG1 is reached).  See if we can create
       the predicate at compile time.  */
    unsigned HOST_WIDE_INT vl;
    if (diff.is_constant (&vl))
      /* Overflow is no longer possible after the checks above.  */
      return f.fold_to_vl_pred (m_eq_p ? vl + 1 : vl);

    return NULL;
  }

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    if (f.type_suffix (1).unsigned_p)
      return fold_type<poly_uint64> (f);
    else
      return fold_type<poly_int64> (f);
  }

  /* True for svwhilele, false for svwhilelt.  */
  bool m_eq_p;
};

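/* Implements svwrffr.  */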
class svwrffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
  }
};

/* Implements svzip1 and svzip2.  */
class svzip_impl : public binary_permute
{
public:
  CONSTEXPR svzip_impl (unsigned int base)
    : binary_permute (base ? UNSPEC_ZIP2 : UNSPEC_ZIP1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* svzip1: { 0, nelts, 1, nelts + 1, 2, nelts + 2, ... }
       svzip2: as for svzip1, but with nelts / 2 added to each index.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    poly_uint64 base = m_base * exact_div (nelts, 2);
    vec_perm_builder builder (nelts, 2, 3);
    for (unsigned int i = 0; i < 3; ++i)
      {
        builder.quick_push (base + i);
        builder.quick_push (base + i + nelts);
      }
    return fold_permute (f, builder);
  }

  /* 0 for svzip1, 1 for svzip2.  */
  unsigned int m_base;
};

} /* end anonymous namespace */

namespace aarch64_sve {

FUNCTION (svabd, svabd_impl,)
FUNCTION (svabs, quiet<rtx_code_function>, (ABS, ABS, UNSPEC_COND_FABS))
FUNCTION (svacge, svac_impl, (UNSPEC_COND_FCMGE))
FUNCTION (svacgt, svac_impl, (UNSPEC_COND_FCMGT))
FUNCTION (svacle, svac_impl, (UNSPEC_COND_FCMLE))
FUNCTION (svaclt, svac_impl, (UNSPEC_COND_FCMLT))
FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD))
FUNCTION (svadda, svadda_impl,)
FUNCTION (svaddv, reduction, (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV))
FUNCTION (svadrb, svadr_bhwd_impl, (0))
FUNCTION (svadrd, svadr_bhwd_impl, (3))
FUNCTION (svadrh, svadr_bhwd_impl, (1))
FUNCTION (svadrw, svadr_bhwd_impl, (2))
FUNCTION (svand, rtx_code_function, (AND, AND))
FUNCTION (svandv, reduction, (UNSPEC_ANDV))
FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE))
FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1))
FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf))
FUNCTION (svbfdot_lane, fixed_insn_function,
          (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf))
FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf))
FUNCTION (svbfmlalb_lane, fixed_insn_function,
          (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf))
FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf))
FUNCTION (svbfmlalt_lane, fixed_insn_function,
          (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf))
FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf))
FUNCTION (svbic, svbic_impl,)
FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA))
FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB))
FUNCTION (svbrkn, svbrk_binary_impl, (UNSPEC_BRKN))
FUNCTION (svbrkpa, svbrk_binary_impl, (UNSPEC_BRKPA))
FUNCTION (svbrkpb, svbrk_binary_impl, (UNSPEC_BRKPB))
FUNCTION (svcadd, svcadd_impl,)
FUNCTION (svclasta, svclast_impl, (UNSPEC_CLASTA))
FUNCTION (svclastb, svclast_impl, (UNSPEC_CLASTB))
FUNCTION (svcls, unary_count, (CLRSB))
FUNCTION (svclz, unary_count, (CLZ))
FUNCTION (svcmla, svcmla_impl,)
FUNCTION (svcmla_lane, svcmla_lane_impl,)
FUNCTION (svcmpeq, svcmp_impl, (EQ_EXPR, UNSPEC_COND_FCMEQ))
FUNCTION (svcmpeq_wide, svcmp_wide_impl, (EQ_EXPR, UNSPEC_COND_CMPEQ_WIDE,
                                          UNSPEC_COND_CMPEQ_WIDE))
FUNCTION (svcmpge, svcmp_impl, (GE_EXPR, UNSPEC_COND_FCMGE))
FUNCTION (svcmpge_wide, svcmp_wide_impl, (GE_EXPR, UNSPEC_COND_CMPGE_WIDE,
                                          UNSPEC_COND_CMPHS_WIDE))
FUNCTION (svcmpgt, svcmp_impl, (GT_EXPR, UNSPEC_COND_FCMGT))
FUNCTION (svcmpgt_wide, svcmp_wide_impl, (GT_EXPR, UNSPEC_COND_CMPGT_WIDE,
                                          UNSPEC_COND_CMPHI_WIDE))
FUNCTION (svcmple, svcmp_impl, (LE_EXPR, UNSPEC_COND_FCMLE))
FUNCTION (svcmple_wide, svcmp_wide_impl, (LE_EXPR, UNSPEC_COND_CMPLE_WIDE,
                                          UNSPEC_COND_CMPLS_WIDE))
FUNCTION (svcmplt, svcmp_impl, (LT_EXPR, UNSPEC_COND_FCMLT))
FUNCTION (svcmplt_wide, svcmp_wide_impl, (LT_EXPR, UNSPEC_COND_CMPLT_WIDE,
                                          UNSPEC_COND_CMPLO_WIDE))
FUNCTION (svcmpne, svcmp_impl, (NE_EXPR, UNSPEC_COND_FCMNE))
FUNCTION (svcmpne_wide, svcmp_wide_impl, (NE_EXPR, UNSPEC_COND_CMPNE_WIDE,
                                          UNSPEC_COND_CMPNE_WIDE))
FUNCTION (svcmpuo, svcmpuo_impl,)
FUNCTION (svcnot, svcnot_impl,)
FUNCTION (svcnt, unary_count, (POPCOUNT))
FUNCTION (svcntb, svcnt_bhwd_impl, (VNx16QImode))
FUNCTION (svcntb_pat, svcnt_bhwd_pat_impl, (VNx16QImode))
FUNCTION (svcntd, svcnt_bhwd_impl, (VNx2DImode))
FUNCTION (svcntd_pat, svcnt_bhwd_pat_impl, (VNx2DImode))
FUNCTION (svcnth, svcnt_bhwd_impl, (VNx8HImode))
FUNCTION (svcnth_pat, svcnt_bhwd_pat_impl, (VNx8HImode))
FUNCTION (svcntp, svcntp_impl,)
FUNCTION (svcntw, svcnt_bhwd_impl, (VNx4SImode))
FUNCTION (svcntw_pat, svcnt_bhwd_pat_impl, (VNx4SImode))
FUNCTION (svcompact, QUIET_CODE_FOR_MODE0 (aarch64_sve_compact),)
FUNCTION (svcreate2, svcreate_impl, (2))
FUNCTION (svcreate3, svcreate_impl, (3))
FUNCTION (svcreate4, svcreate_impl, (4))
FUNCTION (svcvt, svcvt_impl,)
FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),)
FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdot, svdot_impl,)
FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1))
FUNCTION (svdup, svdup_impl,)
FUNCTION (svdup_lane, svdup_lane_impl,)
FUNCTION (svdupq, svdupq_impl,)
FUNCTION (svdupq_lane, svdupq_lane_impl,)
FUNCTION (sveor, rtx_code_function, (XOR, XOR, -1))
FUNCTION (sveorv, reduction, (UNSPEC_XORV))
FUNCTION (svexpa, unspec_based_function, (-1, -1, UNSPEC_FEXPA))
FUNCTION (svext, QUIET_CODE_FOR_MODE0 (aarch64_sve_ext),)
FUNCTION (svextb, svext_bhw_impl, (QImode))
FUNCTION (svexth, svext_bhw_impl, (HImode))
FUNCTION (svextw, svext_bhw_impl, (SImode))
FUNCTION (svget2, svget_impl, (2))
FUNCTION (svget3, svget_impl, (3))
FUNCTION (svget4, svget_impl, (4))
FUNCTION (svindex, svindex_impl,)
FUNCTION (svinsr, svinsr_impl,)
FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA))
FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB))
FUNCTION (svld1, svld1_impl,)
FUNCTION (svld1_gather, svld1_gather_impl,)
FUNCTION (svld1ro, svld1ro_impl,)
FUNCTION (svld1rq, svld1rq_impl,)
FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8))
FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8))
FUNCTION (svld1sh, svld1_extend_impl, (TYPE_SUFFIX_s16))
FUNCTION (svld1sh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s16))
FUNCTION (svld1sw, svld1_extend_impl, (TYPE_SUFFIX_s32))
FUNCTION (svld1sw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s32))
FUNCTION (svld1ub, svld1_extend_impl, (TYPE_SUFFIX_u8))
FUNCTION (svld1ub_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u8))
FUNCTION (svld1uh, svld1_extend_impl, (TYPE_SUFFIX_u16))
FUNCTION (svld1uh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u16))
FUNCTION (svld1uw, svld1_extend_impl, (TYPE_SUFFIX_u32))
FUNCTION (svld1uw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u32))
FUNCTION (svld2, svld234_impl, (2))
FUNCTION (svld3, svld234_impl, (3))
FUNCTION (svld4, svld234_impl, (4))
FUNCTION (svldff1, svldxf1_impl, (UNSPEC_LDFF1))
FUNCTION (svldff1_gather, svldff1_gather_impl,)
FUNCTION (svldff1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDFF1))
FUNCTION (svldff1sb_gather, svldff1_gather_extend, (TYPE_SUFFIX_s8))
FUNCTION (svldff1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDFF1))
FUNCTION (svldff1sh_gather, svldff1_gather_extend, (TYPE_SUFFIX_s16))
FUNCTION (svldff1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDFF1))
FUNCTION (svldff1sw_gather, svldff1_gather_extend, (TYPE_SUFFIX_s32))
FUNCTION (svldff1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDFF1))
FUNCTION (svldff1ub_gather, svldff1_gather_extend, (TYPE_SUFFIX_u8))
FUNCTION (svldff1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDFF1))
FUNCTION (svldff1uh_gather, svldff1_gather_extend, (TYPE_SUFFIX_u16))
FUNCTION (svldff1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDFF1))
FUNCTION (svldff1uw_gather, svldff1_gather_extend, (TYPE_SUFFIX_u32))
FUNCTION (svldnf1, svldxf1_impl, (UNSPEC_LDNF1))
FUNCTION (svldnf1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDNF1))
FUNCTION (svldnf1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDNF1))
FUNCTION (svldnf1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDNF1))
FUNCTION (svldnf1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDNF1))
FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1))
FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1))
FUNCTION (svldnt1, svldnt1_impl,)
FUNCTION (svlen, svlen_impl,)
FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT))
FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE))
FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
FUNCTION (svmad, svmad_impl,)
FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX))
FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM))
FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV))
FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV))
FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN))
FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM))
FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV))
FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV))
FUNCTION (svmla, svmla_impl,)
FUNCTION (svmla_lane, svmla_lane_impl,)
FUNCTION (svmls, svmls_impl,)
FUNCTION (svmls_lane, svmls_lane_impl,)
FUNCTION (svmmla, svmmla_impl,)
FUNCTION (svmov, svmov_impl,)
FUNCTION (svmsb, svmsb_impl,)
FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL))
FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),)
FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART,
                                          UNSPEC_UMUL_HIGHPART, -1))
FUNCTION (svmulx, unspec_based_function, (-1, -1, UNSPEC_COND_FMULX))
FUNCTION (svnand, svnand_impl,)
FUNCTION (svneg, quiet<rtx_code_function>, (NEG, NEG, UNSPEC_COND_FNEG))
FUNCTION (svnmad, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLA))
FUNCTION (svnmla, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLA))
FUNCTION (svnmls, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLS))
FUNCTION (svnmsb, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLS))
FUNCTION (svnor, svnor_impl,)
FUNCTION (svnot, svnot_impl,)
FUNCTION (svorn, svorn_impl,)
FUNCTION (svorr, rtx_code_function, (IOR, IOR))
FUNCTION (svorv, reduction, (UNSPEC_IORV))
FUNCTION (svpfalse, svpfalse_impl,)
FUNCTION (svpfirst, svpfirst_svpnext_impl, (UNSPEC_PFIRST))
FUNCTION (svpnext, svpfirst_svpnext_impl, (UNSPEC_PNEXT))
FUNCTION (svprfb, svprf_bhwd_impl, (VNx16QImode))
FUNCTION (svprfb_gather, svprf_bhwd_gather_impl, (VNx16QImode))
FUNCTION (svprfd, svprf_bhwd_impl, (VNx2DImode))
FUNCTION (svprfd_gather, svprf_bhwd_gather_impl, (VNx2DImode))
FUNCTION (svprfh, svprf_bhwd_impl, (VNx8HImode))
FUNCTION (svprfh_gather, svprf_bhwd_gather_impl, (VNx8HImode))
FUNCTION (svprfw, svprf_bhwd_impl, (VNx4SImode))
FUNCTION (svprfw_gather, svprf_bhwd_gather_impl, (VNx4SImode))
FUNCTION (svptest_any, svptest_impl, (NE))
FUNCTION (svptest_first, svptest_impl, (LT))
FUNCTION (svptest_last, svptest_impl, (LTU))
FUNCTION (svptrue, svptrue_impl,)
FUNCTION (svptrue_pat, svptrue_pat_impl,)
FUNCTION (svqadd, rtx_code_function, (SS_PLUS, US_PLUS, -1))
FUNCTION (svqdecb, svqdec_bhwd_impl, (QImode))
FUNCTION (svqdecb_pat, svqdec_bhwd_impl, (QImode))
FUNCTION (svqdecd, svqdec_bhwd_impl, (DImode))
FUNCTION (svqdecd_pat, svqdec_bhwd_impl, (DImode))
FUNCTION (svqdech, svqdec_bhwd_impl, (HImode))
FUNCTION (svqdech_pat, svqdec_bhwd_impl, (HImode))
FUNCTION (svqdecp, svqdecp_svqincp_impl, (SS_MINUS, US_MINUS))
FUNCTION (svqdecw, svqdec_bhwd_impl, (SImode))
FUNCTION (svqdecw_pat, svqdec_bhwd_impl, (SImode))
FUNCTION (svqincb, svqinc_bhwd_impl, (QImode))
FUNCTION (svqincb_pat, svqinc_bhwd_impl, (QImode))
FUNCTION (svqincd, svqinc_bhwd_impl, (DImode))
FUNCTION (svqincd_pat, svqinc_bhwd_impl, (DImode))
FUNCTION (svqinch, svqinc_bhwd_impl, (HImode))
FUNCTION (svqinch_pat, svqinc_bhwd_impl, (HImode))
FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, US_PLUS))
FUNCTION (svqincw, svqinc_bhwd_impl, (SImode))
FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode))
FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1))
FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1))
FUNCTION (svrdffr, svrdffr_impl,)
FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE))
FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS))
FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX))
FUNCTION (svreinterpret, svreinterpret_impl,)
FUNCTION (svrev, svrev_impl,)
FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1))
FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1))
FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1))
FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA))
FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI))
FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM))
FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN))
FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP))
FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX))
FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ))
FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE))
FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS))
FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE))
FUNCTION (svsel, svsel_impl,)
FUNCTION (svset2, svset_impl, (2))
FUNCTION (svset3, svset_impl, (3))
FUNCTION (svset4, svset_impl, (4))
FUNCTION (svsetffr, svsetffr_impl,)
FUNCTION (svsplice, QUIET_CODE_FOR_MODE0 (aarch64_sve_splice),)
FUNCTION (svsqrt, rtx_code_function, (SQRT, SQRT, UNSPEC_COND_FSQRT))
FUNCTION (svst1, svst1_impl,)
FUNCTION (svst1_scatter, svst1_scatter_impl,)
FUNCTION (svst1b, svst1_truncate_impl, (QImode))
FUNCTION (svst1b_scatter, svst1_scatter_truncate_impl, (QImode))
FUNCTION (svst1h, svst1_truncate_impl, (HImode))
FUNCTION (svst1h_scatter, svst1_scatter_truncate_impl, (HImode))
FUNCTION (svst1w, svst1_truncate_impl, (SImode))
FUNCTION (svst1w_scatter, svst1_scatter_truncate_impl, (SImode))
FUNCTION (svst2, svst234_impl, (2))
FUNCTION (svst3, svst234_impl, (3))
FUNCTION (svst4, svst234_impl, (4))
FUNCTION (svstnt1, svstnt1_impl,)
FUNCTION (svsub, svsub_impl,)
FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB))
FUNCTION (svsudot, svusdot_impl, (true))
FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1))
FUNCTION (svtbl, svtbl_impl,)
FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),)
FUNCTION (svtrn1, svtrn_impl, (0))
FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q,
                                           UNSPEC_TRN1Q))
FUNCTION (svtrn2, svtrn_impl, (1))
FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q,
                                           UNSPEC_TRN2Q))
FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL))
FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL))
FUNCTION (svundef, svundef_impl, (1))
FUNCTION (svundef2, svundef_impl, (2))
FUNCTION (svundef3, svundef_impl, (3))
FUNCTION (svundef4, svundef_impl, (4))
FUNCTION (svunpkhi, svunpk_impl, (true))
FUNCTION (svunpklo, svunpk_impl, (false))
FUNCTION (svusdot, svusdot_impl, (false))
FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1))
FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1))
FUNCTION (svuzp1, svuzp_impl, (0))
FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q,
                                           UNSPEC_UZP1Q))
FUNCTION (svuzp2, svuzp_impl, (1))
FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q,
                                           UNSPEC_UZP2Q))
FUNCTION (svwhilele, svwhilelx_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true))
FUNCTION (svwhilelt, svwhilelx_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false))
FUNCTION (svwrffr, svwrffr_impl,)
FUNCTION (svzip1, svzip_impl, (0))
FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q,
                                           UNSPEC_ZIP1Q))
FUNCTION (svzip2, svzip_impl, (1))
FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q,
                                           UNSPEC_ZIP2Q))

} /* end namespace aarch64_sve */