gcc/config/aarch64/aarch64-sve-builtins-base.cc @ 145:1830386684a0 (gcc-9.2.0)
author: anatofuz
date:   Thu, 13 Feb 2020 11:34:05 +0900
comparison against 131:84e7813d76e9
/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics)
   Copyright (C) 2018-2020 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "memmodel.h"
#include "insn-codes.h"
#include "optabs.h"
#include "recog.h"
#include "expr.h"
#include "basic-block.h"
#include "function.h"
#include "fold-const.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "explow.h"
#include "emit-rtl.h"
#include "tree-vector-builder.h"
#include "rtx-vector-builder.h"
#include "vec-perm-indices.h"
#include "aarch64-sve-builtins.h"
#include "aarch64-sve-builtins-shapes.h"
#include "aarch64-sve-builtins-base.h"
#include "aarch64-sve-builtins-functions.h"

using namespace aarch64_sve;

namespace {

/* Return the UNSPEC_CMLA* unspec for rotation amount ROT.  */
static int
unspec_cmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_CMLA;
    case 90: return UNSPEC_CMLA90;
    case 180: return UNSPEC_CMLA180;
    case 270: return UNSPEC_CMLA270;
    default: gcc_unreachable ();
    }
}

/* Return the UNSPEC_FCMLA* unspec for rotation amount ROT.  */
static int
unspec_fcmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_FCMLA;
    case 90: return UNSPEC_FCMLA90;
    case 180: return UNSPEC_FCMLA180;
    case 270: return UNSPEC_FCMLA270;
    default: gcc_unreachable ();
    }
}

/* Return the UNSPEC_COND_FCMLA* unspec for rotation amount ROT.  */
static int
unspec_cond_fcmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_COND_FCMLA;
    case 90: return UNSPEC_COND_FCMLA90;
    case 180: return UNSPEC_COND_FCMLA180;
    case 270: return UNSPEC_COND_FCMLA270;
    default: gcc_unreachable ();
    }
}

/* Expand a call to svmad, or svmla after reordering its operands.
   Make _m forms merge with argument MERGE_ARGNO.  */
static rtx
expand_mad (function_expander &e,
            unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
{
  if (e.pred == PRED_x)
    {
      insn_code icode;
      if (e.type_suffix (0).integer_p)
        icode = code_for_aarch64_pred_fma (e.vector_mode (0));
      else
        icode = code_for_aarch64_pred (UNSPEC_COND_FMLA, e.vector_mode (0));
      return e.use_pred_x_insn (icode);
    }

  insn_code icode = e.direct_optab_handler (cond_fma_optab);
  return e.use_cond_insn (icode, merge_argno);
}
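
/* For reference (ACLE semantics, not restated elsewhere in this file):
   svmad (pg, a, b, c) computes a * b + c, while svmla (pg, a, b, c)
   computes a + b * c.  svmla therefore reaches this routine only after
   its caller has rotated the accumulator to the end; see svmla_impl
   below.  */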

/* Expand a call to svmla_lane or svmls_lane using floating-point unspec
   UNSPEC.  */
static rtx
expand_mla_mls_lane (function_expander &e, int unspec)
{
  /* Put the operands in the normal (fma ...) order, with the accumulator
     last.  This fits naturally since that's also the unprinted operand
     in the asm output.  */
  e.rotate_inputs_left (0, 4);
  insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
  return e.use_exact_insn (icode);
}

/* Expand a call to svmsb, or svmls after reordering its operands.
   Make _m forms merge with argument MERGE_ARGNO.  */
static rtx
expand_msb (function_expander &e,
            unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
{
  if (e.pred == PRED_x)
    {
      insn_code icode;
      if (e.type_suffix (0).integer_p)
        icode = code_for_aarch64_pred_fnma (e.vector_mode (0));
      else
        icode = code_for_aarch64_pred (UNSPEC_COND_FMLS, e.vector_mode (0));
      return e.use_pred_x_insn (icode);
    }

  insn_code icode = e.direct_optab_handler (cond_fnma_optab);
  return e.use_cond_insn (icode, merge_argno);
}
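
/* Similarly for the subtracting forms: under the ACLE definitions,
   svmsb (pg, a, b, c) computes c - a * b and svmls (pg, a, b, c)
   computes a - b * c, which is why the _x case maps to the FNMA-style
   patterns above (fnma (a, b, c) == c - a * b).  */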

class svabd_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The integer operations are represented as the subtraction of the
       minimum from the maximum, with the signedness of the instruction
       keyed off the signedness of the maximum operation.  */
    rtx_code max_code = e.type_suffix (0).unsigned_p ? UMAX : SMAX;
    insn_code icode;
    if (e.pred == PRED_x)
      {
        if (e.type_suffix (0).integer_p)
          icode = code_for_aarch64_pred_abd (max_code, e.vector_mode (0));
        else
          icode = code_for_aarch64_pred_abd (e.vector_mode (0));
        return e.use_pred_x_insn (icode);
      }

    if (e.type_suffix (0).integer_p)
      icode = code_for_aarch64_cond_abd (max_code, e.vector_mode (0));
    else
      icode = code_for_aarch64_cond_abd (e.vector_mode (0));
    return e.use_cond_insn (icode);
  }
};
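
/* In other words, the absolute difference svabd_s32_x (pg, a, b) is
   represented as smax (a, b) - smin (a, b), which the abd patterns
   named above can then match as a single SABD (or UABD when the
   maximum is unsigned).  */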

/* Implements svacge, svacgt, svacle and svaclt.  */
class svac_impl : public function_base
{
public:
  CONSTEXPR svac_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.add_ptrue_hint (0, e.gp_mode (0));
    insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The unspec code for the underlying comparison.  */
  int m_unspec;
};

class svadda_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Put the predicate last, as required by mask_fold_left_plus_optab.  */
    e.rotate_inputs_left (0, 3);
    machine_mode mode = e.vector_mode (0);
    insn_code icode = direct_optab_handler (mask_fold_left_plus_optab, mode);
    return e.use_exact_insn (icode);
  }
};

/* Implements svadr[bhwd].  */
class svadr_bhwd_impl : public function_base
{
public:
  CONSTEXPR svadr_bhwd_impl (unsigned int shift) : m_shift (shift) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = GET_MODE (e.args[0]);
    if (m_shift == 0)
      return e.use_exact_insn (code_for_aarch64_adr (mode));

    /* Turn the access size into an extra shift argument.  */
    rtx shift = gen_int_mode (m_shift, GET_MODE_INNER (mode));
    e.args.quick_push (expand_vector_broadcast (mode, shift));
    return e.use_exact_insn (code_for_aarch64_adr_shift (mode));
  }

  /* How many bits left to shift the vector displacement.  */
  unsigned int m_shift;
};
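
/* M_SHIFT encodes the access size; the function table (outside this
   excerpt) presumably registers 0/1/2/3 for the b/h/w/d forms, so that
   e.g. svadrh (bases, offsets) computes bases + (offsets << 1) to match
   the 2-byte elements that the "h" suffix addresses.  */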

class svbic_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Convert svbic of a constant into svand of its inverse.  */
    if (CONST_INT_P (e.args[2]))
      {
        machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
        e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode);
        return e.map_to_rtx_codes (AND, AND, -1);
      }

    if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
      {
        gcc_assert (e.pred == PRED_z);
        return e.use_exact_insn (CODE_FOR_aarch64_pred_bicvnx16bi_z);
      }

    if (e.pred == PRED_x)
      return e.use_unpred_insn (code_for_aarch64_bic (e.vector_mode (0)));

    return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0)));
  }
};
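
/* Sketch of the constant case: since BIC is AND with the complement,
   svbic_u32_x (pg, a, 5) is rewritten as svand_u32_x (pg, a, ~5),
   letting the AND path reuse its existing immediate handling.  */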

/* Implements svbrkn, svbrkpa and svbrkpb.  */
class svbrk_binary_impl : public function_base
{
public:
  CONSTEXPR svbrk_binary_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (code_for_aarch64_brk (m_unspec));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

/* Implements svbrka and svbrkb.  */
class svbrk_unary_impl : public function_base
{
public:
  CONSTEXPR svbrk_unary_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_cond_insn (code_for_aarch64_brk (m_unspec));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

class svcadd_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Convert the rotation amount into a specific unspec.  */
    int rot = INTVAL (e.args.pop ());
    if (rot == 90)
      return e.map_to_unspecs (UNSPEC_CADD90, UNSPEC_CADD90,
                               UNSPEC_COND_FCADD90);
    if (rot == 270)
      return e.map_to_unspecs (UNSPEC_CADD270, UNSPEC_CADD270,
                               UNSPEC_COND_FCADD270);
    gcc_unreachable ();
  }
};

/* Implements svclasta and svclastb.  */
class svclast_impl : public quiet<function_base>
{
public:
  CONSTEXPR svclast_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Match the fold_extract_optab order.  */
    std::swap (e.args[0], e.args[1]);
    machine_mode mode = e.vector_mode (0);
    insn_code icode;
    if (e.mode_suffix_id == MODE_n)
      icode = code_for_fold_extract (m_unspec, mode);
    else
      icode = code_for_aarch64_fold_extract_vector (m_unspec, mode);
    return e.use_exact_insn (icode);
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

class svcmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Convert the rotation amount into a specific unspec.  */
    int rot = INTVAL (e.args.pop ());
    if (e.type_suffix (0).float_p)
      {
        /* Make the operand order the same as the one used by the fma optabs,
           with the accumulator last.  */
        e.rotate_inputs_left (1, 4);
        return e.map_to_unspecs (-1, -1, unspec_cond_fcmla (rot), 3);
      }
    else
      {
        int cmla = unspec_cmla (rot);
        return e.map_to_unspecs (cmla, cmla, -1);
      }
  }
};

class svcmla_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Convert the rotation amount into a specific unspec.  */
    int rot = INTVAL (e.args.pop ());
    machine_mode mode = e.vector_mode (0);
    if (e.type_suffix (0).float_p)
      {
        /* Make the operand order the same as the one used by the fma optabs,
           with the accumulator last.  */
        e.rotate_inputs_left (0, 4);
        insn_code icode = code_for_aarch64_lane (unspec_fcmla (rot), mode);
        return e.use_exact_insn (icode);
      }
    else
      {
        insn_code icode = code_for_aarch64_lane (unspec_cmla (rot), mode);
        return e.use_exact_insn (icode);
      }
  }
};

/* Implements svcmp<cc> (except svcmpuo, which is handled separately).  */
class svcmp_impl : public function_base
{
public:
  CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp)
    : m_code (code), m_unspec_for_fp (unspec_for_fp) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree pg = gimple_call_arg (f.call, 0);
    tree rhs1 = gimple_call_arg (f.call, 1);
    tree rhs2 = gimple_call_arg (f.call, 2);

    /* Convert a ptrue-predicated integer comparison into the corresponding
       gimple-level operation.  */
    if (integer_all_onesp (pg)
        && f.type_suffix (0).element_bytes == 1
        && f.type_suffix (0).integer_p)
      {
        gimple_seq stmts = NULL;
        rhs2 = f.force_vector (stmts, TREE_TYPE (rhs1), rhs2);
        gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
        return gimple_build_assign (f.lhs, m_code, rhs1, rhs2);
      }

    return NULL;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);

    /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
       operand.  */
    e.add_ptrue_hint (0, e.gp_mode (0));

    if (e.type_suffix (0).integer_p)
      {
        bool unsigned_p = e.type_suffix (0).unsigned_p;
        rtx_code code = get_rtx_code (m_code, unsigned_p);
        return e.use_exact_insn (code_for_aarch64_pred_cmp (code, mode));
      }

    insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode);
    return e.use_exact_insn (icode);
  }

  /* The tree code associated with the comparison.  */
  tree_code m_code;

  /* The unspec code to use for floating-point comparisons.  */
  int m_unspec_for_fp;
};
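
/* Example of the fold above: svcmplt_s8 (svptrue_b8 (), a, b) can
   become a plain "a < b" vector comparison in gimple.  The
   element_bytes == 1 check restricts this to byte elements, where the
   governing predicate has exactly one significant bit per element.  */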

/* Implements svcmp<cc>_wide.  */
class svcmp_wide_impl : public function_base
{
public:
  CONSTEXPR svcmp_wide_impl (tree_code code, int unspec_for_sint,
                             int unspec_for_uint)
    : m_code (code), m_unspec_for_sint (unspec_for_sint),
      m_unspec_for_uint (unspec_for_uint) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    bool unsigned_p = e.type_suffix (0).unsigned_p;
    rtx_code code = get_rtx_code (m_code, unsigned_p);

    /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
       operand.  */
    e.add_ptrue_hint (0, e.gp_mode (0));

    /* If the argument is a constant that the unwidened comparisons
       can handle directly, use them instead.  */
    insn_code icode = code_for_aarch64_pred_cmp (code, mode);
    rtx op2 = unwrap_const_vec_duplicate (e.args[3]);
    if (CONSTANT_P (op2)
        && insn_data[icode].operand[4].predicate (op2, DImode))
      {
        e.args[3] = op2;
        return e.use_exact_insn (icode);
      }

    int unspec = (unsigned_p ? m_unspec_for_uint : m_unspec_for_sint);
    return e.use_exact_insn (code_for_aarch64_pred_cmp_wide (unspec, mode));
  }

  /* The tree code associated with the comparison.  */
  tree_code m_code;

  /* The unspec codes for signed and unsigned wide comparisons
     respectively.  */
  int m_unspec_for_sint;
  int m_unspec_for_uint;
};
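
/* E.g. for svcmplt_wide_s8 (pg, a, b) with b a constant splat of 5,
   the 5 satisfies the ordinary CMPLT immediate predicate and the
   unwidened pattern is used; a variable b instead becomes the wide
   comparison against .D elements.  */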

class svcmpuo_impl : public quiet<function_base>
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.add_ptrue_hint (0, e.gp_mode (0));
    return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0)));
  }
};

class svcnot_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    if (e.pred == PRED_x)
      {
        /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
           a ptrue hint.  */
        e.add_ptrue_hint (0, e.gp_mode (0));
        return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
      }

    return e.use_cond_insn (code_for_cond_cnot (mode), 0);
  }
};

/* Implements svcnt[bhwd], which count the number of elements
   in a particular vector mode.  */
class svcnt_bhwd_impl : public function_base
{
public:
  CONSTEXPR svcnt_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree count = build_int_cstu (TREE_TYPE (f.lhs),
                                 GET_MODE_NUNITS (m_ref_mode));
    return gimple_build_assign (f.lhs, count);
  }

  rtx
  expand (function_expander &) const OVERRIDE
  {
    return gen_int_mode (GET_MODE_NUNITS (m_ref_mode), DImode);
  }

  /* The mode of the vector associated with the [bhwd] suffix.  */
  machine_mode m_ref_mode;
};
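
/* Assuming the function table (outside this excerpt) registers svcntw
   with VNx4SImode, svcntw () folds straight to
   GET_MODE_NUNITS (VNx4SImode): a compile-time 8 for
   -msve-vector-bits=256, or a poly_int value for vector-length-agnostic
   code.  */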

/* Implements svcnt[bhwd]_pat.  */
class svcnt_bhwd_pat_impl : public svcnt_bhwd_impl
{
public:
  CONSTEXPR svcnt_bhwd_pat_impl (machine_mode ref_mode)
    : svcnt_bhwd_impl (ref_mode) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree pattern_arg = gimple_call_arg (f.call, 0);
    aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg);

    if (pattern == AARCH64_SV_ALL)
      /* svcnt[bhwd]_pat (SV_ALL) == svcnt[bhwd] ().  */
      return svcnt_bhwd_impl::fold (f);

    /* See whether we can count the number of elements in the pattern
       at compile time.  */
    unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
    HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq);
    if (value >= 0)
      {
        tree count = build_int_cstu (TREE_TYPE (f.lhs), value);
        return gimple_build_assign (f.lhs, count);
      }

    return NULL;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
    e.args.quick_push (gen_int_mode (elements_per_vq, DImode));
    e.args.quick_push (const1_rtx);
    return e.use_exact_insn (CODE_FOR_aarch64_sve_cnt_pat);
  }
};
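
/* For example, svcntw_pat (SV_VL4) can fold to a compile-time 4, since
   every SVE vector provides at least one 128-bit quadword and hence at
   least four 32-bit elements (elements_per_vq == 4).  Patterns whose
   count depends on the runtime vector length expand to a CNT..PAT
   instruction instead.  */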

class svcntp_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    e.add_ptrue_hint (0, mode);
    return e.use_exact_insn (code_for_aarch64_pred_cntp (mode));
  }
};

/* Implements svcreate2, svcreate3 and svcreate4.  */
class svcreate_impl : public quiet<multi_vector_function>
{
public:
  CONSTEXPR svcreate_impl (unsigned int vectors_per_tuple)
    : quiet<multi_vector_function> (vectors_per_tuple) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    unsigned int nargs = gimple_call_num_args (f.call);
    tree lhs_type = TREE_TYPE (f.lhs);

    /* Replace the call with a clobber of the result (to prevent it from
       becoming upwards exposed) followed by stores into each individual
       vector of the tuple.

       The fold routines expect the replacement statement to have the
       same lhs as the original call, so return the clobber statement
       rather than the final vector store.  */
    gassign *clobber = gimple_build_assign (f.lhs, build_clobber (lhs_type));

    for (unsigned int i = nargs; i-- > 0; )
      {
        tree rhs_vector = gimple_call_arg (f.call, i);
        tree field = tuple_type_field (TREE_TYPE (f.lhs));
        tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
                                 unshare_expr (f.lhs), field, NULL_TREE);
        tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
                                  lhs_array, size_int (i),
                                  NULL_TREE, NULL_TREE);
        gassign *assign = gimple_build_assign (lhs_vector, rhs_vector);
        gsi_insert_after (f.gsi, assign, GSI_SAME_STMT);
      }
    return clobber;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    rtx lhs_tuple = e.get_nonoverlapping_reg_target ();

    /* Record that LHS_TUPLE is dead before the first store.  */
    emit_clobber (lhs_tuple);
    for (unsigned int i = 0; i < e.args.length (); ++i)
      {
        /* Use an lvalue subreg to refer to vector I in LHS_TUPLE.  */
        rtx lhs_vector = simplify_gen_subreg (GET_MODE (e.args[i]),
                                              lhs_tuple, GET_MODE (lhs_tuple),
                                              i * BYTES_PER_SVE_VECTOR);
        emit_move_insn (lhs_vector, e.args[i]);
      }
    return lhs_tuple;
  }
};
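
/* Roughly, the fold above turns t = svcreate2 (a, b) into:

     t = CLOBBER;
     t.<field>[0] = a;
     t.<field>[1] = b;

   with the clobber returned as the replacement for the call itself
   (<field> standing for whatever tuple_type_field returns).  */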

class svcvt_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode0 = e.vector_mode (0);
    machine_mode mode1 = e.vector_mode (1);
    insn_code icode;
    /* All this complication comes from the need to select four things
       simultaneously:

       (1) the kind of conversion (int<-float, float<-int, float<-float)
       (2) signed vs. unsigned integers, where relevant
       (3) the predication mode, which must be the wider of the predication
           modes for MODE0 and MODE1
       (4) the predication type (m, x or z)

       The only supported int<->float conversions for which the integer is
       narrower than the float are SI<->DF.  It's therefore more convenient
       to handle (3) by defining two patterns for int<->float conversions:
       one in which the integer is at least as wide as the float and so
       determines the predication mode, and another single SI<->DF pattern
       in which the float's mode determines the predication mode (which is
       always VNx2BI in that case).

       The names of the patterns follow the optab convention of giving
       the source mode before the destination mode.  */
    if (e.type_suffix (1).integer_p)
      {
        int unspec = (e.type_suffix (1).unsigned_p
                      ? UNSPEC_COND_UCVTF
                      : UNSPEC_COND_SCVTF);
        if (e.type_suffix (0).element_bytes <= e.type_suffix (1).element_bytes)
          icode = (e.pred == PRED_x
                   ? code_for_aarch64_sve_nonextend (unspec, mode1, mode0)
                   : code_for_cond_nonextend (unspec, mode1, mode0));
        else
          icode = (e.pred == PRED_x
                   ? code_for_aarch64_sve_extend (unspec, mode1, mode0)
                   : code_for_cond_extend (unspec, mode1, mode0));
      }
    else
      {
        int unspec = (!e.type_suffix (0).integer_p ? UNSPEC_COND_FCVT
                      : e.type_suffix (0).unsigned_p ? UNSPEC_COND_FCVTZU
                      : UNSPEC_COND_FCVTZS);
        if (e.type_suffix (0).element_bytes >= e.type_suffix (1).element_bytes)
          icode = (e.pred == PRED_x
                   ? code_for_aarch64_sve_nontrunc (unspec, mode1, mode0)
                   : code_for_cond_nontrunc (unspec, mode1, mode0));
        else
          icode = (e.pred == PRED_x
                   ? code_for_aarch64_sve_trunc (unspec, mode1, mode0)
                   : code_for_cond_trunc (unspec, mode1, mode0));
      }

    if (e.pred == PRED_x)
      return e.use_pred_x_insn (icode);
    return e.use_cond_insn (icode);
  }
};
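
/* To make the case analysis concrete: svcvt_f64_s32_x converts from a
   narrower integer (SI) to a wider float (DF), so it selects the
   "extend" SCVTF pattern with VNx2BI predication, whereas
   svcvt_s32_f64_x takes the FCVTZS branch and, with its integer
   elements narrower than the float, the "trunc" pattern.  */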

class svdot_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* In the optab, the multiplication operands come before the accumulator
       operand.  The optab is keyed off the multiplication mode.  */
    e.rotate_inputs_left (0, 3);
    insn_code icode
      = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab,
                                         0, GET_MODE (e.args[0]));
    return e.use_unpred_insn (icode);
  }
};

class svdotprod_lane_impl : public unspec_based_function_base
{
public:
  CONSTEXPR svdotprod_lane_impl (int unspec_for_sint,
                                 int unspec_for_uint,
                                 int unspec_for_float)
    : unspec_based_function_base (unspec_for_sint,
                                  unspec_for_uint,
                                  unspec_for_float) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Use the same ordering as the dot_prod_optab, with the
       accumulator last.  */
    e.rotate_inputs_left (0, 4);
    int unspec = unspec_for (e);
    machine_mode mode = e.vector_mode (0);
    return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode));
  }
};

class svdup_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree vec_type = TREE_TYPE (f.lhs);
    tree rhs = gimple_call_arg (f.call, f.pred == PRED_none ? 0 : 1);

    if (f.pred == PRED_none || f.pred == PRED_x)
      {
        if (CONSTANT_CLASS_P (rhs))
          {
            if (f.type_suffix (0).bool_p)
              return (tree_to_shwi (rhs)
                      ? f.fold_to_ptrue ()
                      : f.fold_to_pfalse ());

            tree rhs_vector = build_vector_from_val (vec_type, rhs);
            return gimple_build_assign (f.lhs, rhs_vector);
          }

        /* Avoid folding _b to a VEC_DUPLICATE_EXPR, since to do that we
           would need to introduce an extra and unwanted conversion to
           the truth vector element type.  */
        if (!f.type_suffix (0).bool_p)
          return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs);
      }

    /* svdup_z (pg, x) == VEC_COND_EXPR <pg, VEC_DUPLICATE_EXPR <x>, 0>.  */
    if (f.pred == PRED_z)
      {
        gimple_seq stmts = NULL;
        tree pred = f.convert_pred (stmts, vec_type, 0);
        rhs = f.force_vector (stmts, vec_type, rhs);
        gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
        return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, rhs,
                                    build_zero_cst (vec_type));
      }

    return NULL;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.pred == PRED_none || e.pred == PRED_x)
      /* There's no benefit to using predicated instructions for _x here.  */
      return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab));

    /* Model predicated svdups as a SEL in which the "true" value is
       the duplicate of the function argument and the "false" value
       is the value of inactive lanes.  */
    insn_code icode;
    machine_mode mode = e.vector_mode (0);
    if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ()))
      /* Duplicate the constant to fill a vector.  The pattern optimizes
         various cases involving constant operands, falling back to SEL
         if necessary.  */
      icode = code_for_vcond_mask (mode, mode);
    else
      /* Use the pattern for selecting between a duplicated scalar
         variable and a vector fallback.  */
      icode = code_for_aarch64_sel_dup (mode);
    return e.use_vcond_mask_insn (icode);
  }
};
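
/* Examples of the folds above:
     svdup_n_s32 (1)       -> { 1, 1, ... } as a constant vector
     svdup_n_b8 (true)     -> ptrue
     svdup_n_s32_z (pg, x) -> VEC_COND_EXPR <pg, {x, x, ...}, {0, 0, ...}>  */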

class svdup_lane_impl : public quiet<function_base>
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The native DUP lane has an index range of 64 bytes.  */
    machine_mode mode = e.vector_mode (0);
    if (CONST_INT_P (e.args[1])
        && IN_RANGE (INTVAL (e.args[1]) * GET_MODE_UNIT_SIZE (mode), 0, 63))
      return e.use_exact_insn (code_for_aarch64_sve_dup_lane (mode));

    /* Treat svdup_lane as if it were svtbl_n.  */
    return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0)));
  }
};

class svdupq_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree vec_type = TREE_TYPE (f.lhs);
    unsigned int nargs = gimple_call_num_args (f.call);
    /* For predicates, pad out each argument so that we have one element
       per bit.  */
    unsigned int factor = (f.type_suffix (0).bool_p
                           ? f.type_suffix (0).element_bytes : 1);
    tree_vector_builder builder (vec_type, nargs * factor, 1);
    for (unsigned int i = 0; i < nargs; ++i)
      {
        tree elt = gimple_call_arg (f.call, i);
        if (!CONSTANT_CLASS_P (elt))
          return NULL;
        builder.quick_push (elt);
        for (unsigned int j = 1; j < factor; ++j)
          builder.quick_push (build_zero_cst (TREE_TYPE (vec_type)));
      }
    return gimple_build_assign (f.lhs, builder.build ());
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    unsigned int elements_per_vq = e.args.length ();
    if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
      {
        /* Construct a vector of integers so that we can compare them against
           zero below.  Zero vs. nonzero is the only distinction that
           matters.  */
        mode = aarch64_sve_int_mode (mode);
        for (unsigned int i = 0; i < elements_per_vq; ++i)
          e.args[i] = simplify_gen_unary (ZERO_EXTEND, GET_MODE_INNER (mode),
                                          e.args[i], QImode);
      }

    /* Get the 128-bit Advanced SIMD vector for this data size.  */
    scalar_mode element_mode = GET_MODE_INNER (mode);
    machine_mode vq_mode = aarch64_vq_mode (element_mode).require ();
    gcc_assert (known_eq (elements_per_vq, GET_MODE_NUNITS (vq_mode)));

    /* Put the arguments into a 128-bit Advanced SIMD vector.  We want
       argument N to go into architectural lane N, whereas Advanced SIMD
       vectors are loaded memory lsb to register lsb.  We therefore need
       to reverse the elements for big-endian targets.  */
    rtx vq_reg = gen_reg_rtx (vq_mode);
    rtvec vec = rtvec_alloc (elements_per_vq);
    for (unsigned int i = 0; i < elements_per_vq; ++i)
      {
        unsigned int argno = BYTES_BIG_ENDIAN ? elements_per_vq - i - 1 : i;
        RTVEC_ELT (vec, i) = e.args[argno];
      }
    aarch64_expand_vector_init (vq_reg, gen_rtx_PARALLEL (vq_mode, vec));

    /* If the result is a boolean, compare the data vector against zero.  */
    if (mode != e.vector_mode (0))
      {
        rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg);
        return aarch64_convert_sve_data_to_pred (e.possible_target,
                                                 e.vector_mode (0), data_dupq);
      }

    return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg);
  }
};
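
/* In outline: svdupq_n_s32 (a, b, c, d) first builds the 128-bit
   Advanced SIMD vector {a, b, c, d} and then broadcasts that quadword
   across the whole SVE register; the predicate forms instead build an
   integer vector and compare it against zero to recover a predicate.  */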

class svdupq_lane_impl : public quiet<function_base>
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    rtx index = e.args[1];
    if (CONST_INT_P (index) && IN_RANGE (INTVAL (index), 0, 3))
      {
        /* Use the .Q form of DUP, which is the native instruction for
           this function.  */
        insn_code icode = code_for_aarch64_sve_dupq_lane (mode);
        unsigned int num_indices = e.elements_per_vq (0);
        rtx indices = aarch64_gen_stepped_int_parallel
          (num_indices, INTVAL (index) * num_indices, 1);

        e.add_output_operand (icode);
        e.add_input_operand (icode, e.args[0]);
        e.add_fixed_operand (indices);
        return e.generate_insn (icode);
      }

    /* Build a .D TBL index for the pairs of doublewords that we want to
       duplicate.  */
    if (CONST_INT_P (index))
      {
        /* The index vector is a constant.  */
        rtx_vector_builder builder (VNx2DImode, 2, 1);
        builder.quick_push (gen_int_mode (INTVAL (index) * 2, DImode));
        builder.quick_push (gen_int_mode (INTVAL (index) * 2 + 1, DImode));
        index = builder.build ();
      }
    else
      {
        /* Duplicate INDEX * 2 to fill a DImode vector.  The ACLE spec
           explicitly allows the top of the index to be dropped.  */
        index = force_reg (DImode, simplify_gen_binary (ASHIFT, DImode,
                                                        index, const1_rtx));
        index = expand_vector_broadcast (VNx2DImode, index);

        /* Get an alternating 0, 1 predicate.  */
        rtx_vector_builder builder (VNx2BImode, 2, 1);
        builder.quick_push (const0_rtx);
        builder.quick_push (constm1_rtx);
        rtx pg = force_reg (VNx2BImode, builder.build ());

        /* Add one to the odd elements of the index.  */
        rtx one = force_reg (VNx2DImode, CONST1_RTX (VNx2DImode));
        rtx target = gen_reg_rtx (VNx2DImode);
        emit_insn (gen_cond_addvnx2di (target, pg, index, one, index));
        index = target;
      }

    e.args[0] = gen_lowpart (VNx2DImode, e.args[0]);
    e.args[1] = index;
    return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di);
  }
};

/* Implements svextb, svexth and svextw.  */
class svext_bhw_impl : public function_base
{
public:
  CONSTEXPR svext_bhw_impl (scalar_int_mode from_mode)
    : m_from_mode (from_mode) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.type_suffix (0).unsigned_p)
      {
        /* Convert to an AND.  The widest we go is 0xffffffff, which fits
           in a CONST_INT.  */
        e.args.quick_push (GEN_INT (GET_MODE_MASK (m_from_mode)));
        if (e.pred == PRED_m)
          /* We now have arguments "(inactive, pg, op, mask)".  Convert this
             to "(pg, op, mask, inactive)" so that the order matches svand_m
             with an extra argument on the end.  Take the inactive elements
             from this extra argument.  */
          e.rotate_inputs_left (0, 4);
        return e.map_to_rtx_codes (AND, AND, -1, 3);
      }

    machine_mode wide_mode = e.vector_mode (0);
    poly_uint64 nunits = GET_MODE_NUNITS (wide_mode);
    machine_mode narrow_mode
      = aarch64_sve_data_mode (m_from_mode, nunits).require ();
    if (e.pred == PRED_x)
      {
        insn_code icode = code_for_aarch64_pred_sxt (wide_mode, narrow_mode);
        return e.use_pred_x_insn (icode);
      }

    insn_code icode = code_for_aarch64_cond_sxt (wide_mode, narrow_mode);
    return e.use_cond_insn (icode);
  }

  /* The element mode that we're extending from.  */
  scalar_int_mode m_from_mode;
};
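
/* Concretely: svextb_u32_x (pg, a) zero-extends the low byte of each
   element and so becomes an AND with 0xff, whereas svextb_s32_x (pg, a)
   needs the sign bit and uses the SXT patterns instead.  */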

/* Implements svget2, svget3 and svget4.  */
class svget_impl : public quiet<multi_vector_function>
{
public:
  CONSTEXPR svget_impl (unsigned int vectors_per_tuple)
    : quiet<multi_vector_function> (vectors_per_tuple) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* Fold into a normal gimple component access.  */
    tree rhs_tuple = gimple_call_arg (f.call, 0);
    tree index = gimple_call_arg (f.call, 1);
    tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
    tree rhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
                             rhs_tuple, field, NULL_TREE);
    tree rhs_vector = build4 (ARRAY_REF, TREE_TYPE (f.lhs),
                              rhs_array, index, NULL_TREE, NULL_TREE);
    return gimple_build_assign (f.lhs, rhs_vector);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Fold the access into a subreg rvalue.  */
    return simplify_gen_subreg (e.vector_mode (0), e.args[0],
                                GET_MODE (e.args[0]),
                                INTVAL (e.args[1]) * BYTES_PER_SVE_VECTOR);
  }
};
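
/* E.g. svget2_s32 (tuple, 1) is just a component access of the tuple in
   gimple, and at the RTL level a subreg of the tuple register at byte
   offset 1 * BYTES_PER_SVE_VECTOR; no instruction need be emitted
   unless register allocation forces one.  */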

class svindex_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (e.direct_optab_handler (vec_series_optab));
  }
};

class svinsr_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    gcall *new_call = gimple_build_call_internal (IFN_VEC_SHL_INSERT, 2,
                                                  gimple_call_arg (f.call, 0),
                                                  gimple_call_arg (f.call, 1));
    gimple_call_set_lhs (new_call, f.lhs);
    return new_call;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = direct_optab_handler (vec_shl_insert_optab,
                                            e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

/* Implements svlasta and svlastb.  */
class svlast_impl : public quiet<function_base>
{
public:
  CONSTEXPR svlast_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (code_for_extract (m_unspec, e.vector_mode (0)));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

class svld1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
                                                  base, cookie, pred);
    gimple_call_set_lhs (new_call, f.lhs);
    return new_call;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = convert_optab_handler (maskload_optab,
                                             e.vector_mode (0), e.gp_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

/* Implements extending contiguous forms of svld1.  */
class svld1_extend_impl : public extending_load
{
public:
  CONSTEXPR svld1_extend_impl (type_suffix_index memory_type)
    : extending_load (memory_type) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_load (extend_rtx_code (),
                                             e.vector_mode (0),
                                             e.memory_vector_mode ());
    return e.use_contiguous_load_insn (icode);
  }
};

class svld1_gather_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, as required by mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    machine_mode mem_mode = e.memory_vector_mode ();
    machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
    insn_code icode = convert_optab_handler (mask_gather_load_optab,
                                             mem_mode, int_mode);
    return e.use_exact_insn (icode);
  }
};

/* Implements extending forms of svld1_gather.  */
class svld1_gather_extend_impl : public extending_load
{
public:
  CONSTEXPR svld1_gather_extend_impl (type_suffix_index memory_type)
    : extending_load (memory_type) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since the extending gathers use the same
       operand order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    /* Add a constant predicate for the extension rtx.  */
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (),
                                                    e.vector_mode (0),
                                                    e.memory_vector_mode ());
    return e.use_exact_insn (icode);
  }
};

class load_replicate : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  tree
  memory_scalar_type (const function_instance &fi) const OVERRIDE
  {
    return fi.scalar_type (0);
  }
};

class svld1rq_impl : public load_replicate
{
public:
  machine_mode
  memory_vector_mode (const function_instance &fi) const OVERRIDE
  {
    return aarch64_vq_mode (GET_MODE_INNER (fi.vector_mode (0))).require ();
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

class svld1ro_impl : public load_replicate
{
public:
  machine_mode
  memory_vector_mode (const function_instance &) const OVERRIDE
  {
    return OImode;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_sve_ld1ro (e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

/* Implements svld2, svld3 and svld4.  */
class svld234_impl : public full_width_access
{
public:
  CONSTEXPR svld234_impl (unsigned int vectors_per_tuple)
    : full_width_access (vectors_per_tuple) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree tuple_type = TREE_TYPE (f.lhs);
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    /* Emit two statements: a clobber of the lhs, so that it isn't
       upwards exposed, and then the load itself.

       The fold routines expect the replacement statement to have the
       same lhs as the original call, so return the clobber statement
       rather than the load.  */
    gimple *clobber = gimple_build_assign (f.lhs, build_clobber (tuple_type));

    /* View the loaded data as an array of vectors.  */
    tree field = tuple_type_field (tuple_type);
    tree lhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field),
                             unshare_expr (f.lhs));

    /* Emit the load itself.  */
    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
                                                  base, cookie, pred);
    gimple_call_set_lhs (new_call, lhs_array);
    gsi_insert_after (f.gsi, new_call, GSI_SAME_STMT);

    return clobber;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr));
    insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab,
                                             tuple_mode, e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

class svldff1_gather_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since ldff1_gather uses the same operand
       order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    machine_mode mem_mode = e.memory_vector_mode ();
    return e.use_exact_insn (code_for_aarch64_ldff1_gather (mem_mode));
  }
};

/* Implements extending forms of svldff1_gather.  */
class svldff1_gather_extend : public extending_load
{
public:
  CONSTEXPR svldff1_gather_extend (type_suffix_index memory_type)
    : extending_load (memory_type) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since ldff1_gather uses the same operand
       order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    /* Add a constant predicate for the extension rtx.  */
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (),
                                                     e.vector_mode (0),
                                                     e.memory_vector_mode ());
    return e.use_exact_insn (icode);
  }
};

class svldnt1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

/* Implements svldff1 and svldnf1.  */
class svldxf1_impl : public full_width_access
{
public:
  CONSTEXPR svldxf1_impl (int unspec) : m_unspec (unspec) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    machine_mode mode = e.vector_mode (0);
    return e.use_contiguous_load_insn (code_for_aarch64_ldf1 (m_unspec, mode));
  }

  /* The unspec associated with the load.  */
  int m_unspec;
};

/* Implements extending contiguous forms of svldff1 and svldnf1.  */
class svldxf1_extend_impl : public extending_load
{
public:
  CONSTEXPR svldxf1_extend_impl (type_suffix_index memory_type, int unspec)
    : extending_load (memory_type), m_unspec (unspec) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    insn_code icode = code_for_aarch64_ldf1 (m_unspec, extend_rtx_code (),
                                             e.vector_mode (0),
                                             e.memory_vector_mode ());
    return e.use_contiguous_load_insn (icode);
  }

  /* The unspec associated with the load.  */
  int m_unspec;
};

class svlen_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* The argument only exists for its type.  */
    tree rhs_type = TREE_TYPE (gimple_call_arg (f.call, 0));
    tree count = build_int_cstu (TREE_TYPE (f.lhs),
                                 TYPE_VECTOR_SUBPARTS (rhs_type));
    return gimple_build_assign (f.lhs, count);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The argument only exists for its type.  */
    return gen_int_mode (GET_MODE_NUNITS (e.vector_mode (0)), DImode);
  }
};
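
/* So svlen_s32 (x) behaves like svcntw (): x is consulted only for its
   type, never for its value, and the result is the element count of
   that type.  */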

class svmad_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return expand_mad (e);
  }
};

class svmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Put the accumulator at the end (argument 3), but keep it as the
       merge input for _m functions.  */
    e.rotate_inputs_left (1, 4);
    return expand_mad (e, 3);
  }
};

class svmla_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.type_suffix (0).integer_p)
      {
        machine_mode mode = e.vector_mode (0);
        return e.use_exact_insn (code_for_aarch64_sve_add_mul_lane (mode));
      }
    return expand_mla_mls_lane (e, UNSPEC_FMLA);
  }
};

class svmls_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Put the accumulator at the end (argument 3), but keep it as the
       merge input for _m functions.  */
    e.rotate_inputs_left (1, 4);
    return expand_msb (e, 3);
  }
};

class svmov_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    return gimple_build_assign (f.lhs, BIT_AND_EXPR,
                                gimple_call_arg (f.call, 0),
                                gimple_call_arg (f.call, 1));
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The canonical form for the assembler alias "MOV Pa.B, Pb/Z, Pc.B"
       is "AND Pa.B, Pb/Z, Pc.B, Pc.B".  */
    gcc_assert (e.pred == PRED_z);
    e.args.quick_push (e.args[1]);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_andvnx16bi_z);
  }
};

class svmls_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.type_suffix (0).integer_p)
      {
        machine_mode mode = e.vector_mode (0);
        return e.use_exact_insn (code_for_aarch64_sve_sub_mul_lane (mode));
      }
    return expand_mla_mls_lane (e, UNSPEC_FMLS);
  }
};

class svmmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode;
    if (e.type_suffix (0).integer_p)
      {
        if (e.type_suffix (0).unsigned_p)
          icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0));
        else
          icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0));
      }
    else
      icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

class svmsb_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return expand_msb (e);
  }
};

class svnand_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_nandvnx16bi_z);
  }
};

class svnor_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_norvnx16bi_z);
  }
};

class svnot_impl : public rtx_code_function
{
public:
  CONSTEXPR svnot_impl () : rtx_code_function (NOT, NOT, -1) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
      {
        /* The canonical form for the assembler alias "NOT Pa.B, Pb/Z, Pc.B"
           is "EOR Pa.B, Pb/Z, Pb.B, Pc.B".  */
        gcc_assert (e.pred == PRED_z);
        e.args.quick_insert (1, e.args[0]);
        return e.use_exact_insn (CODE_FOR_aarch64_pred_xorvnx16bi_z);
      }
    return rtx_code_function::expand (e);
  }
};

class svorn_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_ornvnx16bi_z);
  }
};

class svpfalse_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    return f.fold_to_pfalse ();
  }

  rtx
  expand (function_expander &) const OVERRIDE
  {
    return CONST0_RTX (VNx16BImode);
  }
};

/* Implements svpfirst and svpnext, which share the same .md patterns.  */
class svpfirst_svpnext_impl : public function_base
{
public:
  CONSTEXPR svpfirst_svpnext_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    e.add_ptrue_hint (0, mode);
    return e.use_exact_insn (code_for_aarch64_sve (m_unspec, mode));
  }

  /* The unspec associated with the operation.  */
  int m_unspec;
};

/* Implements contiguous forms of svprf[bhwd].  */
class svprf_bhwd_impl : public function_base
{
public:
  CONSTEXPR svprf_bhwd_impl (machine_mode mode) : m_mode (mode) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_PREFETCH_MEMORY;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_prefetch_operands ();
    insn_code icode = code_for_aarch64_sve_prefetch (m_mode);
    return e.use_contiguous_prefetch_insn (icode);
  }

  /* The mode that we'd use to hold one vector of prefetched data.  */
  machine_mode m_mode;
};

/* Implements svprf[bhwd]_gather.  */
class svprf_bhwd_gather_impl : public function_base
{
public:
  CONSTEXPR svprf_bhwd_gather_impl (machine_mode mode) : m_mode (mode) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_PREFETCH_MEMORY;
  }

  machine_mode
  memory_vector_mode (const function_instance &) const OVERRIDE
  {
    return m_mode;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_prefetch_operands ();
    e.prepare_gather_address_operands (1);

    /* Insert a zero operand to identify the mode of the memory being
       accessed.  This goes between the gather operands and prefetch
       operands created above.  */
    e.args.quick_insert (5, CONST0_RTX (m_mode));

    machine_mode reg_mode = GET_MODE (e.args[2]);
    insn_code icode = code_for_aarch64_sve_gather_prefetch (m_mode, reg_mode);
    return e.use_exact_insn (icode);
  }

  /* The mode that we'd use to hold one vector of prefetched data.  */
  machine_mode m_mode;
};

/* Implements svptest_any, svptest_first and svptest_last.  */
class svptest_impl : public function_base
{
public:
  CONSTEXPR svptest_impl (rtx_code compare) : m_compare (compare) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See whether GP is an exact ptrue for some predicate mode;
       i.e. whether converting the GP to that mode will not drop
       set bits and will leave all significant bits set.  */
    machine_mode wide_mode;
    int hint;
    if (aarch64_ptrue_all_mode (e.args[0]).exists (&wide_mode))
      hint = SVE_KNOWN_PTRUE;
    else
      {
        hint = SVE_MAYBE_NOT_PTRUE;
        wide_mode = VNx16BImode;
      }

    /* Generate the PTEST itself.  */
    rtx pg = force_reg (VNx16BImode, e.args[0]);
    rtx wide_pg = gen_lowpart (wide_mode, pg);
    rtx hint_rtx = gen_int_mode (hint, DImode);
    rtx op = force_reg (wide_mode, gen_lowpart (wide_mode, e.args[1]));
    emit_insn (gen_aarch64_ptestvnx16bi (pg, wide_pg, hint_rtx, op));

    /* Get the location of the boolean result.  We can provide SImode and
       DImode values directly; rely on generic code to convert others.  */
    rtx target = e.possible_target;
    if (!target
        || !REG_P (target)
        || (GET_MODE (target) != SImode && GET_MODE (target) != DImode))
      target = gen_reg_rtx (DImode);

    /* Generate a CSET to convert the CC result of the PTEST to a boolean.  */
    rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
    rtx compare = gen_rtx_fmt_ee (m_compare, GET_MODE (target),
                                  cc_reg, const0_rtx);
    emit_insn (gen_rtx_SET (target, compare));
    return target;
  }

1746 /* The comparison code associated with ptest condition. */ | |
1747 rtx_code m_compare; | |
1748 }; | |
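
/* For example (illustrative register allocation), svptest_any (pg, x)
   expands to a PTEST of X governed by PG, followed by a CSET that
   materializes the boolean result:

        ptest   pg, x.b
        cset    w0, ne

   with the NE condition replaced as appropriate for svptest_first
   and svptest_last.  */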

class svptrue_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    return f.fold_to_ptrue ();
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return aarch64_ptrue_all (e.type_suffix (0).element_bytes);
  }
};

class svptrue_pat_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree pattern_arg = gimple_call_arg (f.call, 0);
    aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg);

    if (pattern == AARCH64_SV_ALL)
      /* svptrue_pat_bN (SV_ALL) == svptrue_bN ().  */
      return f.fold_to_ptrue ();

    /* See whether we can count the number of elements in the pattern
       at compile time.  If so, construct a predicate with that number
       of 1s followed by all 0s.  */
    int nelts_per_vq = f.elements_per_vq (0);
    HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, nelts_per_vq);
    if (value >= 0)
      return f.fold_to_vl_pred (value);

    return NULL;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* In rtl, the predicate is represented as the constant:

         (const:VNx16BI (unspec:VNx16BI [(const_int PATTERN)
                                         (const_vector:VnnBI [zeros])]
                         UNSPEC_PTRUE))

       where nn determines the element size.  */
    rtvec vec = gen_rtvec (2, e.args[0], CONST0_RTX (e.vector_mode (0)));
    return gen_rtx_CONST (VNx16BImode,
                          gen_rtx_UNSPEC (VNx16BImode, vec, UNSPEC_PTRUE));
  }
};
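
/* For example, with 16 bytes per 128-bit block, svptrue_pat_b8 (SV_VL3)
   has a compile-time element count of 3, so the fold above produces the
   constant predicate { 1, 1, 1, 0, ... }, while svptrue_pat_b8 (SV_ALL)
   folds to the equivalent of svptrue_b8 ().  */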

/* Implements svqdec[bhwd]{,_pat} and svqinc[bhwd]{,_pat}.  */
class svqdec_svqinc_bhwd_impl : public function_base
{
public:
  CONSTEXPR svqdec_svqinc_bhwd_impl (rtx_code code_for_sint,
                                     rtx_code code_for_uint,
                                     scalar_int_mode elem_mode)
    : m_code_for_sint (code_for_sint),
      m_code_for_uint (code_for_uint),
      m_elem_mode (elem_mode)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Treat non-_pat functions in the same way as _pat functions with
       an SV_ALL argument.  */
    if (e.args.length () == 2)
      e.args.quick_insert (1, gen_int_mode (AARCH64_SV_ALL, DImode));

    /* Insert the number of elements per 128-bit block as a fake argument,
       between the pattern and the multiplier.  Arguments 1, 2 and 3 then
       correspond exactly with the 3 UNSPEC_SVE_CNT_PAT operands; see
       aarch64_sve_cnt_pat for details.  */
    unsigned int elements_per_vq = 128 / GET_MODE_BITSIZE (m_elem_mode);
    e.args.quick_insert (2, gen_int_mode (elements_per_vq, DImode));

    rtx_code code = (e.type_suffix (0).unsigned_p
                     ? m_code_for_uint
                     : m_code_for_sint);

    /* Choose between operating on integer scalars or integer vectors.  */
    machine_mode mode = e.vector_mode (0);
    if (e.mode_suffix_id == MODE_n)
      mode = GET_MODE_INNER (mode);
    return e.use_exact_insn (code_for_aarch64_sve_pat (code, mode));
  }

  /* The saturating addition or subtraction codes to use for signed and
     unsigned values respectively.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;

  /* The integer mode associated with the [bhwd] suffix.  */
  scalar_int_mode m_elem_mode;
};
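
/* For example, svqinch_pat (x, SV_VL8, 2) reaches the expander with the
   arguments (x, SV_VL8, 2); the code above inserts 8 (the number of
   16-bit elements per 128-bit block) to give (x, SV_VL8, 8, 2), matching
   the three UNSPEC_SVE_CNT_PAT operands.  A plain svqinch (x, 2) first
   gains an implicit SV_ALL pattern argument.  */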

/* Implements svqdec[bhwd]{,_pat}.  */
class svqdec_bhwd_impl : public svqdec_svqinc_bhwd_impl
{
public:
  CONSTEXPR svqdec_bhwd_impl (scalar_int_mode elem_mode)
    : svqdec_svqinc_bhwd_impl (SS_MINUS, US_MINUS, elem_mode) {}
};

/* Implements svqinc[bhwd]{,_pat}.  */
class svqinc_bhwd_impl : public svqdec_svqinc_bhwd_impl
{
public:
  CONSTEXPR svqinc_bhwd_impl (scalar_int_mode elem_mode)
    : svqdec_svqinc_bhwd_impl (SS_PLUS, US_PLUS, elem_mode) {}
};

/* Implements svqdecp and svqincp.  */
class svqdecp_svqincp_impl : public function_base
{
public:
  CONSTEXPR svqdecp_svqincp_impl (rtx_code code_for_sint,
                                  rtx_code code_for_uint)
    : m_code_for_sint (code_for_sint),
      m_code_for_uint (code_for_uint)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    rtx_code code = (e.type_suffix (0).unsigned_p
                     ? m_code_for_uint
                     : m_code_for_sint);
    insn_code icode;
    if (e.mode_suffix_id == MODE_n)
      {
        /* Increment or decrement a scalar (whose mode is given by the first
           type suffix) by the number of active elements in a predicate
           (whose mode is given by the second type suffix).  */
        machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
        icode = code_for_aarch64_sve_cntp (code, mode, e.vector_mode (1));
      }
    else
      /* Increment a vector by the number of active elements in a predicate,
         with the vector mode determining the predicate mode.  */
      icode = code_for_aarch64_sve_cntp (code, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The saturating addition or subtraction codes to use for signed and
     unsigned values respectively.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;
};

class svrdffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_copy_ffr_to_ffrt ());
    rtx result = e.use_exact_insn (e.pred == PRED_z
                                   ? CODE_FOR_aarch64_rdffr_z
                                   : CODE_FOR_aarch64_rdffr);
    emit_insn (gen_aarch64_update_ffrt ());
    return result;
  }
};

class svreinterpret_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* Punt to rtl if the effect of the reinterpret on registers does not
       conform to GCC's endianness model.  */
    if (!targetm.can_change_mode_class (f.vector_mode (0),
                                        f.vector_mode (1), FP_REGS))
      return NULL;

    /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR
       reinterpretation.  */
    tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (f.lhs),
                       gimple_call_arg (f.call, 0));
    return gimple_build_assign (f.lhs, VIEW_CONVERT_EXPR, rhs);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode));
  }
};
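
/* For example, svreinterpret_s8_f32 (x) normally folds to
   VIEW_CONVERT_EXPR<svint8_t> (x); the rtl fallback is only needed when
   the target cannot reinterpret FP_REGS registers directly, as in some
   big-endian configurations.  */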

class svrev_impl : public permute
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* Punt for now on _b16 and wider; we'd need more complex evpc logic
       to rerecognize the result.  */
    if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8)
      return NULL;

    /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }.  */
    poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 1, 3);
    for (int i = 0; i < 3; ++i)
      builder.quick_push (nelts - i - 1);
    return fold_permute (f, builder);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0)));
  }
};
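
/* The three explicit indices above are enough to describe the whole
   series: they give vec_perm_indices the start and the constant step.
   For example, with 8 elements the encoded selector expands to
   { 7, 6, 5, 4, 3, 2, 1, 0 }.  */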

class svsel_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* svsel corresponds exactly to VEC_COND_EXPR.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, f.vector_type (0), 0);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
    return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred,
                                gimple_call_arg (f.call, 1),
                                gimple_call_arg (f.call, 2));
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond).  */
    e.rotate_inputs_left (0, 3);
    insn_code icode = convert_optab_handler (vcond_mask_optab,
                                             e.vector_mode (0),
                                             e.gp_mode (0));
    return e.use_exact_insn (icode);
  }
};

/* Implements svset2, svset3 and svset4.  */
class svset_impl : public quiet<multi_vector_function>
{
public:
  CONSTEXPR svset_impl (unsigned int vectors_per_tuple)
    : quiet<multi_vector_function> (vectors_per_tuple) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree rhs_tuple = gimple_call_arg (f.call, 0);
    tree index = gimple_call_arg (f.call, 1);
    tree rhs_vector = gimple_call_arg (f.call, 2);

    /* Replace the call with two statements: a copy of the full tuple
       to the call result, followed by an update of the individual vector.

       The fold routines expect the replacement statement to have the
       same lhs as the original call, so return the copy statement
       rather than the field update.  */
    gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple);

    /* Get a reference to the individual vector.  */
    tree field = tuple_type_field (TREE_TYPE (f.lhs));
    tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
                             f.lhs, field, NULL_TREE);
    tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
                              lhs_array, index, NULL_TREE, NULL_TREE);
    gassign *update = gimple_build_assign (lhs_vector, rhs_vector);
    gsi_insert_after (f.gsi, update, GSI_SAME_STMT);

    return copy;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    rtx rhs_tuple = e.args[0];
    unsigned int index = INTVAL (e.args[1]);
    rtx rhs_vector = e.args[2];

    /* First copy the full tuple to the target register.  */
    rtx lhs_tuple = e.get_nonoverlapping_reg_target ();
    emit_move_insn (lhs_tuple, rhs_tuple);

    /* ...then update the individual vector.  */
    rtx lhs_vector = simplify_gen_subreg (GET_MODE (rhs_vector),
                                          lhs_tuple, GET_MODE (lhs_tuple),
                                          index * BYTES_PER_SVE_VECTOR);
    emit_move_insn (lhs_vector, rhs_vector);
    return lhs_vector;
  }
};
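
/* For example, y = svset2_s32 (x, 1, v) folds to the two statements:

        y = x;
        y.__val[1] = v;

   (field name illustrative), with the tuple copy returned as the
   replacement for the original call.  */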

class svsetffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
  }
};

class svst1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_WRITE_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    tree rhs = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 1);
    return gimple_build_call_internal (IFN_MASK_STORE, 4,
                                       base, cookie, pred, rhs);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = convert_optab_handler (maskstore_optab,
                                             e.vector_mode (0), e.gp_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};
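
/* For example, svst1_s32 (pg, base, data) folds to the internal call
   IFN_MASK_STORE (base, cookie, pg, data), which is the same form that
   the vectorizer produces for masked stores, so later gimple passes can
   treat intrinsic and autovectorized stores alike.  */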

class svst1_scatter_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_WRITE_MEMORY;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, as required by mask_scatter_store_optab.  */
    e.rotate_inputs_left (0, 6);
    machine_mode mem_mode = e.memory_vector_mode ();
    machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
    insn_code icode = convert_optab_handler (mask_scatter_store_optab,
                                             mem_mode, int_mode);
    return e.use_exact_insn (icode);
  }
};

/* Implements truncating forms of svst1_scatter.  */
class svst1_scatter_truncate_impl : public truncating_store
{
public:
  CONSTEXPR svst1_scatter_truncate_impl (scalar_int_mode to_mode)
    : truncating_store (to_mode) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since the truncating scatters use the same
       operand order as mask_scatter_store_optab.  */
    e.rotate_inputs_left (0, 6);
    insn_code icode = code_for_aarch64_scatter_store_trunc
      (e.memory_vector_mode (), e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

/* Implements truncating contiguous forms of svst1.  */
class svst1_truncate_impl : public truncating_store
{
public:
  CONSTEXPR svst1_truncate_impl (scalar_int_mode to_mode)
    : truncating_store (to_mode) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_store_trunc (e.memory_vector_mode (),
                                                    e.vector_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};

/* Implements svst2, svst3 and svst4.  */
class svst234_impl : public full_width_access
{
public:
  CONSTEXPR svst234_impl (unsigned int vectors_per_tuple)
    : full_width_access (vectors_per_tuple) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_WRITE_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    /* View the stored data as an array of vectors.  */
    unsigned int num_args = gimple_call_num_args (f.call);
    tree rhs_tuple = gimple_call_arg (f.call, num_args - 1);
    tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
    tree rhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), rhs_tuple);

    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    return gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
                                       base, cookie, pred, rhs_array);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode tuple_mode = GET_MODE (e.args.last ());
    insn_code icode = convert_optab_handler (vec_mask_store_lanes_optab,
                                             tuple_mode, e.vector_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};

class svstnt1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_WRITE_MEMORY;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};

class svsub_impl : public rtx_code_function
{
public:
  CONSTEXPR svsub_impl ()
    : rtx_code_function (MINUS, MINUS, UNSPEC_COND_FSUB) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Canonicalize subtractions of constants to additions.  */
    machine_mode mode = e.vector_mode (0);
    if (e.try_negating_argument (2, mode))
      return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD);

    return rtx_code_function::expand (e);
  }
};
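
/* For example, svsub_n_s32_x (pg, x, 5) is expanded as if it were
   svadd_n_s32_x (pg, x, -5), so a single set of ADD-immediate patterns
   covers both directions.  */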

class svtbl_impl : public permute
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0)));
  }
};

/* Implements svtrn1 and svtrn2.  */
class svtrn_impl : public binary_permute
{
public:
  CONSTEXPR svtrn_impl (int base)
    : binary_permute (base ? UNSPEC_TRN2 : UNSPEC_TRN1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* svtrn1: { 0, nelts, 2, nelts + 2, 4, nelts + 4, ... }
       svtrn2: as for svtrn1, but with 1 added to each index.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 2, 3);
    for (unsigned int i = 0; i < 3; ++i)
      {
        builder.quick_push (m_base + i * 2);
        builder.quick_push (m_base + i * 2 + nelts);
      }
    return fold_permute (f, builder);
  }

  /* 0 for svtrn1, 1 for svtrn2.  */
  unsigned int m_base;
};
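
/* For example, with 8 elements per vector, svtrn1 selects
   { 0, 8, 2, 10, 4, 12, 6, 14 } and svtrn2 selects
   { 1, 9, 3, 11, 5, 13, 7, 15 }, interleaving the even (respectively
   odd) numbered lanes of the two inputs.  */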

/* Base class for svundef{,2,3,4}.  */
class svundef_impl : public quiet<multi_vector_function>
{
public:
  CONSTEXPR svundef_impl (unsigned int vectors_per_tuple)
    : quiet<multi_vector_function> (vectors_per_tuple) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* Don't fold svundef at the gimple level.  There's no exact
       correspondence for SSA_NAMEs, and we explicitly don't want
       to generate a specific value (like an all-zeros vector).  */
    if (vectors_per_tuple () == 1)
      return NULL;
    return gimple_build_assign (f.lhs, build_clobber (TREE_TYPE (f.lhs)));
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    rtx target = e.get_reg_target ();
    emit_clobber (copy_rtx (target));
    return target;
  }
};

/* Implements svunpklo and svunpkhi.  */
class svunpk_impl : public quiet<function_base>
{
public:
  CONSTEXPR svunpk_impl (bool high_p) : m_high_p (high_p) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* Don't fold the predicate ops, since every bit of the svbool_t
       result is significant.  */
    if (f.type_suffix_ids[0] == TYPE_SUFFIX_b)
      return NULL;

    /* The first half in memory is VEC_UNPACK_LO_EXPR for little-endian
       and VEC_UNPACK_HI_EXPR for big-endian.  */
    bool high_p = BYTES_BIG_ENDIAN ? !m_high_p : m_high_p;
    tree_code code = high_p ? VEC_UNPACK_HI_EXPR : VEC_UNPACK_LO_EXPR;
    return gimple_build_assign (f.lhs, code, gimple_call_arg (f.call, 0));
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = GET_MODE (e.args[0]);
    unsigned int unpacku = m_high_p ? UNSPEC_UNPACKUHI : UNSPEC_UNPACKULO;
    unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO;
    insn_code icode;
    if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
      icode = code_for_aarch64_sve_punpk (unpacku, mode);
    else
      {
        int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks;
        icode = code_for_aarch64_sve_unpk (unspec, unspec, mode);
      }
    return e.use_exact_insn (icode);
  }

  /* True for svunpkhi, false for svunpklo.  */
  bool m_high_p;
};
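
/* For example, on a little-endian target svunpklo_s32 folds to
   VEC_UNPACK_LO_EXPR, whereas on a big-endian target the low half of
   the register corresponds to the high half in memory order, so the
   same call folds to VEC_UNPACK_HI_EXPR.  */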

/* Also implements svsudot.  */
class svusdot_impl : public function_base
{
public:
  CONSTEXPR svusdot_impl (bool su) : m_su (su) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The implementation of the ACLE function svsudot (for the non-lane
       version) is through the USDOT instruction but with the second and
       third inputs swapped.  */
    if (m_su)
      e.rotate_inputs_left (1, 2);
    /* The ACLE function has the same order requirements as for svdot.
       While there's no requirement for the RTL pattern to have the same
       sort of order as that for <sur>dot_prod, it's easier to read.
       Hence we do the same rotation on arguments as svdot_impl does.  */
    e.rotate_inputs_left (0, 3);
    machine_mode mode = e.vector_mode (0);
    insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode);
    return e.use_exact_insn (icode);
  }

private:
  bool m_su;
};

/* Implements svuzp1 and svuzp2.  */
class svuzp_impl : public binary_permute
{
public:
  CONSTEXPR svuzp_impl (unsigned int base)
    : binary_permute (base ? UNSPEC_UZP2 : UNSPEC_UZP1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* svuzp1: { 0, 2, 4, 6, ... }
       svuzp2: { 1, 3, 5, 7, ... }.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 1, 3);
    for (unsigned int i = 0; i < 3; ++i)
      builder.quick_push (m_base + i * 2);
    return fold_permute (f, builder);
  }

  /* 0 for svuzp1, 1 for svuzp2.  */
  unsigned int m_base;
};

/* A function_base for svwhilele and svwhilelt functions.  */
class svwhilelx_impl : public while_comparison
{
public:
  CONSTEXPR svwhilelx_impl (int unspec_for_sint, int unspec_for_uint, bool eq_p)
    : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p)
  {}

  /* Try to fold a call by treating its arguments as constants of type T.  */
  template<typename T>
  gimple *
  fold_type (gimple_folder &f) const
  {
    /* Only handle cases in which both operands are constant.  */
    T arg0, arg1;
    if (!poly_int_tree_p (gimple_call_arg (f.call, 0), &arg0)
        || !poly_int_tree_p (gimple_call_arg (f.call, 1), &arg1))
      return NULL;

    /* Check whether the result is known to be all-false.  */
    if (m_eq_p ? known_gt (arg0, arg1) : known_ge (arg0, arg1))
      return f.fold_to_pfalse ();

    /* Punt if we can't tell at compile time whether the result
       is all-false.  */
    if (m_eq_p ? maybe_gt (arg0, arg1) : maybe_ge (arg0, arg1))
      return NULL;

    /* At this point we know the result has at least one set element.  */
    poly_uint64 diff = arg1 - arg0;
    poly_uint64 nelts = GET_MODE_NUNITS (f.vector_mode (0));

    /* Canonicalize the svwhilele form to the svwhilelt form.  Subtract
       from NELTS rather than adding to DIFF, to prevent overflow.  */
    if (m_eq_p)
      nelts -= 1;

    /* Check whether the result is known to be all-true.  */
    if (known_ge (diff, nelts))
      return f.fold_to_ptrue ();

    /* Punt if DIFF might not be the actual number of set elements
       in the result.  Conditional equality is fine.  */
    if (maybe_gt (diff, nelts))
      return NULL;

    /* At this point we know that the predicate will have DIFF set elements
       for svwhilelt and DIFF + 1 set elements for svwhilele (which stops
       after rather than before ARG1 is reached).  See if we can create
       the predicate at compile time.  */
    unsigned HOST_WIDE_INT vl;
    if (diff.is_constant (&vl))
      /* Overflow is no longer possible after the checks above.  */
      return f.fold_to_vl_pred (m_eq_p ? vl + 1 : vl);

    return NULL;
  }

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    if (f.type_suffix (1).unsigned_p)
      return fold_type<poly_uint64> (f);
    else
      return fold_type<poly_int64> (f);
  }

  /* True for svwhilele, false for svwhilelt.  */
  bool m_eq_p;
};
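
/* For example, svwhilelt_b8 (0, 3) has a known DIFF of 3, which is
   within range for every vector length, so it folds to the constant
   predicate with the first three elements set.  If the second argument
   has already been folded to the poly constant returned by svcntb (),
   svwhilelt_b8 (0, svcntb ()) instead folds to a ptrue.  */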

class svwrffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
  }
};

/* Implements svzip1 and svzip2.  */
class svzip_impl : public binary_permute
{
public:
  CONSTEXPR svzip_impl (unsigned int base)
    : binary_permute (base ? UNSPEC_ZIP2 : UNSPEC_ZIP1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* svzip1: { 0, nelts, 1, nelts + 1, 2, nelts + 2, ... }
       svzip2: as for svzip1, but with nelts / 2 added to each index.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    poly_uint64 base = m_base * exact_div (nelts, 2);
    vec_perm_builder builder (nelts, 2, 3);
    for (unsigned int i = 0; i < 3; ++i)
      {
        builder.quick_push (base + i);
        builder.quick_push (base + i + nelts);
      }
    return fold_permute (f, builder);
  }

  /* 0 for svzip1, 1 for svzip2.  */
  unsigned int m_base;
};
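
/* For example, with 8 elements per vector, svzip1 selects
   { 0, 8, 1, 9, 2, 10, 3, 11 } and svzip2 selects
   { 4, 12, 5, 13, 6, 14, 7, 15 }, i.e. the low and high halves of the
   two inputs interleaved.  */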

} /* end anonymous namespace */

namespace aarch64_sve {

FUNCTION (svabd, svabd_impl,)
FUNCTION (svabs, quiet<rtx_code_function>, (ABS, ABS, UNSPEC_COND_FABS))
FUNCTION (svacge, svac_impl, (UNSPEC_COND_FCMGE))
FUNCTION (svacgt, svac_impl, (UNSPEC_COND_FCMGT))
FUNCTION (svacle, svac_impl, (UNSPEC_COND_FCMLE))
FUNCTION (svaclt, svac_impl, (UNSPEC_COND_FCMLT))
FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD))
FUNCTION (svadda, svadda_impl,)
FUNCTION (svaddv, reduction, (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV))
FUNCTION (svadrb, svadr_bhwd_impl, (0))
FUNCTION (svadrd, svadr_bhwd_impl, (3))
FUNCTION (svadrh, svadr_bhwd_impl, (1))
FUNCTION (svadrw, svadr_bhwd_impl, (2))
FUNCTION (svand, rtx_code_function, (AND, AND))
FUNCTION (svandv, reduction, (UNSPEC_ANDV))
FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE))
FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1))
FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf))
FUNCTION (svbfdot_lane, fixed_insn_function,
          (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf))
FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf))
FUNCTION (svbfmlalb_lane, fixed_insn_function,
          (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf))
FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf))
FUNCTION (svbfmlalt_lane, fixed_insn_function,
          (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf))
FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf))
FUNCTION (svbic, svbic_impl,)
FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA))
FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB))
FUNCTION (svbrkn, svbrk_binary_impl, (UNSPEC_BRKN))
FUNCTION (svbrkpa, svbrk_binary_impl, (UNSPEC_BRKPA))
FUNCTION (svbrkpb, svbrk_binary_impl, (UNSPEC_BRKPB))
FUNCTION (svcadd, svcadd_impl,)
FUNCTION (svclasta, svclast_impl, (UNSPEC_CLASTA))
FUNCTION (svclastb, svclast_impl, (UNSPEC_CLASTB))
FUNCTION (svcls, unary_count, (CLRSB))
FUNCTION (svclz, unary_count, (CLZ))
FUNCTION (svcmla, svcmla_impl,)
FUNCTION (svcmla_lane, svcmla_lane_impl,)
FUNCTION (svcmpeq, svcmp_impl, (EQ_EXPR, UNSPEC_COND_FCMEQ))
FUNCTION (svcmpeq_wide, svcmp_wide_impl, (EQ_EXPR, UNSPEC_COND_CMPEQ_WIDE,
                                          UNSPEC_COND_CMPEQ_WIDE))
FUNCTION (svcmpge, svcmp_impl, (GE_EXPR, UNSPEC_COND_FCMGE))
FUNCTION (svcmpge_wide, svcmp_wide_impl, (GE_EXPR, UNSPEC_COND_CMPGE_WIDE,
                                          UNSPEC_COND_CMPHS_WIDE))
FUNCTION (svcmpgt, svcmp_impl, (GT_EXPR, UNSPEC_COND_FCMGT))
FUNCTION (svcmpgt_wide, svcmp_wide_impl, (GT_EXPR, UNSPEC_COND_CMPGT_WIDE,
                                          UNSPEC_COND_CMPHI_WIDE))
FUNCTION (svcmple, svcmp_impl, (LE_EXPR, UNSPEC_COND_FCMLE))
FUNCTION (svcmple_wide, svcmp_wide_impl, (LE_EXPR, UNSPEC_COND_CMPLE_WIDE,
                                          UNSPEC_COND_CMPLS_WIDE))
FUNCTION (svcmplt, svcmp_impl, (LT_EXPR, UNSPEC_COND_FCMLT))
FUNCTION (svcmplt_wide, svcmp_wide_impl, (LT_EXPR, UNSPEC_COND_CMPLT_WIDE,
                                          UNSPEC_COND_CMPLO_WIDE))
FUNCTION (svcmpne, svcmp_impl, (NE_EXPR, UNSPEC_COND_FCMNE))
FUNCTION (svcmpne_wide, svcmp_wide_impl, (NE_EXPR, UNSPEC_COND_CMPNE_WIDE,
                                          UNSPEC_COND_CMPNE_WIDE))
FUNCTION (svcmpuo, svcmpuo_impl,)
FUNCTION (svcnot, svcnot_impl,)
FUNCTION (svcnt, unary_count, (POPCOUNT))
FUNCTION (svcntb, svcnt_bhwd_impl, (VNx16QImode))
FUNCTION (svcntb_pat, svcnt_bhwd_pat_impl, (VNx16QImode))
FUNCTION (svcntd, svcnt_bhwd_impl, (VNx2DImode))
FUNCTION (svcntd_pat, svcnt_bhwd_pat_impl, (VNx2DImode))
FUNCTION (svcnth, svcnt_bhwd_impl, (VNx8HImode))
FUNCTION (svcnth_pat, svcnt_bhwd_pat_impl, (VNx8HImode))
FUNCTION (svcntp, svcntp_impl,)
FUNCTION (svcntw, svcnt_bhwd_impl, (VNx4SImode))
FUNCTION (svcntw_pat, svcnt_bhwd_pat_impl, (VNx4SImode))
FUNCTION (svcompact, QUIET_CODE_FOR_MODE0 (aarch64_sve_compact),)
FUNCTION (svcreate2, svcreate_impl, (2))
FUNCTION (svcreate3, svcreate_impl, (3))
FUNCTION (svcreate4, svcreate_impl, (4))
FUNCTION (svcvt, svcvt_impl,)
FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),)
FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdot, svdot_impl,)
FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1))
FUNCTION (svdup, svdup_impl,)
FUNCTION (svdup_lane, svdup_lane_impl,)
FUNCTION (svdupq, svdupq_impl,)
FUNCTION (svdupq_lane, svdupq_lane_impl,)
FUNCTION (sveor, rtx_code_function, (XOR, XOR, -1))
FUNCTION (sveorv, reduction, (UNSPEC_XORV))
FUNCTION (svexpa, unspec_based_function, (-1, -1, UNSPEC_FEXPA))
FUNCTION (svext, QUIET_CODE_FOR_MODE0 (aarch64_sve_ext),)
FUNCTION (svextb, svext_bhw_impl, (QImode))
FUNCTION (svexth, svext_bhw_impl, (HImode))
FUNCTION (svextw, svext_bhw_impl, (SImode))
FUNCTION (svget2, svget_impl, (2))
FUNCTION (svget3, svget_impl, (3))
FUNCTION (svget4, svget_impl, (4))
FUNCTION (svindex, svindex_impl,)
FUNCTION (svinsr, svinsr_impl,)
FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA))
FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB))
FUNCTION (svld1, svld1_impl,)
FUNCTION (svld1_gather, svld1_gather_impl,)
FUNCTION (svld1ro, svld1ro_impl,)
FUNCTION (svld1rq, svld1rq_impl,)
FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8))
FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8))
FUNCTION (svld1sh, svld1_extend_impl, (TYPE_SUFFIX_s16))
FUNCTION (svld1sh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s16))
FUNCTION (svld1sw, svld1_extend_impl, (TYPE_SUFFIX_s32))
FUNCTION (svld1sw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s32))
FUNCTION (svld1ub, svld1_extend_impl, (TYPE_SUFFIX_u8))
FUNCTION (svld1ub_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u8))
FUNCTION (svld1uh, svld1_extend_impl, (TYPE_SUFFIX_u16))
FUNCTION (svld1uh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u16))
FUNCTION (svld1uw, svld1_extend_impl, (TYPE_SUFFIX_u32))
FUNCTION (svld1uw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u32))
FUNCTION (svld2, svld234_impl, (2))
FUNCTION (svld3, svld234_impl, (3))
FUNCTION (svld4, svld234_impl, (4))
FUNCTION (svldff1, svldxf1_impl, (UNSPEC_LDFF1))
FUNCTION (svldff1_gather, svldff1_gather_impl,)
FUNCTION (svldff1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDFF1))
FUNCTION (svldff1sb_gather, svldff1_gather_extend, (TYPE_SUFFIX_s8))
FUNCTION (svldff1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDFF1))
FUNCTION (svldff1sh_gather, svldff1_gather_extend, (TYPE_SUFFIX_s16))
FUNCTION (svldff1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDFF1))
FUNCTION (svldff1sw_gather, svldff1_gather_extend, (TYPE_SUFFIX_s32))
FUNCTION (svldff1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDFF1))
FUNCTION (svldff1ub_gather, svldff1_gather_extend, (TYPE_SUFFIX_u8))
FUNCTION (svldff1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDFF1))
FUNCTION (svldff1uh_gather, svldff1_gather_extend, (TYPE_SUFFIX_u16))
FUNCTION (svldff1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDFF1))
FUNCTION (svldff1uw_gather, svldff1_gather_extend, (TYPE_SUFFIX_u32))
FUNCTION (svldnf1, svldxf1_impl, (UNSPEC_LDNF1))
FUNCTION (svldnf1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDNF1))
FUNCTION (svldnf1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDNF1))
FUNCTION (svldnf1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDNF1))
FUNCTION (svldnf1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDNF1))
FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1))
FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1))
FUNCTION (svldnt1, svldnt1_impl,)
FUNCTION (svlen, svlen_impl,)
FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT))
FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE))
FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
FUNCTION (svmad, svmad_impl,)
FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX))
FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM))
FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV))
FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV))
FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN))
FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM))
FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV))
FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV))
FUNCTION (svmla, svmla_impl,)
FUNCTION (svmla_lane, svmla_lane_impl,)
FUNCTION (svmls, svmls_impl,)
FUNCTION (svmls_lane, svmls_lane_impl,)
FUNCTION (svmmla, svmmla_impl,)
FUNCTION (svmov, svmov_impl,)
FUNCTION (svmsb, svmsb_impl,)
FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL))
FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),)
FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART,
                                          UNSPEC_UMUL_HIGHPART, -1))
FUNCTION (svmulx, unspec_based_function, (-1, -1, UNSPEC_COND_FMULX))
FUNCTION (svnand, svnand_impl,)
FUNCTION (svneg, quiet<rtx_code_function>, (NEG, NEG, UNSPEC_COND_FNEG))
FUNCTION (svnmad, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLA))
FUNCTION (svnmla, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLA))
FUNCTION (svnmls, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLS))
FUNCTION (svnmsb, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLS))
FUNCTION (svnor, svnor_impl,)
FUNCTION (svnot, svnot_impl,)
FUNCTION (svorn, svorn_impl,)
FUNCTION (svorr, rtx_code_function, (IOR, IOR))
FUNCTION (svorv, reduction, (UNSPEC_IORV))
FUNCTION (svpfalse, svpfalse_impl,)
FUNCTION (svpfirst, svpfirst_svpnext_impl, (UNSPEC_PFIRST))
FUNCTION (svpnext, svpfirst_svpnext_impl, (UNSPEC_PNEXT))
FUNCTION (svprfb, svprf_bhwd_impl, (VNx16QImode))
FUNCTION (svprfb_gather, svprf_bhwd_gather_impl, (VNx16QImode))
FUNCTION (svprfd, svprf_bhwd_impl, (VNx2DImode))
FUNCTION (svprfd_gather, svprf_bhwd_gather_impl, (VNx2DImode))
FUNCTION (svprfh, svprf_bhwd_impl, (VNx8HImode))
FUNCTION (svprfh_gather, svprf_bhwd_gather_impl, (VNx8HImode))
FUNCTION (svprfw, svprf_bhwd_impl, (VNx4SImode))
FUNCTION (svprfw_gather, svprf_bhwd_gather_impl, (VNx4SImode))
FUNCTION (svptest_any, svptest_impl, (NE))
FUNCTION (svptest_first, svptest_impl, (LT))
FUNCTION (svptest_last, svptest_impl, (LTU))
FUNCTION (svptrue, svptrue_impl,)
FUNCTION (svptrue_pat, svptrue_pat_impl,)
FUNCTION (svqadd, rtx_code_function, (SS_PLUS, US_PLUS, -1))
FUNCTION (svqdecb, svqdec_bhwd_impl, (QImode))
FUNCTION (svqdecb_pat, svqdec_bhwd_impl, (QImode))
FUNCTION (svqdecd, svqdec_bhwd_impl, (DImode))
FUNCTION (svqdecd_pat, svqdec_bhwd_impl, (DImode))
FUNCTION (svqdech, svqdec_bhwd_impl, (HImode))
FUNCTION (svqdech_pat, svqdec_bhwd_impl, (HImode))
FUNCTION (svqdecp, svqdecp_svqincp_impl, (SS_MINUS, US_MINUS))
FUNCTION (svqdecw, svqdec_bhwd_impl, (SImode))
FUNCTION (svqdecw_pat, svqdec_bhwd_impl, (SImode))
FUNCTION (svqincb, svqinc_bhwd_impl, (QImode))
FUNCTION (svqincb_pat, svqinc_bhwd_impl, (QImode))
FUNCTION (svqincd, svqinc_bhwd_impl, (DImode))
FUNCTION (svqincd_pat, svqinc_bhwd_impl, (DImode))
FUNCTION (svqinch, svqinc_bhwd_impl, (HImode))
FUNCTION (svqinch_pat, svqinc_bhwd_impl, (HImode))
FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, US_PLUS))
FUNCTION (svqincw, svqinc_bhwd_impl, (SImode))
FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode))
FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1))
FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1))
FUNCTION (svrdffr, svrdffr_impl,)
FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE))
FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS))
FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX))
FUNCTION (svreinterpret, svreinterpret_impl,)
FUNCTION (svrev, svrev_impl,)
FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1))
FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1))
FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1))
FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA))
FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI))
FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM))
FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN))
FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP))
FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX))
FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ))
FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE))
FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS))
FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE))
FUNCTION (svsel, svsel_impl,)
FUNCTION (svset2, svset_impl, (2))
FUNCTION (svset3, svset_impl, (3))
FUNCTION (svset4, svset_impl, (4))
FUNCTION (svsetffr, svsetffr_impl,)
FUNCTION (svsplice, QUIET_CODE_FOR_MODE0 (aarch64_sve_splice),)
FUNCTION (svsqrt, rtx_code_function, (SQRT, SQRT, UNSPEC_COND_FSQRT))
FUNCTION (svst1, svst1_impl,)
FUNCTION (svst1_scatter, svst1_scatter_impl,)
FUNCTION (svst1b, svst1_truncate_impl, (QImode))
FUNCTION (svst1b_scatter, svst1_scatter_truncate_impl, (QImode))
FUNCTION (svst1h, svst1_truncate_impl, (HImode))
FUNCTION (svst1h_scatter, svst1_scatter_truncate_impl, (HImode))
FUNCTION (svst1w, svst1_truncate_impl, (SImode))
FUNCTION (svst1w_scatter, svst1_scatter_truncate_impl, (SImode))
FUNCTION (svst2, svst234_impl, (2))
FUNCTION (svst3, svst234_impl, (3))
FUNCTION (svst4, svst234_impl, (4))
FUNCTION (svstnt1, svstnt1_impl,)
FUNCTION (svsub, svsub_impl,)
FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB))
FUNCTION (svsudot, svusdot_impl, (true))
FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1))
FUNCTION (svtbl, svtbl_impl,)
FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),)
FUNCTION (svtrn1, svtrn_impl, (0))
FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q,
                                           UNSPEC_TRN1Q))
FUNCTION (svtrn2, svtrn_impl, (1))
FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q,
                                           UNSPEC_TRN2Q))
FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL))
FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL))
FUNCTION (svundef, svundef_impl, (1))
FUNCTION (svundef2, svundef_impl, (2))
FUNCTION (svundef3, svundef_impl, (3))
FUNCTION (svundef4, svundef_impl, (4))
FUNCTION (svunpkhi, svunpk_impl, (true))
FUNCTION (svunpklo, svunpk_impl, (false))
FUNCTION (svusdot, svusdot_impl, (false))
FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1))
FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1))
FUNCTION (svuzp1, svuzp_impl, (0))
FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q,
                                           UNSPEC_UZP1Q))
FUNCTION (svuzp2, svuzp_impl, (1))
FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q,
                                           UNSPEC_UZP2Q))
FUNCTION (svwhilele, svwhilelx_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true))
FUNCTION (svwhilelt, svwhilelx_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false))
FUNCTION (svwrffr, svwrffr_impl,)
FUNCTION (svzip1, svzip_impl, (0))
FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q,
                                           UNSPEC_ZIP1Q))
FUNCTION (svzip2, svzip_impl, (1))
FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q,
                                           UNSPEC_ZIP2Q))

} /* end namespace aarch64_sve */