Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/aarch64/aarch64-builtins.c @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | 84e7813d76e9 |
children |
comparison
equal
deleted
inserted
replaced
131:84e7813d76e9 | 145:1830386684a0 |
---|---|
1 /* Builtins' description for AArch64 SIMD architecture. | 1 /* Builtins' description for AArch64 SIMD architecture. |
2 Copyright (C) 2011-2018 Free Software Foundation, Inc. | 2 Copyright (C) 2011-2020 Free Software Foundation, Inc. |
3 Contributed by ARM Ltd. | 3 Contributed by ARM Ltd. |
4 | 4 |
5 This file is part of GCC. | 5 This file is part of GCC. |
6 | 6 |
7 GCC is free software; you can redistribute it and/or modify it | 7 GCC is free software; you can redistribute it and/or modify it |
40 #include "explow.h" | 40 #include "explow.h" |
41 #include "expr.h" | 41 #include "expr.h" |
42 #include "langhooks.h" | 42 #include "langhooks.h" |
43 #include "gimple-iterator.h" | 43 #include "gimple-iterator.h" |
44 #include "case-cfn-macros.h" | 44 #include "case-cfn-macros.h" |
45 #include "emit-rtl.h" | |
45 | 46 |
46 #define v8qi_UP E_V8QImode | 47 #define v8qi_UP E_V8QImode |
47 #define v4hi_UP E_V4HImode | 48 #define v4hi_UP E_V4HImode |
48 #define v4hf_UP E_V4HFmode | 49 #define v4hf_UP E_V4HFmode |
49 #define v2si_UP E_V2SImode | 50 #define v2si_UP E_V2SImode |
65 #define si_UP E_SImode | 66 #define si_UP E_SImode |
66 #define sf_UP E_SFmode | 67 #define sf_UP E_SFmode |
67 #define hi_UP E_HImode | 68 #define hi_UP E_HImode |
68 #define hf_UP E_HFmode | 69 #define hf_UP E_HFmode |
69 #define qi_UP E_QImode | 70 #define qi_UP E_QImode |
71 #define bf_UP E_BFmode | |
72 #define v4bf_UP E_V4BFmode | |
73 #define v8bf_UP E_V8BFmode | |
70 #define UP(X) X##_UP | 74 #define UP(X) X##_UP |
71 | 75 |
72 #define SIMD_MAX_BUILTIN_ARGS 5 | 76 #define SIMD_MAX_BUILTIN_ARGS 5 |
73 | 77 |
74 enum aarch64_type_qualifiers | 78 enum aarch64_type_qualifiers |
100 /* Polynomial types. */ | 104 /* Polynomial types. */ |
101 qualifier_poly = 0x100, | 105 qualifier_poly = 0x100, |
102 /* Lane indices - must be in range, and flipped for bigendian. */ | 106 /* Lane indices - must be in range, and flipped for bigendian. */ |
103 qualifier_lane_index = 0x200, | 107 qualifier_lane_index = 0x200, |
104 /* Lane indices for single lane structure loads and stores. */ | 108 /* Lane indices for single lane structure loads and stores. */ |
105 qualifier_struct_load_store_lane_index = 0x400 | 109 qualifier_struct_load_store_lane_index = 0x400, |
110 /* Lane indices selected in pairs. - must be in range, and flipped for | |
111 bigendian. */ | |
112 qualifier_lane_pair_index = 0x800, | |
113 /* Lane indices selected in quadtuplets. - must be in range, and flipped for | |
114 bigendian. */ | |
115 qualifier_lane_quadtup_index = 0x1000, | |
106 }; | 116 }; |
107 | 117 |
108 typedef struct | 118 typedef struct |
109 { | 119 { |
110 const char *name; | 120 const char *name; |
167 static enum aarch64_type_qualifiers | 177 static enum aarch64_type_qualifiers |
168 aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] | 178 aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] |
169 = { qualifier_unsigned, qualifier_unsigned, | 179 = { qualifier_unsigned, qualifier_unsigned, |
170 qualifier_unsigned, qualifier_immediate }; | 180 qualifier_unsigned, qualifier_immediate }; |
171 #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers) | 181 #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers) |
172 | 182 static enum aarch64_type_qualifiers |
173 | 183 aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS] |
184 = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none }; | |
185 #define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers) | |
186 | |
187 | |
188 static enum aarch64_type_qualifiers | |
189 aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS] | |
190 = { qualifier_none, qualifier_none, qualifier_none, | |
191 qualifier_none, qualifier_lane_pair_index }; | |
192 #define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers) | |
174 static enum aarch64_type_qualifiers | 193 static enum aarch64_type_qualifiers |
175 aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] | 194 aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] |
176 = { qualifier_none, qualifier_none, qualifier_none, | 195 = { qualifier_none, qualifier_none, qualifier_none, |
177 qualifier_none, qualifier_lane_index }; | 196 qualifier_none, qualifier_lane_index }; |
178 #define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers) | 197 #define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers) |
179 static enum aarch64_type_qualifiers | 198 static enum aarch64_type_qualifiers |
180 aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] | 199 aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] |
181 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, | 200 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, |
182 qualifier_unsigned, qualifier_lane_index }; | 201 qualifier_unsigned, qualifier_lane_index }; |
183 #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers) | 202 #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers) |
203 | |
204 static enum aarch64_type_qualifiers | |
205 aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS] | |
206 = { qualifier_none, qualifier_none, qualifier_unsigned, | |
207 qualifier_none, qualifier_lane_quadtup_index }; | |
208 #define TYPES_QUADOPSSUS_LANE_QUADTUP \ | |
209 (aarch64_types_quadopssus_lane_quadtup_qualifiers) | |
210 static enum aarch64_type_qualifiers | |
211 aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS] | |
212 = { qualifier_none, qualifier_none, qualifier_none, | |
213 qualifier_unsigned, qualifier_lane_quadtup_index }; | |
214 #define TYPES_QUADOPSSSU_LANE_QUADTUP \ | |
215 (aarch64_types_quadopsssu_lane_quadtup_qualifiers) | |
184 | 216 |
185 static enum aarch64_type_qualifiers | 217 static enum aarch64_type_qualifiers |
186 aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] | 218 aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] |
187 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, | 219 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, |
188 qualifier_unsigned, qualifier_immediate }; | 220 qualifier_unsigned, qualifier_immediate }; |
354 CRC32_BUILTIN (crc32cb, QI) \ | 386 CRC32_BUILTIN (crc32cb, QI) \ |
355 CRC32_BUILTIN (crc32ch, HI) \ | 387 CRC32_BUILTIN (crc32ch, HI) \ |
356 CRC32_BUILTIN (crc32cw, SI) \ | 388 CRC32_BUILTIN (crc32cw, SI) \ |
357 CRC32_BUILTIN (crc32cx, DI) | 389 CRC32_BUILTIN (crc32cx, DI) |
358 | 390 |
391 /* The next 8 FCMLA instrinsics require some special handling compared the | |
392 normal simd intrinsics. */ | |
393 #define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \ | |
394 FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \ | |
395 FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \ | |
396 FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \ | |
397 FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \ | |
398 FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \ | |
399 FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \ | |
400 FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \ | |
401 FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true) \ | |
402 | |
359 typedef struct | 403 typedef struct |
360 { | 404 { |
361 const char *name; | 405 const char *name; |
362 machine_mode mode; | 406 machine_mode mode; |
363 const enum insn_code icode; | 407 const enum insn_code icode; |
364 unsigned int fcode; | 408 unsigned int fcode; |
365 } aarch64_crc_builtin_datum; | 409 } aarch64_crc_builtin_datum; |
366 | 410 |
411 /* Hold information about how to expand the FCMLA_LANEQ builtins. */ | |
412 typedef struct | |
413 { | |
414 const char *name; | |
415 machine_mode mode; | |
416 const enum insn_code icode; | |
417 unsigned int fcode; | |
418 bool lane; | |
419 } aarch64_fcmla_laneq_builtin_datum; | |
420 | |
367 #define CRC32_BUILTIN(N, M) \ | 421 #define CRC32_BUILTIN(N, M) \ |
368 AARCH64_BUILTIN_##N, | 422 AARCH64_BUILTIN_##N, |
423 | |
424 #define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \ | |
425 AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, | |
369 | 426 |
370 #undef VAR1 | 427 #undef VAR1 |
371 #define VAR1(T, N, MAP, A) \ | 428 #define VAR1(T, N, MAP, A) \ |
372 AARCH64_SIMD_BUILTIN_##T##_##N##A, | 429 AARCH64_SIMD_BUILTIN_##T##_##N##A, |
373 | 430 |
396 AARCH64_CRC32_BUILTINS | 453 AARCH64_CRC32_BUILTINS |
397 AARCH64_CRC32_BUILTIN_MAX, | 454 AARCH64_CRC32_BUILTIN_MAX, |
398 /* ARMv8.3-A Pointer Authentication Builtins. */ | 455 /* ARMv8.3-A Pointer Authentication Builtins. */ |
399 AARCH64_PAUTH_BUILTIN_AUTIA1716, | 456 AARCH64_PAUTH_BUILTIN_AUTIA1716, |
400 AARCH64_PAUTH_BUILTIN_PACIA1716, | 457 AARCH64_PAUTH_BUILTIN_PACIA1716, |
458 AARCH64_PAUTH_BUILTIN_AUTIB1716, | |
459 AARCH64_PAUTH_BUILTIN_PACIB1716, | |
401 AARCH64_PAUTH_BUILTIN_XPACLRI, | 460 AARCH64_PAUTH_BUILTIN_XPACLRI, |
461 /* Special cased Armv8.3-A Complex FMA by Lane quad Builtins. */ | |
462 AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE, | |
463 AARCH64_SIMD_FCMLA_LANEQ_BUILTINS | |
464 /* Builtin for Arm8.3-a Javascript conversion instruction. */ | |
465 AARCH64_JSCVT, | |
466 /* TME builtins. */ | |
467 AARCH64_TME_BUILTIN_TSTART, | |
468 AARCH64_TME_BUILTIN_TCOMMIT, | |
469 AARCH64_TME_BUILTIN_TTEST, | |
470 AARCH64_TME_BUILTIN_TCANCEL, | |
471 /* Armv8.5-a RNG instruction builtins. */ | |
472 AARCH64_BUILTIN_RNG_RNDR, | |
473 AARCH64_BUILTIN_RNG_RNDRRS, | |
474 /* MEMTAG builtins. */ | |
475 AARCH64_MEMTAG_BUILTIN_START, | |
476 AARCH64_MEMTAG_BUILTIN_IRG, | |
477 AARCH64_MEMTAG_BUILTIN_GMI, | |
478 AARCH64_MEMTAG_BUILTIN_SUBP, | |
479 AARCH64_MEMTAG_BUILTIN_INC_TAG, | |
480 AARCH64_MEMTAG_BUILTIN_SET_TAG, | |
481 AARCH64_MEMTAG_BUILTIN_GET_TAG, | |
482 AARCH64_MEMTAG_BUILTIN_END, | |
402 AARCH64_BUILTIN_MAX | 483 AARCH64_BUILTIN_MAX |
403 }; | 484 }; |
404 | 485 |
405 #undef CRC32_BUILTIN | 486 #undef CRC32_BUILTIN |
406 #define CRC32_BUILTIN(N, M) \ | 487 #define CRC32_BUILTIN(N, M) \ |
407 {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N}, | 488 {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N}, |
408 | 489 |
409 static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = { | 490 static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = { |
410 AARCH64_CRC32_BUILTINS | 491 AARCH64_CRC32_BUILTINS |
492 }; | |
493 | |
494 | |
495 #undef FCMLA_LANEQ_BUILTIN | |
496 #define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \ | |
497 {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \ | |
498 AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T}, | |
499 | |
500 /* This structure contains how to manage the mapping form the builtin to the | |
501 instruction to generate in the backend and how to invoke the instruction. */ | |
502 static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = { | |
503 AARCH64_SIMD_FCMLA_LANEQ_BUILTINS | |
411 }; | 504 }; |
412 | 505 |
413 #undef CRC32_BUILTIN | 506 #undef CRC32_BUILTIN |
414 | 507 |
415 static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; | 508 static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; |
496 /* The user-visible __fp16 type, and a pointer to that type. Used | 589 /* The user-visible __fp16 type, and a pointer to that type. Used |
497 across the back-end. */ | 590 across the back-end. */ |
498 tree aarch64_fp16_type_node = NULL_TREE; | 591 tree aarch64_fp16_type_node = NULL_TREE; |
499 tree aarch64_fp16_ptr_type_node = NULL_TREE; | 592 tree aarch64_fp16_ptr_type_node = NULL_TREE; |
500 | 593 |
594 /* Back-end node type for brain float (bfloat) types. */ | |
595 tree aarch64_bf16_type_node = NULL_TREE; | |
596 tree aarch64_bf16_ptr_type_node = NULL_TREE; | |
597 | |
598 /* Wrapper around add_builtin_function. NAME is the name of the built-in | |
599 function, TYPE is the function type, and CODE is the function subcode | |
600 (relative to AARCH64_BUILTIN_GENERAL). */ | |
601 static tree | |
602 aarch64_general_add_builtin (const char *name, tree type, unsigned int code) | |
603 { | |
604 code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL; | |
605 return add_builtin_function (name, type, code, BUILT_IN_MD, | |
606 NULL, NULL_TREE); | |
607 } | |
608 | |
501 static const char * | 609 static const char * |
502 aarch64_mangle_builtin_scalar_type (const_tree type) | 610 aarch64_mangle_builtin_scalar_type (const_tree type) |
503 { | 611 { |
504 int i = 0; | 612 int i = 0; |
505 | 613 |
534 | 642 |
535 return NULL; | 643 return NULL; |
536 } | 644 } |
537 | 645 |
538 const char * | 646 const char * |
539 aarch64_mangle_builtin_type (const_tree type) | 647 aarch64_general_mangle_builtin_type (const_tree type) |
540 { | 648 { |
541 const char *mangle; | 649 const char *mangle; |
542 /* Walk through all the AArch64 builtins types tables to filter out the | 650 /* Walk through all the AArch64 builtins types tables to filter out the |
543 incoming type. */ | 651 incoming type. */ |
544 if ((mangle = aarch64_mangle_builtin_vector_type (type)) | 652 if ((mangle = aarch64_mangle_builtin_vector_type (type)) |
576 return aarch64_fp16_type_node; | 684 return aarch64_fp16_type_node; |
577 case E_SFmode: | 685 case E_SFmode: |
578 return float_type_node; | 686 return float_type_node; |
579 case E_DFmode: | 687 case E_DFmode: |
580 return double_type_node; | 688 return double_type_node; |
689 case E_BFmode: | |
690 return aarch64_bf16_type_node; | |
581 default: | 691 default: |
582 gcc_unreachable (); | 692 gcc_unreachable (); |
583 } | 693 } |
584 #undef QUAL_TYPE | 694 #undef QUAL_TYPE |
585 } | 695 } |
666 aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node; | 776 aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node; |
667 aarch64_simd_types[Float32x2_t].eltype = float_type_node; | 777 aarch64_simd_types[Float32x2_t].eltype = float_type_node; |
668 aarch64_simd_types[Float32x4_t].eltype = float_type_node; | 778 aarch64_simd_types[Float32x4_t].eltype = float_type_node; |
669 aarch64_simd_types[Float64x1_t].eltype = double_type_node; | 779 aarch64_simd_types[Float64x1_t].eltype = double_type_node; |
670 aarch64_simd_types[Float64x2_t].eltype = double_type_node; | 780 aarch64_simd_types[Float64x2_t].eltype = double_type_node; |
781 | |
782 /* Init Bfloat vector types with underlying __bf16 type. */ | |
783 aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node; | |
784 aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node; | |
671 | 785 |
672 for (i = 0; i < nelts; i++) | 786 for (i = 0; i < nelts; i++) |
673 { | 787 { |
674 tree eltype = aarch64_simd_types[i].eltype; | 788 tree eltype = aarch64_simd_types[i].eltype; |
675 machine_mode mode = aarch64_simd_types[i].mode; | 789 machine_mode mode = aarch64_simd_types[i].mode; |
744 "__builtin_aarch64_simd_udi"); | 858 "__builtin_aarch64_simd_udi"); |
745 } | 859 } |
746 | 860 |
747 static bool aarch64_simd_builtins_initialized_p = false; | 861 static bool aarch64_simd_builtins_initialized_p = false; |
748 | 862 |
863 /* Due to the architecture not providing lane variant of the lane instructions | |
864 for fcmla we can't use the standard simd builtin expansion code, but we | |
865 still want the majority of the validation that would normally be done. */ | |
866 | |
867 void | |
868 aarch64_init_fcmla_laneq_builtins (void) | |
869 { | |
870 unsigned int i = 0; | |
871 | |
872 for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i) | |
873 { | |
874 aarch64_fcmla_laneq_builtin_datum* d | |
875 = &aarch64_fcmla_lane_builtin_data[i]; | |
876 tree argtype = aarch64_lookup_simd_builtin_type (d->mode, qualifier_none); | |
877 machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require (); | |
878 tree quadtype | |
879 = aarch64_lookup_simd_builtin_type (quadmode, qualifier_none); | |
880 tree lanetype | |
881 = aarch64_simd_builtin_std_type (SImode, qualifier_lane_pair_index); | |
882 tree ftype = build_function_type_list (argtype, argtype, argtype, | |
883 quadtype, lanetype, NULL_TREE); | |
884 tree fndecl = aarch64_general_add_builtin (d->name, ftype, d->fcode); | |
885 | |
886 aarch64_builtin_decls[d->fcode] = fndecl; | |
887 } | |
888 } | |
889 | |
749 void | 890 void |
750 aarch64_init_simd_builtins (void) | 891 aarch64_init_simd_builtins (void) |
751 { | 892 { |
752 unsigned int i, fcode = AARCH64_SIMD_PATTERN_START; | 893 unsigned int i, fcode = AARCH64_SIMD_PATTERN_START; |
753 | 894 |
767 tree lane_check_fpr = build_function_type_list (void_type_node, | 908 tree lane_check_fpr = build_function_type_list (void_type_node, |
768 size_type_node, | 909 size_type_node, |
769 size_type_node, | 910 size_type_node, |
770 intSI_type_node, | 911 intSI_type_node, |
771 NULL); | 912 NULL); |
772 aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] = | 913 aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] |
773 add_builtin_function ("__builtin_aarch64_im_lane_boundsi", lane_check_fpr, | 914 = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi", |
774 AARCH64_SIMD_BUILTIN_LANE_CHECK, BUILT_IN_MD, | 915 lane_check_fpr, |
775 NULL, NULL_TREE); | 916 AARCH64_SIMD_BUILTIN_LANE_CHECK); |
776 | 917 |
777 for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++) | 918 for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++) |
778 { | 919 { |
779 bool print_type_signature_p = false; | 920 bool print_type_signature_p = false; |
780 char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 }; | 921 char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 }; |
868 d->name, type_signature); | 1009 d->name, type_signature); |
869 else | 1010 else |
870 snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s", | 1011 snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s", |
871 d->name); | 1012 d->name); |
872 | 1013 |
873 fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, | 1014 fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode); |
874 NULL, NULL_TREE); | |
875 aarch64_builtin_decls[fcode] = fndecl; | 1015 aarch64_builtin_decls[fcode] = fndecl; |
876 } | 1016 } |
1017 | |
1018 /* Initialize the remaining fcmla_laneq intrinsics. */ | |
1019 aarch64_init_fcmla_laneq_builtins (); | |
877 } | 1020 } |
878 | 1021 |
879 static void | 1022 static void |
880 aarch64_init_crc32_builtins () | 1023 aarch64_init_crc32_builtins () |
881 { | 1024 { |
886 { | 1029 { |
887 aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i]; | 1030 aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i]; |
888 tree argtype = aarch64_simd_builtin_std_type (d->mode, | 1031 tree argtype = aarch64_simd_builtin_std_type (d->mode, |
889 qualifier_unsigned); | 1032 qualifier_unsigned); |
890 tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE); | 1033 tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE); |
891 tree fndecl = add_builtin_function (d->name, ftype, d->fcode, | 1034 tree fndecl = aarch64_general_add_builtin (d->name, ftype, d->fcode); |
892 BUILT_IN_MD, NULL, NULL_TREE); | |
893 | 1035 |
894 aarch64_builtin_decls[d->fcode] = fndecl; | 1036 aarch64_builtin_decls[d->fcode] = fndecl; |
895 } | 1037 } |
896 } | 1038 } |
897 | 1039 |
927 builtin_decls_data *bdd_end = bdd + (sizeof (bdda) / sizeof (builtin_decls_data)); | 1069 builtin_decls_data *bdd_end = bdd + (sizeof (bdda) / sizeof (builtin_decls_data)); |
928 | 1070 |
929 for (; bdd < bdd_end; bdd++) | 1071 for (; bdd < bdd_end; bdd++) |
930 { | 1072 { |
931 ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE); | 1073 ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE); |
932 fndecl = add_builtin_function (bdd->builtin_name, | 1074 fndecl = aarch64_general_add_builtin (bdd->builtin_name, |
933 ftype, bdd->function_code, BUILT_IN_MD, NULL, NULL_TREE); | 1075 ftype, bdd->function_code); |
934 aarch64_builtin_decls[bdd->function_code] = fndecl; | 1076 aarch64_builtin_decls[bdd->function_code] = fndecl; |
935 } | 1077 } |
936 } | 1078 } |
937 | 1079 |
938 /* Initialize the backend types that support the user-visible __fp16 | 1080 /* Initialize the backend types that support the user-visible __fp16 |
946 TYPE_PRECISION (aarch64_fp16_type_node) = 16; | 1088 TYPE_PRECISION (aarch64_fp16_type_node) = 16; |
947 layout_type (aarch64_fp16_type_node); | 1089 layout_type (aarch64_fp16_type_node); |
948 | 1090 |
949 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16"); | 1091 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16"); |
950 aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node); | 1092 aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node); |
1093 } | |
1094 | |
1095 /* Initialize the backend REAL_TYPE type supporting bfloat types. */ | |
1096 static void | |
1097 aarch64_init_bf16_types (void) | |
1098 { | |
1099 aarch64_bf16_type_node = make_node (REAL_TYPE); | |
1100 TYPE_PRECISION (aarch64_bf16_type_node) = 16; | |
1101 SET_TYPE_MODE (aarch64_bf16_type_node, BFmode); | |
1102 layout_type (aarch64_bf16_type_node); | |
1103 | |
1104 lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16"); | |
1105 aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node); | |
951 } | 1106 } |
952 | 1107 |
953 /* Pointer authentication builtins that will become NOP on legacy platform. | 1108 /* Pointer authentication builtins that will become NOP on legacy platform. |
954 Currently, these builtins are for internal use only (libgcc EH unwinder). */ | 1109 Currently, these builtins are for internal use only (libgcc EH unwinder). */ |
955 | 1110 |
962 unsigned_intDI_type_node, NULL_TREE); | 1117 unsigned_intDI_type_node, NULL_TREE); |
963 tree ftype_pointer_strip | 1118 tree ftype_pointer_strip |
964 = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE); | 1119 = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE); |
965 | 1120 |
966 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716] | 1121 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716] |
967 = add_builtin_function ("__builtin_aarch64_autia1716", ftype_pointer_auth, | 1122 = aarch64_general_add_builtin ("__builtin_aarch64_autia1716", |
968 AARCH64_PAUTH_BUILTIN_AUTIA1716, BUILT_IN_MD, NULL, | 1123 ftype_pointer_auth, |
969 NULL_TREE); | 1124 AARCH64_PAUTH_BUILTIN_AUTIA1716); |
970 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716] | 1125 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716] |
971 = add_builtin_function ("__builtin_aarch64_pacia1716", ftype_pointer_auth, | 1126 = aarch64_general_add_builtin ("__builtin_aarch64_pacia1716", |
972 AARCH64_PAUTH_BUILTIN_PACIA1716, BUILT_IN_MD, NULL, | 1127 ftype_pointer_auth, |
973 NULL_TREE); | 1128 AARCH64_PAUTH_BUILTIN_PACIA1716); |
1129 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIB1716] | |
1130 = aarch64_general_add_builtin ("__builtin_aarch64_autib1716", | |
1131 ftype_pointer_auth, | |
1132 AARCH64_PAUTH_BUILTIN_AUTIB1716); | |
1133 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIB1716] | |
1134 = aarch64_general_add_builtin ("__builtin_aarch64_pacib1716", | |
1135 ftype_pointer_auth, | |
1136 AARCH64_PAUTH_BUILTIN_PACIB1716); | |
974 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI] | 1137 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI] |
975 = add_builtin_function ("__builtin_aarch64_xpaclri", ftype_pointer_strip, | 1138 = aarch64_general_add_builtin ("__builtin_aarch64_xpaclri", |
976 AARCH64_PAUTH_BUILTIN_XPACLRI, BUILT_IN_MD, NULL, | 1139 ftype_pointer_strip, |
977 NULL_TREE); | 1140 AARCH64_PAUTH_BUILTIN_XPACLRI); |
978 } | 1141 } |
1142 | |
1143 /* Initialize the transactional memory extension (TME) builtins. */ | |
1144 static void | |
1145 aarch64_init_tme_builtins (void) | |
1146 { | |
1147 tree ftype_uint64_void | |
1148 = build_function_type_list (uint64_type_node, NULL); | |
1149 tree ftype_void_void | |
1150 = build_function_type_list (void_type_node, NULL); | |
1151 tree ftype_void_uint64 | |
1152 = build_function_type_list (void_type_node, uint64_type_node, NULL); | |
1153 | |
1154 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART] | |
1155 = aarch64_general_add_builtin ("__builtin_aarch64_tstart", | |
1156 ftype_uint64_void, | |
1157 AARCH64_TME_BUILTIN_TSTART); | |
1158 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST] | |
1159 = aarch64_general_add_builtin ("__builtin_aarch64_ttest", | |
1160 ftype_uint64_void, | |
1161 AARCH64_TME_BUILTIN_TTEST); | |
1162 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT] | |
1163 = aarch64_general_add_builtin ("__builtin_aarch64_tcommit", | |
1164 ftype_void_void, | |
1165 AARCH64_TME_BUILTIN_TCOMMIT); | |
1166 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL] | |
1167 = aarch64_general_add_builtin ("__builtin_aarch64_tcancel", | |
1168 ftype_void_uint64, | |
1169 AARCH64_TME_BUILTIN_TCANCEL); | |
1170 } | |
1171 | |
1172 /* Add builtins for Random Number instructions. */ | |
1173 | |
1174 static void | |
1175 aarch64_init_rng_builtins (void) | |
1176 { | |
1177 tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node); | |
1178 tree ftype | |
1179 = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL); | |
1180 aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR] | |
1181 = aarch64_general_add_builtin ("__builtin_aarch64_rndr", ftype, | |
1182 AARCH64_BUILTIN_RNG_RNDR); | |
1183 aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS] | |
1184 = aarch64_general_add_builtin ("__builtin_aarch64_rndrrs", ftype, | |
1185 AARCH64_BUILTIN_RNG_RNDRRS); | |
1186 } | |
1187 | |
1188 /* Initialize the memory tagging extension (MTE) builtins. */ | |
1189 struct | |
1190 { | |
1191 tree ftype; | |
1192 enum insn_code icode; | |
1193 } aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_END - | |
1194 AARCH64_MEMTAG_BUILTIN_START - 1]; | |
1195 | |
1196 static void | |
1197 aarch64_init_memtag_builtins (void) | |
1198 { | |
1199 tree fntype = NULL; | |
1200 | |
1201 #define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \ | |
1202 aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \ | |
1203 = aarch64_general_add_builtin ("__builtin_aarch64_memtag_"#N, \ | |
1204 T, AARCH64_MEMTAG_BUILTIN_##F); \ | |
1205 aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \ | |
1206 AARCH64_MEMTAG_BUILTIN_START - 1] = \ | |
1207 {T, CODE_FOR_##I}; | |
1208 | |
1209 fntype = build_function_type_list (ptr_type_node, ptr_type_node, | |
1210 uint64_type_node, NULL); | |
1211 AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, irg, irg, fntype); | |
1212 | |
1213 fntype = build_function_type_list (uint64_type_node, ptr_type_node, | |
1214 uint64_type_node, NULL); | |
1215 AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, gmi, gmi, fntype); | |
1216 | |
1217 fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node, | |
1218 ptr_type_node, NULL); | |
1219 AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, subp, subp, fntype); | |
1220 | |
1221 fntype = build_function_type_list (ptr_type_node, ptr_type_node, | |
1222 unsigned_type_node, NULL); | |
1223 AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, inc_tag, addg, fntype); | |
1224 | |
1225 fntype = build_function_type_list (void_type_node, ptr_type_node, NULL); | |
1226 AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype); | |
1227 | |
1228 fntype = build_function_type_list (ptr_type_node, ptr_type_node, NULL); | |
1229 AARCH64_INIT_MEMTAG_BUILTINS_DECL (GET_TAG, get_tag, ldg, fntype); | |
1230 | |
1231 #undef AARCH64_INIT_MEMTAG_BUILTINS_DECL | |
1232 } | |
1233 | |
1234 /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group. */ | |
979 | 1235 |
980 void | 1236 void |
981 aarch64_init_builtins (void) | 1237 aarch64_general_init_builtins (void) |
982 { | 1238 { |
983 tree ftype_set_fpr | 1239 tree ftype_set_fpr |
984 = build_function_type_list (void_type_node, unsigned_type_node, NULL); | 1240 = build_function_type_list (void_type_node, unsigned_type_node, NULL); |
985 tree ftype_get_fpr | 1241 tree ftype_get_fpr |
986 = build_function_type_list (unsigned_type_node, NULL); | 1242 = build_function_type_list (unsigned_type_node, NULL); |
987 | 1243 |
988 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR] | 1244 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR] |
989 = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr, | 1245 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr", |
990 AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); | 1246 ftype_get_fpr, |
1247 AARCH64_BUILTIN_GET_FPCR); | |
991 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR] | 1248 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR] |
992 = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr, | 1249 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr", |
993 AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); | 1250 ftype_set_fpr, |
1251 AARCH64_BUILTIN_SET_FPCR); | |
994 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR] | 1252 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR] |
995 = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr, | 1253 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr", |
996 AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); | 1254 ftype_get_fpr, |
1255 AARCH64_BUILTIN_GET_FPSR); | |
997 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR] | 1256 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR] |
998 = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, | 1257 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr", |
999 AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); | 1258 ftype_set_fpr, |
1259 AARCH64_BUILTIN_SET_FPSR); | |
1000 | 1260 |
1001 aarch64_init_fp16_types (); | 1261 aarch64_init_fp16_types (); |
1262 | |
1263 aarch64_init_bf16_types (); | |
1002 | 1264 |
1003 if (TARGET_SIMD) | 1265 if (TARGET_SIMD) |
1004 aarch64_init_simd_builtins (); | 1266 aarch64_init_simd_builtins (); |
1005 | 1267 |
1006 aarch64_init_crc32_builtins (); | 1268 aarch64_init_crc32_builtins (); |
1007 aarch64_init_builtin_rsqrt (); | 1269 aarch64_init_builtin_rsqrt (); |
1270 aarch64_init_rng_builtins (); | |
1271 | |
1272 tree ftype_jcvt | |
1273 = build_function_type_list (intSI_type_node, double_type_node, NULL); | |
1274 aarch64_builtin_decls[AARCH64_JSCVT] | |
1275 = aarch64_general_add_builtin ("__builtin_aarch64_jcvtzs", ftype_jcvt, | |
1276 AARCH64_JSCVT); | |
1008 | 1277 |
1009 /* Initialize pointer authentication builtins which are backed by instructions | 1278 /* Initialize pointer authentication builtins which are backed by instructions |
1010 in NOP encoding space. | 1279 in NOP encoding space. |
1011 | 1280 |
1012 NOTE: these builtins are supposed to be used by libgcc unwinder only, as | 1281 NOTE: these builtins are supposed to be used by libgcc unwinder only, as |
1013 there is no support on return address signing under ILP32, we don't | 1282 there is no support on return address signing under ILP32, we don't |
1014 register them. */ | 1283 register them. */ |
1015 if (!TARGET_ILP32) | 1284 if (!TARGET_ILP32) |
1016 aarch64_init_pauth_hint_builtins (); | 1285 aarch64_init_pauth_hint_builtins (); |
1017 } | 1286 |
1018 | 1287 if (TARGET_TME) |
1288 aarch64_init_tme_builtins (); | |
1289 | |
1290 if (TARGET_MEMTAG) | |
1291 aarch64_init_memtag_builtins (); | |
1292 } | |
1293 | |
1294 /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group. */ | |
1019 tree | 1295 tree |
1020 aarch64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) | 1296 aarch64_general_builtin_decl (unsigned code, bool) |
1021 { | 1297 { |
1022 if (code >= AARCH64_BUILTIN_MAX) | 1298 if (code >= AARCH64_BUILTIN_MAX) |
1023 return error_mark_node; | 1299 return error_mark_node; |
1024 | 1300 |
1025 return aarch64_builtin_decls[code]; | 1301 return aarch64_builtin_decls[code]; |
1029 { | 1305 { |
1030 SIMD_ARG_COPY_TO_REG, | 1306 SIMD_ARG_COPY_TO_REG, |
1031 SIMD_ARG_CONSTANT, | 1307 SIMD_ARG_CONSTANT, |
1032 SIMD_ARG_LANE_INDEX, | 1308 SIMD_ARG_LANE_INDEX, |
1033 SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX, | 1309 SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX, |
1310 SIMD_ARG_LANE_PAIR_INDEX, | |
1311 SIMD_ARG_LANE_QUADTUP_INDEX, | |
1034 SIMD_ARG_STOP | 1312 SIMD_ARG_STOP |
1035 } builtin_simd_arg; | 1313 } builtin_simd_arg; |
1036 | 1314 |
1037 | 1315 |
1038 static rtx | 1316 static rtx |
1100 = GET_MODE_NUNITS (vmode).to_constant (); | 1378 = GET_MODE_NUNITS (vmode).to_constant (); |
1101 aarch64_simd_lane_bounds (op[opc], 0, nunits, exp); | 1379 aarch64_simd_lane_bounds (op[opc], 0, nunits, exp); |
1102 /* Keep to GCC-vector-extension lane indices in the RTL. */ | 1380 /* Keep to GCC-vector-extension lane indices in the RTL. */ |
1103 op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc])); | 1381 op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc])); |
1104 } | 1382 } |
1105 /* Fall through - if the lane index isn't a constant then | 1383 /* If the lane index isn't a constant then error out. */ |
1106 the next case will error. */ | 1384 goto constant_arg; |
1107 /* FALLTHRU */ | 1385 |
1386 case SIMD_ARG_LANE_PAIR_INDEX: | |
1387 /* Must be a previous operand into which this is an index and | |
1388 index is restricted to nunits / 2. */ | |
1389 gcc_assert (opc > 0); | |
1390 if (CONST_INT_P (op[opc])) | |
1391 { | |
1392 machine_mode vmode = insn_data[icode].operand[opc - 1].mode; | |
1393 unsigned int nunits | |
1394 = GET_MODE_NUNITS (vmode).to_constant (); | |
1395 aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp); | |
1396 /* Keep to GCC-vector-extension lane indices in the RTL. */ | |
1397 int lane = INTVAL (op[opc]); | |
1398 op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), | |
1399 SImode); | |
1400 } | |
1401 /* If the lane index isn't a constant then error out. */ | |
1402 goto constant_arg; | |
1403 case SIMD_ARG_LANE_QUADTUP_INDEX: | |
1404 /* Must be a previous operand into which this is an index and | |
1405 index is restricted to nunits / 4. */ | |
1406 gcc_assert (opc > 0); | |
1407 if (CONST_INT_P (op[opc])) | |
1408 { | |
1409 machine_mode vmode = insn_data[icode].operand[opc - 1].mode; | |
1410 unsigned int nunits | |
1411 = GET_MODE_NUNITS (vmode).to_constant (); | |
1412 aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp); | |
1413 /* Keep to GCC-vector-extension lane indices in the RTL. */ | |
1414 int lane = INTVAL (op[opc]); | |
1415 op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), | |
1416 SImode); | |
1417 } | |
1418 /* If the lane index isn't a constant then error out. */ | |
1419 goto constant_arg; | |
1108 case SIMD_ARG_CONSTANT: | 1420 case SIMD_ARG_CONSTANT: |
1109 constant_arg: | 1421 constant_arg: |
1110 if (!(*insn_data[icode].operand[opc].predicate) | 1422 if (!(*insn_data[icode].operand[opc].predicate) |
1111 (op[opc], mode)) | 1423 (op[opc], mode)) |
1112 { | 1424 { |
1213 int operands_k = k - is_void; | 1525 int operands_k = k - is_void; |
1214 int expr_args_k = k - 1; | 1526 int expr_args_k = k - 1; |
1215 | 1527 |
1216 if (d->qualifiers[qualifiers_k] & qualifier_lane_index) | 1528 if (d->qualifiers[qualifiers_k] & qualifier_lane_index) |
1217 args[k] = SIMD_ARG_LANE_INDEX; | 1529 args[k] = SIMD_ARG_LANE_INDEX; |
1530 else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index) | |
1531 args[k] = SIMD_ARG_LANE_PAIR_INDEX; | |
1532 else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index) | |
1533 args[k] = SIMD_ARG_LANE_QUADTUP_INDEX; | |
1218 else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) | 1534 else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) |
1219 args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX; | 1535 args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX; |
1220 else if (d->qualifiers[qualifiers_k] & qualifier_immediate) | 1536 else if (d->qualifiers[qualifiers_k] & qualifier_immediate) |
1221 args[k] = SIMD_ARG_CONSTANT; | 1537 args[k] = SIMD_ARG_CONSTANT; |
1222 else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) | 1538 else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) |
1315 emit_insn (gen (target, op0)); | 1631 emit_insn (gen (target, op0)); |
1316 | 1632 |
1317 return target; | 1633 return target; |
1318 } | 1634 } |
1319 | 1635 |
1320 /* Expand an expression EXP that calls a built-in function, | 1636 /* Expand a FCMLA lane expression EXP with code FCODE and |
1321 with result going to TARGET if that's convenient. */ | 1637 result going to TARGET if that is convenient. */ |
1638 | |
1322 rtx | 1639 rtx |
1323 aarch64_expand_builtin (tree exp, | 1640 aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode) |
1324 rtx target, | 1641 { |
1325 rtx subtarget ATTRIBUTE_UNUSED, | 1642 int bcode = fcode - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE - 1; |
1326 machine_mode mode ATTRIBUTE_UNUSED, | 1643 aarch64_fcmla_laneq_builtin_datum* d |
1327 int ignore ATTRIBUTE_UNUSED) | 1644 = &aarch64_fcmla_lane_builtin_data[bcode]; |
1328 { | 1645 machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require (); |
1329 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | 1646 rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0))); |
1330 int fcode = DECL_FUNCTION_CODE (fndecl); | 1647 rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1))); |
1648 rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2))); | |
1649 tree tmp = CALL_EXPR_ARG (exp, 3); | |
1650 rtx lane_idx = expand_expr (tmp, NULL_RTX, VOIDmode, EXPAND_INITIALIZER); | |
1651 | |
1652 /* Validate that the lane index is a constant. */ | |
1653 if (!CONST_INT_P (lane_idx)) | |
1654 { | |
1655 error ("%Kargument %d must be a constant immediate", exp, 4); | |
1656 return const0_rtx; | |
1657 } | |
1658 | |
1659 /* Validate that the index is within the expected range. */ | |
1660 int nunits = GET_MODE_NUNITS (quadmode).to_constant (); | |
1661 aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp); | |
1662 | |
1663 /* Generate the correct register and mode. */ | |
1664 int lane = INTVAL (lane_idx); | |
1665 | |
1666 if (lane < nunits / 4) | |
1667 op2 = simplify_gen_subreg (d->mode, op2, quadmode, | |
1668 subreg_lowpart_offset (d->mode, quadmode)); | |
1669 else | |
1670 { | |
1671 /* Select the upper 64 bits, either a V2SF or V4HF, this however | |
1672 is quite messy, as the operation required even though simple | |
1673 doesn't have a simple RTL pattern, and seems it's quite hard to | |
1674 define using a single RTL pattern. The target generic version | |
1675 gen_highpart_mode generates code that isn't optimal. */ | |
1676 rtx temp1 = gen_reg_rtx (d->mode); | |
1677 rtx temp2 = gen_reg_rtx (DImode); | |
1678 temp1 = simplify_gen_subreg (d->mode, op2, quadmode, | |
1679 subreg_lowpart_offset (d->mode, quadmode)); | |
1680 temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0); | |
1681 if (BYTES_BIG_ENDIAN) | |
1682 emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx)); | |
1683 else | |
1684 emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx)); | |
1685 op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0); | |
1686 | |
1687 /* And recalculate the index. */ | |
1688 lane -= nunits / 4; | |
1689 } | |
1690 | |
1691 /* Keep to GCC-vector-extension lane indices in the RTL, only nunits / 4 | |
1692 (max nunits in range check) are valid. Which means only 0-1, so we | |
1693 only need to know the order in a V2mode. */ | |
1694 lane_idx = aarch64_endian_lane_rtx (V2DImode, lane); | |
1695 | |
1696 if (!target) | |
1697 target = gen_reg_rtx (d->mode); | |
1698 else | |
1699 target = force_reg (d->mode, target); | |
1700 | |
1701 rtx pat = NULL_RTX; | |
1702 | |
1703 if (d->lane) | |
1704 pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx); | |
1705 else | |
1706 pat = GEN_FCN (d->icode) (target, op0, op1, op2); | |
1707 | |
1708 if (!pat) | |
1709 return NULL_RTX; | |
1710 | |
1711 emit_insn (pat); | |
1712 return target; | |
1713 } | |
1714 | |
1715 /* Function to expand an expression EXP which calls one of the Transactional | |
1716 Memory Extension (TME) builtins FCODE with the result going to TARGET. */ | |
1717 static rtx | |
1718 aarch64_expand_builtin_tme (int fcode, tree exp, rtx target) | |
1719 { | |
1720 switch (fcode) | |
1721 { | |
1722 case AARCH64_TME_BUILTIN_TSTART: | |
1723 target = gen_reg_rtx (DImode); | |
1724 emit_insn (GEN_FCN (CODE_FOR_tstart) (target)); | |
1725 break; | |
1726 | |
1727 case AARCH64_TME_BUILTIN_TTEST: | |
1728 target = gen_reg_rtx (DImode); | |
1729 emit_insn (GEN_FCN (CODE_FOR_ttest) (target)); | |
1730 break; | |
1731 | |
1732 case AARCH64_TME_BUILTIN_TCOMMIT: | |
1733 emit_insn (GEN_FCN (CODE_FOR_tcommit) ()); | |
1734 break; | |
1735 | |
1736 case AARCH64_TME_BUILTIN_TCANCEL: | |
1737 { | |
1738 tree arg0 = CALL_EXPR_ARG (exp, 0); | |
1739 rtx op0 = expand_normal (arg0); | |
1740 if (CONST_INT_P (op0) && UINTVAL (op0) <= 65536) | |
1741 emit_insn (GEN_FCN (CODE_FOR_tcancel) (op0)); | |
1742 else | |
1743 { | |
1744 error ("%Kargument must be a 16-bit constant immediate", exp); | |
1745 return const0_rtx; | |
1746 } | |
1747 } | |
1748 break; | |
1749 | |
1750 default : | |
1751 gcc_unreachable (); | |
1752 } | |
1753 return target; | |
1754 } | |
1755 | |
1756 /* Expand a random number builtin EXP with code FCODE, putting the result | |
1757 int TARGET. If IGNORE is true the return value is ignored. */ | |
1758 | |
1759 rtx | |
1760 aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore) | |
1761 { | |
1762 rtx pat; | |
1763 enum insn_code icode; | |
1764 if (fcode == AARCH64_BUILTIN_RNG_RNDR) | |
1765 icode = CODE_FOR_aarch64_rndr; | |
1766 else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS) | |
1767 icode = CODE_FOR_aarch64_rndrrs; | |
1768 else | |
1769 gcc_unreachable (); | |
1770 | |
1771 rtx rand = gen_reg_rtx (DImode); | |
1772 pat = GEN_FCN (icode) (rand); | |
1773 if (!pat) | |
1774 return NULL_RTX; | |
1775 | |
1776 tree arg0 = CALL_EXPR_ARG (exp, 0); | |
1777 rtx res_addr = expand_normal (arg0); | |
1778 res_addr = convert_memory_address (Pmode, res_addr); | |
1779 rtx res_mem = gen_rtx_MEM (DImode, res_addr); | |
1780 emit_insn (pat); | |
1781 emit_move_insn (res_mem, rand); | |
1782 /* If the status result is unused don't generate the CSET code. */ | |
1783 if (ignore) | |
1784 return target; | |
1785 | |
1786 rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM); | |
1787 rtx cmp_rtx = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx); | |
1788 emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg)); | |
1789 return target; | |
1790 } | |
1791 | |
1792 /* Expand an expression EXP that calls a MEMTAG built-in FCODE | |
1793 with result going to TARGET. */ | |
1794 static rtx | |
1795 aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target) | |
1796 { | |
1797 if (TARGET_ILP32) | |
1798 { | |
1799 error ("Memory Tagging Extension does not support %<-mabi=ilp32%>"); | |
1800 return const0_rtx; | |
1801 } | |
1802 | |
1803 rtx pat = NULL; | |
1804 enum insn_code icode = aarch64_memtag_builtin_data[fcode - | |
1805 AARCH64_MEMTAG_BUILTIN_START - 1].icode; | |
1806 | |
1807 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); | |
1808 machine_mode mode0 = GET_MODE (op0); | |
1809 op0 = force_reg (mode0 == VOIDmode ? DImode : mode0, op0); | |
1810 op0 = convert_to_mode (DImode, op0, true); | |
1811 | |
1812 switch (fcode) | |
1813 { | |
1814 case AARCH64_MEMTAG_BUILTIN_IRG: | |
1815 case AARCH64_MEMTAG_BUILTIN_GMI: | |
1816 case AARCH64_MEMTAG_BUILTIN_SUBP: | |
1817 case AARCH64_MEMTAG_BUILTIN_INC_TAG: | |
1818 { | |
1819 if (! target | |
1820 || GET_MODE (target) != DImode | |
1821 || ! (*insn_data[icode].operand[0].predicate) (target, DImode)) | |
1822 target = gen_reg_rtx (DImode); | |
1823 | |
1824 if (fcode == AARCH64_MEMTAG_BUILTIN_INC_TAG) | |
1825 { | |
1826 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1)); | |
1827 | |
1828 if ((*insn_data[icode].operand[3].predicate) (op1, QImode)) | |
1829 { | |
1830 pat = GEN_FCN (icode) (target, op0, const0_rtx, op1); | |
1831 break; | |
1832 } | |
1833 error ("%Kargument %d must be a constant immediate " | |
1834 "in range [0,15]", exp, 2); | |
1835 return const0_rtx; | |
1836 } | |
1837 else | |
1838 { | |
1839 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1)); | |
1840 machine_mode mode1 = GET_MODE (op1); | |
1841 op1 = force_reg (mode1 == VOIDmode ? DImode : mode1, op1); | |
1842 op1 = convert_to_mode (DImode, op1, true); | |
1843 pat = GEN_FCN (icode) (target, op0, op1); | |
1844 } | |
1845 break; | |
1846 } | |
1847 case AARCH64_MEMTAG_BUILTIN_GET_TAG: | |
1848 target = op0; | |
1849 pat = GEN_FCN (icode) (target, op0, const0_rtx); | |
1850 break; | |
1851 case AARCH64_MEMTAG_BUILTIN_SET_TAG: | |
1852 pat = GEN_FCN (icode) (op0, op0, const0_rtx); | |
1853 break; | |
1854 default: | |
1855 gcc_unreachable(); | |
1856 } | |
1857 | |
1858 if (!pat) | |
1859 return NULL_RTX; | |
1860 | |
1861 emit_insn (pat); | |
1862 return target; | |
1863 } | |
1864 | |
1865 /* Expand an expression EXP that calls built-in function FCODE, | |
1866 with result going to TARGET if that's convenient. IGNORE is true | |
1867 if the result of the builtin is ignored. */ | |
1868 rtx | |
1869 aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, | |
1870 int ignore) | |
1871 { | |
1331 int icode; | 1872 int icode; |
1332 rtx pat, op0; | 1873 rtx pat, op0; |
1333 tree arg0; | 1874 tree arg0; |
1334 | 1875 |
1335 switch (fcode) | 1876 switch (fcode) |
1358 emit_insn (pat); | 1899 emit_insn (pat); |
1359 return target; | 1900 return target; |
1360 | 1901 |
1361 case AARCH64_PAUTH_BUILTIN_AUTIA1716: | 1902 case AARCH64_PAUTH_BUILTIN_AUTIA1716: |
1362 case AARCH64_PAUTH_BUILTIN_PACIA1716: | 1903 case AARCH64_PAUTH_BUILTIN_PACIA1716: |
1904 case AARCH64_PAUTH_BUILTIN_AUTIB1716: | |
1905 case AARCH64_PAUTH_BUILTIN_PACIB1716: | |
1363 case AARCH64_PAUTH_BUILTIN_XPACLRI: | 1906 case AARCH64_PAUTH_BUILTIN_XPACLRI: |
1364 arg0 = CALL_EXPR_ARG (exp, 0); | 1907 arg0 = CALL_EXPR_ARG (exp, 0); |
1365 op0 = force_reg (Pmode, expand_normal (arg0)); | 1908 op0 = force_reg (Pmode, expand_normal (arg0)); |
1366 | 1909 |
1367 if (!target) | 1910 if (!target) |
1381 } | 1924 } |
1382 else | 1925 else |
1383 { | 1926 { |
1384 tree arg1 = CALL_EXPR_ARG (exp, 1); | 1927 tree arg1 = CALL_EXPR_ARG (exp, 1); |
1385 rtx op1 = force_reg (Pmode, expand_normal (arg1)); | 1928 rtx op1 = force_reg (Pmode, expand_normal (arg1)); |
1386 icode = (fcode == AARCH64_PAUTH_BUILTIN_PACIA1716 | 1929 switch (fcode) |
1387 ? CODE_FOR_paci1716 : CODE_FOR_auti1716); | 1930 { |
1931 case AARCH64_PAUTH_BUILTIN_AUTIA1716: | |
1932 icode = CODE_FOR_autia1716; | |
1933 break; | |
1934 case AARCH64_PAUTH_BUILTIN_AUTIB1716: | |
1935 icode = CODE_FOR_autib1716; | |
1936 break; | |
1937 case AARCH64_PAUTH_BUILTIN_PACIA1716: | |
1938 icode = CODE_FOR_pacia1716; | |
1939 break; | |
1940 case AARCH64_PAUTH_BUILTIN_PACIB1716: | |
1941 icode = CODE_FOR_pacib1716; | |
1942 break; | |
1943 default: | |
1944 icode = 0; | |
1945 gcc_unreachable (); | |
1946 } | |
1388 | 1947 |
1389 rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM); | 1948 rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM); |
1390 rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM); | 1949 rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM); |
1391 emit_move_insn (x17_reg, op0); | 1950 emit_move_insn (x17_reg, op0); |
1392 emit_move_insn (x16_reg, op1); | 1951 emit_move_insn (x16_reg, op1); |
1393 emit_insn (GEN_FCN (icode) ()); | 1952 emit_insn (GEN_FCN (icode) ()); |
1394 emit_move_insn (target, x17_reg); | 1953 emit_move_insn (target, x17_reg); |
1395 } | 1954 } |
1396 | 1955 |
1397 return target; | 1956 return target; |
1957 | |
1958 case AARCH64_JSCVT: | |
1959 arg0 = CALL_EXPR_ARG (exp, 0); | |
1960 op0 = force_reg (DFmode, expand_normal (arg0)); | |
1961 if (!target) | |
1962 target = gen_reg_rtx (SImode); | |
1963 else | |
1964 target = force_reg (SImode, target); | |
1965 emit_insn (GEN_FCN (CODE_FOR_aarch64_fjcvtzs) (target, op0)); | |
1966 return target; | |
1967 | |
1968 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF: | |
1969 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF: | |
1970 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF: | |
1971 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF: | |
1972 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF: | |
1973 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF: | |
1974 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF: | |
1975 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF: | |
1976 return aarch64_expand_fcmla_builtin (exp, target, fcode); | |
1977 case AARCH64_BUILTIN_RNG_RNDR: | |
1978 case AARCH64_BUILTIN_RNG_RNDRRS: | |
1979 return aarch64_expand_rng_builtin (exp, target, fcode, ignore); | |
1398 } | 1980 } |
1399 | 1981 |
1400 if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX) | 1982 if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX) |
1401 return aarch64_simd_expand_builtin (fcode, exp, target); | 1983 return aarch64_simd_expand_builtin (fcode, exp, target); |
1402 else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX) | 1984 else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX) |
1407 || fcode == AARCH64_BUILTIN_RSQRT_V2DF | 1989 || fcode == AARCH64_BUILTIN_RSQRT_V2DF |
1408 || fcode == AARCH64_BUILTIN_RSQRT_V2SF | 1990 || fcode == AARCH64_BUILTIN_RSQRT_V2SF |
1409 || fcode == AARCH64_BUILTIN_RSQRT_V4SF) | 1991 || fcode == AARCH64_BUILTIN_RSQRT_V4SF) |
1410 return aarch64_expand_builtin_rsqrt (fcode, exp, target); | 1992 return aarch64_expand_builtin_rsqrt (fcode, exp, target); |
1411 | 1993 |
1994 if (fcode == AARCH64_TME_BUILTIN_TSTART | |
1995 || fcode == AARCH64_TME_BUILTIN_TCOMMIT | |
1996 || fcode == AARCH64_TME_BUILTIN_TTEST | |
1997 || fcode == AARCH64_TME_BUILTIN_TCANCEL) | |
1998 return aarch64_expand_builtin_tme (fcode, exp, target); | |
1999 | |
2000 if (fcode >= AARCH64_MEMTAG_BUILTIN_START | |
2001 && fcode <= AARCH64_MEMTAG_BUILTIN_END) | |
2002 return aarch64_expand_builtin_memtag (fcode, exp, target); | |
2003 | |
1412 gcc_unreachable (); | 2004 gcc_unreachable (); |
1413 } | 2005 } |
1414 | 2006 |
1415 tree | 2007 tree |
1416 aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, | 2008 aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, |
1417 tree type_in) | 2009 tree type_in) |
1418 { | 2010 { |
1419 machine_mode in_mode, out_mode; | 2011 machine_mode in_mode, out_mode; |
1420 unsigned HOST_WIDE_INT in_n, out_n; | |
1421 | 2012 |
1422 if (TREE_CODE (type_out) != VECTOR_TYPE | 2013 if (TREE_CODE (type_out) != VECTOR_TYPE |
1423 || TREE_CODE (type_in) != VECTOR_TYPE) | 2014 || TREE_CODE (type_in) != VECTOR_TYPE) |
1424 return NULL_TREE; | 2015 return NULL_TREE; |
1425 | 2016 |
1426 out_mode = TYPE_MODE (TREE_TYPE (type_out)); | 2017 out_mode = TYPE_MODE (type_out); |
1427 in_mode = TYPE_MODE (TREE_TYPE (type_in)); | 2018 in_mode = TYPE_MODE (type_in); |
1428 if (!TYPE_VECTOR_SUBPARTS (type_out).is_constant (&out_n) | |
1429 || !TYPE_VECTOR_SUBPARTS (type_in).is_constant (&in_n)) | |
1430 return NULL_TREE; | |
1431 | 2019 |
1432 #undef AARCH64_CHECK_BUILTIN_MODE | 2020 #undef AARCH64_CHECK_BUILTIN_MODE |
1433 #define AARCH64_CHECK_BUILTIN_MODE(C, N) 1 | 2021 #define AARCH64_CHECK_BUILTIN_MODE(C, N) 1 |
1434 #define AARCH64_FIND_FRINT_VARIANT(N) \ | 2022 #define AARCH64_FIND_FRINT_VARIANT(N) \ |
1435 (AARCH64_CHECK_BUILTIN_MODE (2, D) \ | 2023 (AARCH64_CHECK_BUILTIN_MODE (2, D) \ |
1441 : NULL_TREE))) | 2029 : NULL_TREE))) |
1442 switch (fn) | 2030 switch (fn) |
1443 { | 2031 { |
1444 #undef AARCH64_CHECK_BUILTIN_MODE | 2032 #undef AARCH64_CHECK_BUILTIN_MODE |
1445 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ | 2033 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ |
1446 (out_mode == N##Fmode && out_n == C \ | 2034 (out_mode == V##C##N##Fmode && in_mode == V##C##N##Fmode) |
1447 && in_mode == N##Fmode && in_n == C) | |
1448 CASE_CFN_FLOOR: | 2035 CASE_CFN_FLOOR: |
1449 return AARCH64_FIND_FRINT_VARIANT (floor); | 2036 return AARCH64_FIND_FRINT_VARIANT (floor); |
1450 CASE_CFN_CEIL: | 2037 CASE_CFN_CEIL: |
1451 return AARCH64_FIND_FRINT_VARIANT (ceil); | 2038 return AARCH64_FIND_FRINT_VARIANT (ceil); |
1452 CASE_CFN_TRUNC: | 2039 CASE_CFN_TRUNC: |
1457 return AARCH64_FIND_FRINT_VARIANT (nearbyint); | 2044 return AARCH64_FIND_FRINT_VARIANT (nearbyint); |
1458 CASE_CFN_SQRT: | 2045 CASE_CFN_SQRT: |
1459 return AARCH64_FIND_FRINT_VARIANT (sqrt); | 2046 return AARCH64_FIND_FRINT_VARIANT (sqrt); |
1460 #undef AARCH64_CHECK_BUILTIN_MODE | 2047 #undef AARCH64_CHECK_BUILTIN_MODE |
1461 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ | 2048 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ |
1462 (out_mode == SImode && out_n == C \ | 2049 (out_mode == V##C##SImode && in_mode == V##C##N##Imode) |
1463 && in_mode == N##Imode && in_n == C) | |
1464 CASE_CFN_CLZ: | 2050 CASE_CFN_CLZ: |
1465 { | 2051 { |
1466 if (AARCH64_CHECK_BUILTIN_MODE (4, S)) | 2052 if (AARCH64_CHECK_BUILTIN_MODE (4, S)) |
1467 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si]; | 2053 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si]; |
1468 return NULL_TREE; | 2054 return NULL_TREE; |
1475 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si]; | 2061 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si]; |
1476 return NULL_TREE; | 2062 return NULL_TREE; |
1477 } | 2063 } |
1478 #undef AARCH64_CHECK_BUILTIN_MODE | 2064 #undef AARCH64_CHECK_BUILTIN_MODE |
1479 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ | 2065 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ |
1480 (out_mode == N##Imode && out_n == C \ | 2066 (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode) |
1481 && in_mode == N##Fmode && in_n == C) | |
1482 CASE_CFN_IFLOOR: | 2067 CASE_CFN_IFLOOR: |
1483 CASE_CFN_LFLOOR: | 2068 CASE_CFN_LFLOOR: |
1484 CASE_CFN_LLFLOOR: | 2069 CASE_CFN_LLFLOOR: |
1485 { | 2070 { |
1486 enum aarch64_builtins builtin; | 2071 enum aarch64_builtins builtin; |
1525 else | 2110 else |
1526 return NULL_TREE; | 2111 return NULL_TREE; |
1527 | 2112 |
1528 return aarch64_builtin_decls[builtin]; | 2113 return aarch64_builtin_decls[builtin]; |
1529 } | 2114 } |
1530 case CFN_BUILT_IN_BSWAP16: | |
1531 #undef AARCH64_CHECK_BUILTIN_MODE | |
1532 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ | |
1533 (out_mode == N##Imode && out_n == C \ | |
1534 && in_mode == N##Imode && in_n == C) | |
1535 if (AARCH64_CHECK_BUILTIN_MODE (4, H)) | |
1536 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi]; | |
1537 else if (AARCH64_CHECK_BUILTIN_MODE (8, H)) | |
1538 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi]; | |
1539 else | |
1540 return NULL_TREE; | |
1541 case CFN_BUILT_IN_BSWAP32: | |
1542 if (AARCH64_CHECK_BUILTIN_MODE (2, S)) | |
1543 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si]; | |
1544 else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) | |
1545 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si]; | |
1546 else | |
1547 return NULL_TREE; | |
1548 case CFN_BUILT_IN_BSWAP64: | |
1549 if (AARCH64_CHECK_BUILTIN_MODE (2, D)) | |
1550 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di]; | |
1551 else | |
1552 return NULL_TREE; | |
1553 default: | 2115 default: |
1554 return NULL_TREE; | 2116 return NULL_TREE; |
1555 } | 2117 } |
1556 | 2118 |
1557 return NULL_TREE; | 2119 return NULL_TREE; |
1558 } | 2120 } |
1559 | 2121 |
1560 /* Return builtin for reciprocal square root. */ | 2122 /* Return builtin for reciprocal square root. */ |
1561 | 2123 |
1562 tree | 2124 tree |
1563 aarch64_builtin_rsqrt (unsigned int fn) | 2125 aarch64_general_builtin_rsqrt (unsigned int fn) |
1564 { | 2126 { |
1565 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df) | 2127 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df) |
1566 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF]; | 2128 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF]; |
1567 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf) | 2129 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf) |
1568 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF]; | 2130 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF]; |
1573 | 2135 |
1574 #undef VAR1 | 2136 #undef VAR1 |
1575 #define VAR1(T, N, MAP, A) \ | 2137 #define VAR1(T, N, MAP, A) \ |
1576 case AARCH64_SIMD_BUILTIN_##T##_##N##A: | 2138 case AARCH64_SIMD_BUILTIN_##T##_##N##A: |
1577 | 2139 |
2140 /* Try to fold a call to the built-in function with subcode FCODE. The | |
2141 function is passed the N_ARGS arguments in ARGS and it returns a value | |
2142 of type TYPE. Return the new expression on success and NULL_TREE on | |
2143 failure. */ | |
1578 tree | 2144 tree |
1579 aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, | 2145 aarch64_general_fold_builtin (unsigned int fcode, tree type, |
1580 bool ignore ATTRIBUTE_UNUSED) | 2146 unsigned int n_args ATTRIBUTE_UNUSED, tree *args) |
1581 { | 2147 { |
1582 int fcode = DECL_FUNCTION_CODE (fndecl); | |
1583 tree type = TREE_TYPE (TREE_TYPE (fndecl)); | |
1584 | |
1585 switch (fcode) | 2148 switch (fcode) |
1586 { | 2149 { |
1587 BUILTIN_VDQF (UNOP, abs, 2) | 2150 BUILTIN_VDQF (UNOP, abs, 2) |
1588 return fold_build1 (ABS_EXPR, type, args[0]); | 2151 return fold_build1 (ABS_EXPR, type, args[0]); |
1589 VAR1 (UNOP, floatv2si, 2, v2sf) | 2152 VAR1 (UNOP, floatv2si, 2, v2sf) |
1595 } | 2158 } |
1596 | 2159 |
1597 return NULL_TREE; | 2160 return NULL_TREE; |
1598 } | 2161 } |
1599 | 2162 |
1600 bool | 2163 /* Try to fold STMT, given that it's a call to the built-in function with |
1601 aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) | 2164 subcode FCODE. Return the new statement on success and null on |
1602 { | 2165 failure. */ |
1603 bool changed = false; | 2166 gimple * |
1604 gimple *stmt = gsi_stmt (*gsi); | 2167 aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt) |
1605 tree call = gimple_call_fn (stmt); | 2168 { |
1606 tree fndecl; | |
1607 gimple *new_stmt = NULL; | 2169 gimple *new_stmt = NULL; |
1608 | 2170 unsigned nargs = gimple_call_num_args (stmt); |
1609 if (call) | 2171 tree *args = (nargs > 0 |
2172 ? gimple_call_arg_ptr (stmt, 0) | |
2173 : &error_mark_node); | |
2174 | |
2175 /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int | |
2176 and unsigned int; it will distinguish according to the types of | |
2177 the arguments to the __builtin. */ | |
2178 switch (fcode) | |
1610 { | 2179 { |
1611 fndecl = gimple_call_fndecl (stmt); | 2180 BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) |
1612 if (fndecl) | 2181 new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS, |
2182 1, args[0]); | |
2183 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); | |
2184 break; | |
2185 BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10) | |
2186 BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10) | |
2187 new_stmt = gimple_build_call_internal (IFN_REDUC_MAX, | |
2188 1, args[0]); | |
2189 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); | |
2190 break; | |
2191 BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10) | |
2192 BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10) | |
2193 new_stmt = gimple_build_call_internal (IFN_REDUC_MIN, | |
2194 1, args[0]); | |
2195 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); | |
2196 break; | |
2197 BUILTIN_GPF (BINOP, fmulx, 0) | |
1613 { | 2198 { |
1614 int fcode = DECL_FUNCTION_CODE (fndecl); | 2199 gcc_assert (nargs == 2); |
1615 unsigned nargs = gimple_call_num_args (stmt); | 2200 bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST; |
1616 tree *args = (nargs > 0 | 2201 bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST; |
1617 ? gimple_call_arg_ptr (stmt, 0) | 2202 if (a0_cst_p || a1_cst_p) |
1618 : &error_mark_node); | |
1619 | |
1620 /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int | |
1621 and unsigned int; it will distinguish according to the types of | |
1622 the arguments to the __builtin. */ | |
1623 switch (fcode) | |
1624 { | 2203 { |
1625 BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) | 2204 if (a0_cst_p && a1_cst_p) |
1626 new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS, | |
1627 1, args[0]); | |
1628 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); | |
1629 break; | |
1630 BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10) | |
1631 BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10) | |
1632 new_stmt = gimple_build_call_internal (IFN_REDUC_MAX, | |
1633 1, args[0]); | |
1634 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); | |
1635 break; | |
1636 BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10) | |
1637 BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10) | |
1638 new_stmt = gimple_build_call_internal (IFN_REDUC_MIN, | |
1639 1, args[0]); | |
1640 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); | |
1641 break; | |
1642 BUILTIN_GPF (BINOP, fmulx, 0) | |
1643 { | 2205 { |
1644 gcc_assert (nargs == 2); | 2206 tree t0 = TREE_TYPE (args[0]); |
1645 bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST; | 2207 real_value a0 = (TREE_REAL_CST (args[0])); |
1646 bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST; | 2208 real_value a1 = (TREE_REAL_CST (args[1])); |
1647 if (a0_cst_p || a1_cst_p) | 2209 if (real_equal (&a1, &dconst0)) |
2210 std::swap (a0, a1); | |
2211 /* According to real_equal (), +0 equals -0. */ | |
2212 if (real_equal (&a0, &dconst0) && real_isinf (&a1)) | |
1648 { | 2213 { |
1649 if (a0_cst_p && a1_cst_p) | 2214 real_value res = dconst2; |
1650 { | 2215 res.sign = a0.sign ^ a1.sign; |
1651 tree t0 = TREE_TYPE (args[0]); | 2216 new_stmt = gimple_build_assign (gimple_call_lhs (stmt), |
1652 real_value a0 = (TREE_REAL_CST (args[0])); | 2217 REAL_CST, |
1653 real_value a1 = (TREE_REAL_CST (args[1])); | 2218 build_real (t0, res)); |
1654 if (real_equal (&a1, &dconst0)) | |
1655 std::swap (a0, a1); | |
1656 /* According to real_equal (), +0 equals -0. */ | |
1657 if (real_equal (&a0, &dconst0) && real_isinf (&a1)) | |
1658 { | |
1659 real_value res = dconst2; | |
1660 res.sign = a0.sign ^ a1.sign; | |
1661 new_stmt = | |
1662 gimple_build_assign (gimple_call_lhs (stmt), | |
1663 REAL_CST, | |
1664 build_real (t0, res)); | |
1665 } | |
1666 else | |
1667 new_stmt = | |
1668 gimple_build_assign (gimple_call_lhs (stmt), | |
1669 MULT_EXPR, | |
1670 args[0], args[1]); | |
1671 } | |
1672 else /* a0_cst_p ^ a1_cst_p. */ | |
1673 { | |
1674 real_value const_part = a0_cst_p | |
1675 ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]); | |
1676 if (!real_equal (&const_part, &dconst0) | |
1677 && !real_isinf (&const_part)) | |
1678 new_stmt = | |
1679 gimple_build_assign (gimple_call_lhs (stmt), | |
1680 MULT_EXPR, args[0], args[1]); | |
1681 } | |
1682 } | 2219 } |
1683 if (new_stmt) | 2220 else |
1684 { | 2221 new_stmt = gimple_build_assign (gimple_call_lhs (stmt), |
1685 gimple_set_vuse (new_stmt, gimple_vuse (stmt)); | 2222 MULT_EXPR, |
1686 gimple_set_vdef (new_stmt, gimple_vdef (stmt)); | 2223 args[0], args[1]); |
1687 } | |
1688 break; | |
1689 } | 2224 } |
1690 default: | 2225 else /* a0_cst_p ^ a1_cst_p. */ |
1691 break; | 2226 { |
2227 real_value const_part = a0_cst_p | |
2228 ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]); | |
2229 if (!real_equal (&const_part, &dconst0) | |
2230 && !real_isinf (&const_part)) | |
2231 new_stmt = gimple_build_assign (gimple_call_lhs (stmt), | |
2232 MULT_EXPR, args[0], | |
2233 args[1]); | |
2234 } | |
1692 } | 2235 } |
2236 if (new_stmt) | |
2237 { | |
2238 gimple_set_vuse (new_stmt, gimple_vuse (stmt)); | |
2239 gimple_set_vdef (new_stmt, gimple_vdef (stmt)); | |
2240 } | |
2241 break; | |
1693 } | 2242 } |
2243 default: | |
2244 break; | |
1694 } | 2245 } |
1695 | 2246 return new_stmt; |
1696 if (new_stmt) | |
1697 { | |
1698 gsi_replace (gsi, new_stmt, true); | |
1699 changed = true; | |
1700 } | |
1701 | |
1702 return changed; | |
1703 } | 2247 } |
1704 | 2248 |
1705 void | 2249 void |
1706 aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) | 2250 aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) |
1707 { | 2251 { |
1799 *update = build2 (COMPOUND_EXPR, void_type_node, | 2343 *update = build2 (COMPOUND_EXPR, void_type_node, |
1800 build2 (COMPOUND_EXPR, void_type_node, | 2344 build2 (COMPOUND_EXPR, void_type_node, |
1801 reload_fenv, restore_fnenv), update_call); | 2345 reload_fenv, restore_fnenv), update_call); |
1802 } | 2346 } |
1803 | 2347 |
/* Resolve overloaded MEMTAG built-in functions.  */

/* Extract the per-target builtin subcode from a builtin FUNCTION_DECL F.
   The MD function code packs the subcode above AARCH64_BUILTIN_SHIFT.  */
#define AARCH64_BUILTIN_SUBCODE(F) \
  (DECL_MD_FUNCTION_CODE (F) >> AARCH64_BUILTIN_SHIFT)

/* Resolve the overloaded MEMTAG builtin FNDECL at location LOC.
   PASS_PARAMS is really a vec<tree, va_gc> * of the call's actual
   arguments.  Overload resolution here works by rewriting
   TREE_TYPE (FNDECL) in place: either back to the builtin's initial
   signature (on mismatch/error, letting the front end diagnose the
   call against that type) or to a signature specialized to the
   pointer type of the first argument.  Always returns NULL_TREE so
   the caller performs normal processing with the adjusted type.  */
static tree
aarch64_resolve_overloaded_memtag (location_t loc,
				  tree fndecl, void *pass_params)
{
  vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (pass_params);
  unsigned param_num = params ? params->length() : 0;
  unsigned int fcode = AARCH64_BUILTIN_SUBCODE (fndecl);
  /* The builtin's original (generic) function type, looked up by its
     offset within the MEMTAG builtin range.  */
  tree inittype = aarch64_memtag_builtin_data[
      fcode - AARCH64_MEMTAG_BUILTIN_START - 1].ftype;
  /* Number of formal arguments, excluding the terminating void.  */
  unsigned arg_num = list_length (TYPE_ARG_TYPES (inittype)) - 1;

  /* Wrong arity: restore the generic signature and let the front end
     report the mismatch.  */
  if (param_num != arg_num)
    {
      TREE_TYPE (fndecl) = inittype;
      return NULL_TREE;
    }
  tree retype = NULL;

  if (fcode == AARCH64_MEMTAG_BUILTIN_SUBP)
    {
      /* __arm_mte_ptrdiff: both operands are pointers.  */
      tree t0 = TREE_TYPE ((*params)[0]);
      tree t1 = TREE_TYPE ((*params)[1]);

      /* Fall back to void * for non-pointer/erroneous operands so a
	 sensible signature can still be built.  */
      if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
	t0 = ptr_type_node;
      if (t1 == error_mark_node || TREE_CODE (t1) != POINTER_TYPE)
	t1 = ptr_type_node;

      /* MTE addresses are 64-bit; warn (unconditionally, opt index 1)
	 if an operand's pointer mode is narrower/wider.  */
      if (TYPE_MODE (t0) != DImode)
	warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
	    (int)tree_to_shwi (DECL_SIZE ((*params)[0])));

      if (TYPE_MODE (t1) != DImode)
	warning_at (loc, 1, "expected 64-bit address but argument 2 is %d-bit",
	    (int)tree_to_shwi (DECL_SIZE ((*params)[1])));

      retype = build_function_type_list (ptrdiff_type_node, t0, t1, NULL);
    }
  else
    {
      /* All remaining MEMTAG builtins take a pointer first argument.  */
      tree t0 = TREE_TYPE ((*params)[0]);

      /* Not a pointer at all: restore the generic signature and let
	 the front end diagnose it.  */
      if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
	{
	  TREE_TYPE (fndecl) = inittype;
	  return NULL_TREE;
	}

      if (TYPE_MODE (t0) != DImode)
	warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
	    (int)tree_to_shwi (DECL_SIZE ((*params)[0])));

      /* Specialize the signature to the caller's pointer type T0.  */
      switch (fcode)
	{
	case AARCH64_MEMTAG_BUILTIN_IRG:
	  retype = build_function_type_list (t0, t0, uint64_type_node, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_GMI:
	  retype = build_function_type_list (uint64_type_node, t0,
	      uint64_type_node, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_INC_TAG:
	  retype = build_function_type_list (t0, t0, unsigned_type_node, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_SET_TAG:
	  retype = build_function_type_list (void_type_node, t0, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_GET_TAG:
	  retype = build_function_type_list (t0, t0, NULL);
	  break;
	default:
	  return NULL_TREE;
	}
    }

  /* Install the specialized type, or restore the generic one if
     specialization failed.  */
  if (!retype || retype == error_mark_node)
    TREE_TYPE (fndecl) = inittype;
  else
    TREE_TYPE (fndecl) = retype;

  return NULL_TREE;
}
2434 | |
2435 /* Called at aarch64_resolve_overloaded_builtin in aarch64-c.c. */ | |
2436 tree | |
2437 aarch64_resolve_overloaded_builtin_general (location_t loc, tree function, | |
2438 void *pass_params) | |
2439 { | |
2440 unsigned int fcode = AARCH64_BUILTIN_SUBCODE (function); | |
2441 | |
2442 if (fcode >= AARCH64_MEMTAG_BUILTIN_START | |
2443 && fcode <= AARCH64_MEMTAG_BUILTIN_END) | |
2444 return aarch64_resolve_overloaded_memtag(loc, function, pass_params); | |
2445 | |
2446 return NULL_TREE; | |
2447 } | |
1804 | 2448 |
1805 #undef AARCH64_CHECK_BUILTIN_MODE | 2449 #undef AARCH64_CHECK_BUILTIN_MODE |
1806 #undef AARCH64_FIND_FRINT_VARIANT | 2450 #undef AARCH64_FIND_FRINT_VARIANT |
1807 #undef CF0 | 2451 #undef CF0 |
1808 #undef CF1 | 2452 #undef CF1 |