comparison gcc/config/aarch64/aarch64-builtins.c @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents 84e7813d76e9
children
131:84e7813d76e9 145:1830386684a0
1 /* Builtins' description for AArch64 SIMD architecture. 1 /* Builtins' description for AArch64 SIMD architecture.
2 Copyright (C) 2011-2018 Free Software Foundation, Inc. 2 Copyright (C) 2011-2020 Free Software Foundation, Inc.
3 Contributed by ARM Ltd. 3 Contributed by ARM Ltd.
4 4
5 This file is part of GCC. 5 This file is part of GCC.
6 6
7 GCC is free software; you can redistribute it and/or modify it 7 GCC is free software; you can redistribute it and/or modify it
40 #include "explow.h" 40 #include "explow.h"
41 #include "expr.h" 41 #include "expr.h"
42 #include "langhooks.h" 42 #include "langhooks.h"
43 #include "gimple-iterator.h" 43 #include "gimple-iterator.h"
44 #include "case-cfn-macros.h" 44 #include "case-cfn-macros.h"
45 #include "emit-rtl.h"
45 46
46 #define v8qi_UP E_V8QImode 47 #define v8qi_UP E_V8QImode
47 #define v4hi_UP E_V4HImode 48 #define v4hi_UP E_V4HImode
48 #define v4hf_UP E_V4HFmode 49 #define v4hf_UP E_V4HFmode
49 #define v2si_UP E_V2SImode 50 #define v2si_UP E_V2SImode
65 #define si_UP E_SImode 66 #define si_UP E_SImode
66 #define sf_UP E_SFmode 67 #define sf_UP E_SFmode
67 #define hi_UP E_HImode 68 #define hi_UP E_HImode
68 #define hf_UP E_HFmode 69 #define hf_UP E_HFmode
69 #define qi_UP E_QImode 70 #define qi_UP E_QImode
71 #define bf_UP E_BFmode
72 #define v4bf_UP E_V4BFmode
73 #define v8bf_UP E_V8BFmode
70 #define UP(X) X##_UP 74 #define UP(X) X##_UP
71 75
72 #define SIMD_MAX_BUILTIN_ARGS 5 76 #define SIMD_MAX_BUILTIN_ARGS 5
73 77
74 enum aarch64_type_qualifiers 78 enum aarch64_type_qualifiers
100 /* Polynomial types. */ 104 /* Polynomial types. */
101 qualifier_poly = 0x100, 105 qualifier_poly = 0x100,
102 /* Lane indices - must be in range, and flipped for bigendian. */ 106 /* Lane indices - must be in range, and flipped for bigendian. */
103 qualifier_lane_index = 0x200, 107 qualifier_lane_index = 0x200,
104 /* Lane indices for single lane structure loads and stores. */ 108 /* Lane indices for single lane structure loads and stores. */
105 qualifier_struct_load_store_lane_index = 0x400 109 qualifier_struct_load_store_lane_index = 0x400,
110 /* Lane indices selected in pairs - must be in range, and flipped for
111 bigendian. */
112 qualifier_lane_pair_index = 0x800,
113 /* Lane indices selected in quadtuplets - must be in range, and flipped
114 for bigendian. */
115 qualifier_lane_quadtup_index = 0x1000,
106 }; 116 };
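The qualifier values above are distinct bits, so several qualifiers can be OR-ed into the per-argument word that the expansion code tests further down. The "flipped for bigendian" remarks refer to lane renumbering; a sketch of what that means, assuming the ENDIAN_LANE_N helper used later in this file keeps its usual definition in the backend headers:

    /* Architectural lane numbering is reversed on big-endian targets, so
       the user-visible GCC-vector-extension index n is remapped before it
       reaches the RTL:

         ENDIAN_LANE_N (nunits, n) == BYTES_BIG_ENDIAN ? nunits - 1 - n : n

       e.g. lane 0 of a 4-element vector becomes architectural lane 3 when
       BYTES_BIG_ENDIAN, keeping the user-visible numbering stable.  */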
107 117
108 typedef struct 118 typedef struct
109 { 119 {
110 const char *name; 120 const char *name;
167 static enum aarch64_type_qualifiers 177 static enum aarch64_type_qualifiers
168 aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] 178 aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
169 = { qualifier_unsigned, qualifier_unsigned, 179 = { qualifier_unsigned, qualifier_unsigned,
170 qualifier_unsigned, qualifier_immediate }; 180 qualifier_unsigned, qualifier_immediate };
171 #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers) 181 #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
172 182 static enum aarch64_type_qualifiers
173 183 aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
184 = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none };
185 #define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers)
186
187
188 static enum aarch64_type_qualifiers
189 aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
190 = { qualifier_none, qualifier_none, qualifier_none,
191 qualifier_none, qualifier_lane_pair_index };
192 #define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers)
174 static enum aarch64_type_qualifiers 193 static enum aarch64_type_qualifiers
175 aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] 194 aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
176 = { qualifier_none, qualifier_none, qualifier_none, 195 = { qualifier_none, qualifier_none, qualifier_none,
177 qualifier_none, qualifier_lane_index }; 196 qualifier_none, qualifier_lane_index };
178 #define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers) 197 #define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
179 static enum aarch64_type_qualifiers 198 static enum aarch64_type_qualifiers
180 aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] 199 aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
181 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, 200 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
182 qualifier_unsigned, qualifier_lane_index }; 201 qualifier_unsigned, qualifier_lane_index };
183 #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers) 202 #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)
203
204 static enum aarch64_type_qualifiers
205 aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
206 = { qualifier_none, qualifier_none, qualifier_unsigned,
207 qualifier_none, qualifier_lane_quadtup_index };
208 #define TYPES_QUADOPSSUS_LANE_QUADTUP \
209 (aarch64_types_quadopssus_lane_quadtup_qualifiers)
210 static enum aarch64_type_qualifiers
211 aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
212 = { qualifier_none, qualifier_none, qualifier_none,
213 qualifier_unsigned, qualifier_lane_quadtup_index };
214 #define TYPES_QUADOPSSSU_LANE_QUADTUP \
215 (aarch64_types_quadopsssu_lane_quadtup_qualifiers)
184 216
185 static enum aarch64_type_qualifiers 217 static enum aarch64_type_qualifiers
186 aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] 218 aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
187 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, 219 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
188 qualifier_unsigned, qualifier_immediate }; 220 qualifier_unsigned, qualifier_immediate };
354 CRC32_BUILTIN (crc32cb, QI) \ 386 CRC32_BUILTIN (crc32cb, QI) \
355 CRC32_BUILTIN (crc32ch, HI) \ 387 CRC32_BUILTIN (crc32ch, HI) \
356 CRC32_BUILTIN (crc32cw, SI) \ 388 CRC32_BUILTIN (crc32cw, SI) \
357 CRC32_BUILTIN (crc32cx, DI) 389 CRC32_BUILTIN (crc32cx, DI)
358 390
391 /* The next 8 FCMLA intrinsics require some special handling compared
392 to the normal simd intrinsics. */
393 #define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \
394 FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \
395 FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \
396 FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \
397 FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \
398 FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \
399 FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \
400 FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \
401 FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true) \
402
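This list is an X-macro: it is expanded twice below with different definitions of FCMLA_LANEQ_BUILTIN, once to generate enum values and once to generate the data table. Deriving both expansions for the first entry from the macro definitions later in this diff:

    /* Enum expansion (AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M):  */
    AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF,

    /* Data-table expansion:  */
    {"__builtin_aarch64_fcmla_laneq0v2sf", E_V2SFmode,
     CODE_FOR_aarch64_fcmla0v2sf,
     AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF, false},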
359 typedef struct 403 typedef struct
360 { 404 {
361 const char *name; 405 const char *name;
362 machine_mode mode; 406 machine_mode mode;
363 const enum insn_code icode; 407 const enum insn_code icode;
364 unsigned int fcode; 408 unsigned int fcode;
365 } aarch64_crc_builtin_datum; 409 } aarch64_crc_builtin_datum;
366 410
411 /* Hold information about how to expand the FCMLA_LANEQ builtins. */
412 typedef struct
413 {
414 const char *name;
415 machine_mode mode;
416 const enum insn_code icode;
417 unsigned int fcode;
418 bool lane;
419 } aarch64_fcmla_laneq_builtin_datum;
420
367 #define CRC32_BUILTIN(N, M) \ 421 #define CRC32_BUILTIN(N, M) \
368 AARCH64_BUILTIN_##N, 422 AARCH64_BUILTIN_##N,
423
424 #define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
425 AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M,
369 426
370 #undef VAR1 427 #undef VAR1
371 #define VAR1(T, N, MAP, A) \ 428 #define VAR1(T, N, MAP, A) \
372 AARCH64_SIMD_BUILTIN_##T##_##N##A, 429 AARCH64_SIMD_BUILTIN_##T##_##N##A,
373 430
396 AARCH64_CRC32_BUILTINS 453 AARCH64_CRC32_BUILTINS
397 AARCH64_CRC32_BUILTIN_MAX, 454 AARCH64_CRC32_BUILTIN_MAX,
398 /* ARMv8.3-A Pointer Authentication Builtins. */ 455 /* ARMv8.3-A Pointer Authentication Builtins. */
399 AARCH64_PAUTH_BUILTIN_AUTIA1716, 456 AARCH64_PAUTH_BUILTIN_AUTIA1716,
400 AARCH64_PAUTH_BUILTIN_PACIA1716, 457 AARCH64_PAUTH_BUILTIN_PACIA1716,
458 AARCH64_PAUTH_BUILTIN_AUTIB1716,
459 AARCH64_PAUTH_BUILTIN_PACIB1716,
401 AARCH64_PAUTH_BUILTIN_XPACLRI, 460 AARCH64_PAUTH_BUILTIN_XPACLRI,
461 /* Special-cased Armv8.3-A complex FMA by lane quad builtins. */
462 AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE,
463 AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
464 /* Builtin for the Armv8.3-A JavaScript conversion instruction. */
465 AARCH64_JSCVT,
466 /* TME builtins. */
467 AARCH64_TME_BUILTIN_TSTART,
468 AARCH64_TME_BUILTIN_TCOMMIT,
469 AARCH64_TME_BUILTIN_TTEST,
470 AARCH64_TME_BUILTIN_TCANCEL,
471 /* Armv8.5-a RNG instruction builtins. */
472 AARCH64_BUILTIN_RNG_RNDR,
473 AARCH64_BUILTIN_RNG_RNDRRS,
474 /* MEMTAG builtins. */
475 AARCH64_MEMTAG_BUILTIN_START,
476 AARCH64_MEMTAG_BUILTIN_IRG,
477 AARCH64_MEMTAG_BUILTIN_GMI,
478 AARCH64_MEMTAG_BUILTIN_SUBP,
479 AARCH64_MEMTAG_BUILTIN_INC_TAG,
480 AARCH64_MEMTAG_BUILTIN_SET_TAG,
481 AARCH64_MEMTAG_BUILTIN_GET_TAG,
482 AARCH64_MEMTAG_BUILTIN_END,
402 AARCH64_BUILTIN_MAX 483 AARCH64_BUILTIN_MAX
403 }; 484 };
404 485
405 #undef CRC32_BUILTIN 486 #undef CRC32_BUILTIN
406 #define CRC32_BUILTIN(N, M) \ 487 #define CRC32_BUILTIN(N, M) \
407 {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N}, 488 {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},
408 489
409 static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = { 490 static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
410 AARCH64_CRC32_BUILTINS 491 AARCH64_CRC32_BUILTINS
492 };
493
494
495 #undef FCMLA_LANEQ_BUILTIN
496 #define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
497 {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \
498 AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T},
499
500 /* This structure records the mapping from the builtin to the instruction
501 to generate in the backend and how to invoke the instruction. */
502 static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
503 AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
411 }; 504 };
412 505
413 #undef CRC32_BUILTIN 506 #undef CRC32_BUILTIN
414 507
415 static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; 508 static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];
496 /* The user-visible __fp16 type, and a pointer to that type. Used 589 /* The user-visible __fp16 type, and a pointer to that type. Used
497 across the back-end. */ 590 across the back-end. */
498 tree aarch64_fp16_type_node = NULL_TREE; 591 tree aarch64_fp16_type_node = NULL_TREE;
499 tree aarch64_fp16_ptr_type_node = NULL_TREE; 592 tree aarch64_fp16_ptr_type_node = NULL_TREE;
500 593
594 /* Back-end node type for brain float (bfloat) types. */
595 tree aarch64_bf16_type_node = NULL_TREE;
596 tree aarch64_bf16_ptr_type_node = NULL_TREE;
597
598 /* Wrapper around add_builtin_function. NAME is the name of the built-in
599 function, TYPE is the function type, and CODE is the function subcode
600 (relative to AARCH64_BUILTIN_GENERAL). */
601 static tree
602 aarch64_general_add_builtin (const char *name, tree type, unsigned int code)
603 {
604 code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
605 return add_builtin_function (name, type, code, BUILT_IN_MD,
606 NULL, NULL_TREE);
607 }
608
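The shift-and-OR packs the general-group subcode and the builtin class into a single function code. A minimal sketch of the scheme, assuming AARCH64_BUILTIN_SHIFT and AARCH64_BUILTIN_GENERAL come from the backend's aarch64_builtin_class declarations:

    packed  = (subcode << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
    class   =  packed & ((1u << AARCH64_BUILTIN_SHIFT) - 1);
    subcode =  packed >> AARCH64_BUILTIN_SHIFT;

The AARCH64_BUILTIN_SUBCODE macro near the end of this file performs exactly the last step on a function decl.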
501 static const char * 609 static const char *
502 aarch64_mangle_builtin_scalar_type (const_tree type) 610 aarch64_mangle_builtin_scalar_type (const_tree type)
503 { 611 {
504 int i = 0; 612 int i = 0;
505 613
534 642
535 return NULL; 643 return NULL;
536 } 644 }
537 645
538 const char * 646 const char *
539 aarch64_mangle_builtin_type (const_tree type) 647 aarch64_general_mangle_builtin_type (const_tree type)
540 { 648 {
541 const char *mangle; 649 const char *mangle;
542 /* Walk through all the AArch64 builtins types tables to filter out the 650 /* Walk through all the AArch64 builtins types tables to filter out the
543 incoming type. */ 651 incoming type. */
544 if ((mangle = aarch64_mangle_builtin_vector_type (type)) 652 if ((mangle = aarch64_mangle_builtin_vector_type (type))
576 return aarch64_fp16_type_node; 684 return aarch64_fp16_type_node;
577 case E_SFmode: 685 case E_SFmode:
578 return float_type_node; 686 return float_type_node;
579 case E_DFmode: 687 case E_DFmode:
580 return double_type_node; 688 return double_type_node;
689 case E_BFmode:
690 return aarch64_bf16_type_node;
581 default: 691 default:
582 gcc_unreachable (); 692 gcc_unreachable ();
583 } 693 }
584 #undef QUAL_TYPE 694 #undef QUAL_TYPE
585 } 695 }
666 aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node; 776 aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node;
667 aarch64_simd_types[Float32x2_t].eltype = float_type_node; 777 aarch64_simd_types[Float32x2_t].eltype = float_type_node;
668 aarch64_simd_types[Float32x4_t].eltype = float_type_node; 778 aarch64_simd_types[Float32x4_t].eltype = float_type_node;
669 aarch64_simd_types[Float64x1_t].eltype = double_type_node; 779 aarch64_simd_types[Float64x1_t].eltype = double_type_node;
670 aarch64_simd_types[Float64x2_t].eltype = double_type_node; 780 aarch64_simd_types[Float64x2_t].eltype = double_type_node;
781
782 /* Init Bfloat vector types with underlying __bf16 type. */
783 aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
784 aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;
671 785
672 for (i = 0; i < nelts; i++) 786 for (i = 0; i < nelts; i++)
673 { 787 {
674 tree eltype = aarch64_simd_types[i].eltype; 788 tree eltype = aarch64_simd_types[i].eltype;
675 machine_mode mode = aarch64_simd_types[i].mode; 789 machine_mode mode = aarch64_simd_types[i].mode;
744 "__builtin_aarch64_simd_udi"); 858 "__builtin_aarch64_simd_udi");
745 } 859 }
746 860
747 static bool aarch64_simd_builtins_initialized_p = false; 861 static bool aarch64_simd_builtins_initialized_p = false;
748 862
863 /* Because the architecture does not provide a lane variant of the fcmla
864 instructions we can't use the standard simd builtin expansion code, but we
865 still want the majority of the validation that would normally be done. */
866
867 void
868 aarch64_init_fcmla_laneq_builtins (void)
869 {
870 unsigned int i = 0;
871
872 for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i)
873 {
874 aarch64_fcmla_laneq_builtin_datum* d
875 = &aarch64_fcmla_lane_builtin_data[i];
876 tree argtype = aarch64_lookup_simd_builtin_type (d->mode, qualifier_none);
877 machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
878 tree quadtype
879 = aarch64_lookup_simd_builtin_type (quadmode, qualifier_none);
880 tree lanetype
881 = aarch64_simd_builtin_std_type (SImode, qualifier_lane_pair_index);
882 tree ftype = build_function_type_list (argtype, argtype, argtype,
883 quadtype, lanetype, NULL_TREE);
884 tree fndecl = aarch64_general_add_builtin (d->name, ftype, d->fcode);
885
886 aarch64_builtin_decls[d->fcode] = fndecl;
887 }
888 }
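For the two modes in the table GET_MODE_2XWIDER_MODE doubles the vector width:

    V2SF -> V4SF        V4HF -> V8HF

so the quadtype operand of each laneq builtin is the full 128-bit ("q") form of the element type, which is what the _laneq spelling of the intrinsics selects lanes from.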
889
749 void 890 void
750 aarch64_init_simd_builtins (void) 891 aarch64_init_simd_builtins (void)
751 { 892 {
752 unsigned int i, fcode = AARCH64_SIMD_PATTERN_START; 893 unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;
753 894
767 tree lane_check_fpr = build_function_type_list (void_type_node, 908 tree lane_check_fpr = build_function_type_list (void_type_node,
768 size_type_node, 909 size_type_node,
769 size_type_node, 910 size_type_node,
770 intSI_type_node, 911 intSI_type_node,
771 NULL); 912 NULL);
772 aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] = 913 aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
773 add_builtin_function ("__builtin_aarch64_im_lane_boundsi", lane_check_fpr, 914 = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
774 AARCH64_SIMD_BUILTIN_LANE_CHECK, BUILT_IN_MD, 915 lane_check_fpr,
775 NULL, NULL_TREE); 916 AARCH64_SIMD_BUILTIN_LANE_CHECK);
776 917
777 for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++) 918 for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
778 { 919 {
779 bool print_type_signature_p = false; 920 bool print_type_signature_p = false;
780 char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 }; 921 char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 };
868 d->name, type_signature); 1009 d->name, type_signature);
869 else 1010 else
870 snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s", 1011 snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
871 d->name); 1012 d->name);
872 1013
873 fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, 1014 fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode);
874 NULL, NULL_TREE);
875 aarch64_builtin_decls[fcode] = fndecl; 1015 aarch64_builtin_decls[fcode] = fndecl;
876 } 1016 }
1017
1018 /* Initialize the remaining fcmla_laneq intrinsics. */
1019 aarch64_init_fcmla_laneq_builtins ();
877 } 1020 }
878 1021
879 static void 1022 static void
880 aarch64_init_crc32_builtins () 1023 aarch64_init_crc32_builtins ()
881 { 1024 {
886 { 1029 {
887 aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i]; 1030 aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
888 tree argtype = aarch64_simd_builtin_std_type (d->mode, 1031 tree argtype = aarch64_simd_builtin_std_type (d->mode,
889 qualifier_unsigned); 1032 qualifier_unsigned);
890 tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE); 1033 tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE);
891 tree fndecl = add_builtin_function (d->name, ftype, d->fcode, 1034 tree fndecl = aarch64_general_add_builtin (d->name, ftype, d->fcode);
892 BUILT_IN_MD, NULL, NULL_TREE);
893 1035
894 aarch64_builtin_decls[d->fcode] = fndecl; 1036 aarch64_builtin_decls[d->fcode] = fndecl;
895 } 1037 }
896 } 1038 }
897 1039
927 builtin_decls_data *bdd_end = bdd + (sizeof (bdda) / sizeof (builtin_decls_data)); 1069 builtin_decls_data *bdd_end = bdd + (sizeof (bdda) / sizeof (builtin_decls_data));
928 1070
929 for (; bdd < bdd_end; bdd++) 1071 for (; bdd < bdd_end; bdd++)
930 { 1072 {
931 ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE); 1073 ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE);
932 fndecl = add_builtin_function (bdd->builtin_name, 1074 fndecl = aarch64_general_add_builtin (bdd->builtin_name,
933 ftype, bdd->function_code, BUILT_IN_MD, NULL, NULL_TREE); 1075 ftype, bdd->function_code);
934 aarch64_builtin_decls[bdd->function_code] = fndecl; 1076 aarch64_builtin_decls[bdd->function_code] = fndecl;
935 } 1077 }
936 } 1078 }
937 1079
938 /* Initialize the backend types that support the user-visible __fp16 1080 /* Initialize the backend types that support the user-visible __fp16
946 TYPE_PRECISION (aarch64_fp16_type_node) = 16; 1088 TYPE_PRECISION (aarch64_fp16_type_node) = 16;
947 layout_type (aarch64_fp16_type_node); 1089 layout_type (aarch64_fp16_type_node);
948 1090
949 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16"); 1091 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
950 aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node); 1092 aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
1093 }
1094
1095 /* Initialize the backend REAL_TYPE type supporting bfloat types. */
1096 static void
1097 aarch64_init_bf16_types (void)
1098 {
1099 aarch64_bf16_type_node = make_node (REAL_TYPE);
1100 TYPE_PRECISION (aarch64_bf16_type_node) = 16;
1101 SET_TYPE_MODE (aarch64_bf16_type_node, BFmode);
1102 layout_type (aarch64_bf16_type_node);
1103
1104 lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16");
1105 aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node);
951 } 1106 }
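__bf16 is registered as a 16-bit REAL_TYPE in BFmode. The bfloat16 format keeps the sign, the 8 exponent bits and the top 7 mantissa bits of an IEEE binary32 value, i.e. its upper half; a self-contained illustration of that relationship (truncating rather than rounding; not part of this file):

    #include <stdint.h>
    #include <string.h>

    /* bfloat16 bit pattern of F: the upper 16 bits of the binary32
       encoding (truncated, so the low 16 mantissa bits are dropped).  */
    static uint16_t
    float_to_bf16_bits (float f)
    {
      uint32_t u;
      memcpy (&u, &f, sizeof u);
      return (uint16_t) (u >> 16);
    }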
952 1107
953 /* Pointer authentication builtins that will become NOP on legacy platforms. 1108 /* Pointer authentication builtins that will become NOP on legacy platforms.
954 Currently, these builtins are for internal use only (libgcc EH unwinder). */ 1109 Currently, these builtins are for internal use only (libgcc EH unwinder). */
955 1110
962 unsigned_intDI_type_node, NULL_TREE); 1117 unsigned_intDI_type_node, NULL_TREE);
963 tree ftype_pointer_strip 1118 tree ftype_pointer_strip
964 = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE); 1119 = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE);
965 1120
966 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716] 1121 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716]
967 = add_builtin_function ("__builtin_aarch64_autia1716", ftype_pointer_auth, 1122 = aarch64_general_add_builtin ("__builtin_aarch64_autia1716",
968 AARCH64_PAUTH_BUILTIN_AUTIA1716, BUILT_IN_MD, NULL, 1123 ftype_pointer_auth,
969 NULL_TREE); 1124 AARCH64_PAUTH_BUILTIN_AUTIA1716);
970 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716] 1125 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716]
971 = add_builtin_function ("__builtin_aarch64_pacia1716", ftype_pointer_auth, 1126 = aarch64_general_add_builtin ("__builtin_aarch64_pacia1716",
972 AARCH64_PAUTH_BUILTIN_PACIA1716, BUILT_IN_MD, NULL, 1127 ftype_pointer_auth,
973 NULL_TREE); 1128 AARCH64_PAUTH_BUILTIN_PACIA1716);
1129 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIB1716]
1130 = aarch64_general_add_builtin ("__builtin_aarch64_autib1716",
1131 ftype_pointer_auth,
1132 AARCH64_PAUTH_BUILTIN_AUTIB1716);
1133 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIB1716]
1134 = aarch64_general_add_builtin ("__builtin_aarch64_pacib1716",
1135 ftype_pointer_auth,
1136 AARCH64_PAUTH_BUILTIN_PACIB1716);
974 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI] 1137 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI]
975 = add_builtin_function ("__builtin_aarch64_xpaclri", ftype_pointer_strip, 1138 = aarch64_general_add_builtin ("__builtin_aarch64_xpaclri",
976 AARCH64_PAUTH_BUILTIN_XPACLRI, BUILT_IN_MD, NULL, 1139 ftype_pointer_strip,
977 NULL_TREE); 1140 AARCH64_PAUTH_BUILTIN_XPACLRI);
978 } 1141 }
1142
1143 /* Initialize the transactional memory extension (TME) builtins. */
1144 static void
1145 aarch64_init_tme_builtins (void)
1146 {
1147 tree ftype_uint64_void
1148 = build_function_type_list (uint64_type_node, NULL);
1149 tree ftype_void_void
1150 = build_function_type_list (void_type_node, NULL);
1151 tree ftype_void_uint64
1152 = build_function_type_list (void_type_node, uint64_type_node, NULL);
1153
1154 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART]
1155 = aarch64_general_add_builtin ("__builtin_aarch64_tstart",
1156 ftype_uint64_void,
1157 AARCH64_TME_BUILTIN_TSTART);
1158 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST]
1159 = aarch64_general_add_builtin ("__builtin_aarch64_ttest",
1160 ftype_uint64_void,
1161 AARCH64_TME_BUILTIN_TTEST);
1162 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT]
1163 = aarch64_general_add_builtin ("__builtin_aarch64_tcommit",
1164 ftype_void_void,
1165 AARCH64_TME_BUILTIN_TCOMMIT);
1166 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL]
1167 = aarch64_general_add_builtin ("__builtin_aarch64_tcancel",
1168 ftype_void_uint64,
1169 AARCH64_TME_BUILTIN_TCANCEL);
1170 }
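Taken together these builtins give the usual transactional pattern. A minimal usage sketch, assuming a TME-enabled target (e.g. an -march value with the tme extension) and the TME convention that a zero status from tstart means the transaction started:

    #include <stdint.h>

    static int
    try_increment (int *counter)
    {
      uint64_t status = __builtin_aarch64_tstart ();
      if (status != 0)               /* failed or aborted; status encodes why */
        return 0;
      ++*counter;                    /* speculative update */
      __builtin_aarch64_tcommit ();  /* make the update visible atomically */
      return 1;
    }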
1171
1172 /* Add builtins for Random Number instructions. */
1173
1174 static void
1175 aarch64_init_rng_builtins (void)
1176 {
1177 tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node);
1178 tree ftype
1179 = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL);
1180 aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR]
1181 = aarch64_general_add_builtin ("__builtin_aarch64_rndr", ftype,
1182 AARCH64_BUILTIN_RNG_RNDR);
1183 aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS]
1184 = aarch64_general_add_builtin ("__builtin_aarch64_rndrrs", ftype,
1185 AARCH64_BUILTIN_RNG_RNDRRS);
1186 }
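Both builtins share one prototype: they write the random value through the pointer and return an int status. A usage sketch, assuming an Armv8.5-A target with the rng extension and taking the ACLE convention that zero means success:

    #include <stdint.h>

    static uint64_t
    get_random_u64 (void)
    {
      uint64_t val;
      while (__builtin_aarch64_rndr (&val) != 0)
        ;                            /* retry until the hardware succeeds */
      return val;
    }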
1187
1188 /* Initialize the memory tagging extension (MTE) builtins. */
1189 struct
1190 {
1191 tree ftype;
1192 enum insn_code icode;
1193 } aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_END -
1194 AARCH64_MEMTAG_BUILTIN_START - 1];
1195
1196 static void
1197 aarch64_init_memtag_builtins (void)
1198 {
1199 tree fntype = NULL;
1200
1201 #define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \
1202 aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \
1203 = aarch64_general_add_builtin ("__builtin_aarch64_memtag_"#N, \
1204 T, AARCH64_MEMTAG_BUILTIN_##F); \
1205 aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \
1206 AARCH64_MEMTAG_BUILTIN_START - 1] = \
1207 {T, CODE_FOR_##I};
1208
1209 fntype = build_function_type_list (ptr_type_node, ptr_type_node,
1210 uint64_type_node, NULL);
1211 AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, irg, irg, fntype);
1212
1213 fntype = build_function_type_list (uint64_type_node, ptr_type_node,
1214 uint64_type_node, NULL);
1215 AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, gmi, gmi, fntype);
1216
1217 fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node,
1218 ptr_type_node, NULL);
1219 AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, subp, subp, fntype);
1220
1221 fntype = build_function_type_list (ptr_type_node, ptr_type_node,
1222 unsigned_type_node, NULL);
1223 AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, inc_tag, addg, fntype);
1224
1225 fntype = build_function_type_list (void_type_node, ptr_type_node, NULL);
1226 AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype);
1227
1228 fntype = build_function_type_list (ptr_type_node, ptr_type_node, NULL);
1229 AARCH64_INIT_MEMTAG_BUILTINS_DECL (GET_TAG, get_tag, ldg, fntype);
1230
1231 #undef AARCH64_INIT_MEMTAG_BUILTINS_DECL
1232 }
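A usage sketch for the tagging builtins registered above, assuming an MTE-enabled target (e.g. an -march value with the memtag extension): irg picks a random tag for the pointer and set_tag stores that tag for the 16-byte granule it points at.

    static void *
    tag_granule (void *p)
    {
      void *tagged = __builtin_aarch64_memtag_irg (p, 0); /* 0: no excluded tags */
      __builtin_aarch64_memtag_set_tag (tagged);
      return tagged;
    }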
1233
1234 /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group. */
979 1235
980 void 1236 void
981 aarch64_init_builtins (void) 1237 aarch64_general_init_builtins (void)
982 { 1238 {
983 tree ftype_set_fpr 1239 tree ftype_set_fpr
984 = build_function_type_list (void_type_node, unsigned_type_node, NULL); 1240 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
985 tree ftype_get_fpr 1241 tree ftype_get_fpr
986 = build_function_type_list (unsigned_type_node, NULL); 1242 = build_function_type_list (unsigned_type_node, NULL);
987 1243
988 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR] 1244 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
989 = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr, 1245 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr",
990 AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); 1246 ftype_get_fpr,
1247 AARCH64_BUILTIN_GET_FPCR);
991 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR] 1248 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
992 = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr, 1249 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr",
993 AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); 1250 ftype_set_fpr,
1251 AARCH64_BUILTIN_SET_FPCR);
994 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR] 1252 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
995 = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr, 1253 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr",
996 AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); 1254 ftype_get_fpr,
1255 AARCH64_BUILTIN_GET_FPSR);
997 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR] 1256 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
998 = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, 1257 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr",
999 AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); 1258 ftype_set_fpr,
1259 AARCH64_BUILTIN_SET_FPSR);
1000 1260
1001 aarch64_init_fp16_types (); 1261 aarch64_init_fp16_types ();
1262
1263 aarch64_init_bf16_types ();
1002 1264
1003 if (TARGET_SIMD) 1265 if (TARGET_SIMD)
1004 aarch64_init_simd_builtins (); 1266 aarch64_init_simd_builtins ();
1005 1267
1006 aarch64_init_crc32_builtins (); 1268 aarch64_init_crc32_builtins ();
1007 aarch64_init_builtin_rsqrt (); 1269 aarch64_init_builtin_rsqrt ();
1270 aarch64_init_rng_builtins ();
1271
1272 tree ftype_jcvt
1273 = build_function_type_list (intSI_type_node, double_type_node, NULL);
1274 aarch64_builtin_decls[AARCH64_JSCVT]
1275 = aarch64_general_add_builtin ("__builtin_aarch64_jcvtzs", ftype_jcvt,
1276 AARCH64_JSCVT);
1008 1277
1009 /* Initialize pointer authentication builtins which are backed by instructions 1278 /* Initialize pointer authentication builtins which are backed by instructions
1010 in NOP encoding space. 1279 in NOP encoding space.
1011 1280
1012 NOTE: these builtins are supposed to be used by the libgcc unwinder only; 1281 NOTE: these builtins are supposed to be used by the libgcc unwinder only;
1013 as there is no support for return address signing under ILP32, we don't 1282 as there is no support for return address signing under ILP32, we don't
1014 register them. */ 1283 register them. */
1015 if (!TARGET_ILP32) 1284 if (!TARGET_ILP32)
1016 aarch64_init_pauth_hint_builtins (); 1285 aarch64_init_pauth_hint_builtins ();
1017 } 1286
1018 1287 if (TARGET_TME)
1288 aarch64_init_tme_builtins ();
1289
1290 if (TARGET_MEMTAG)
1291 aarch64_init_memtag_builtins ();
1292 }
1293
1294 /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group. */
1019 tree 1295 tree
1020 aarch64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) 1296 aarch64_general_builtin_decl (unsigned code, bool)
1021 { 1297 {
1022 if (code >= AARCH64_BUILTIN_MAX) 1298 if (code >= AARCH64_BUILTIN_MAX)
1023 return error_mark_node; 1299 return error_mark_node;
1024 1300
1025 return aarch64_builtin_decls[code]; 1301 return aarch64_builtin_decls[code];
1029 { 1305 {
1030 SIMD_ARG_COPY_TO_REG, 1306 SIMD_ARG_COPY_TO_REG,
1031 SIMD_ARG_CONSTANT, 1307 SIMD_ARG_CONSTANT,
1032 SIMD_ARG_LANE_INDEX, 1308 SIMD_ARG_LANE_INDEX,
1033 SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX, 1309 SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
1310 SIMD_ARG_LANE_PAIR_INDEX,
1311 SIMD_ARG_LANE_QUADTUP_INDEX,
1034 SIMD_ARG_STOP 1312 SIMD_ARG_STOP
1035 } builtin_simd_arg; 1313 } builtin_simd_arg;
1036 1314
1037 1315
1038 static rtx 1316 static rtx
1100 = GET_MODE_NUNITS (vmode).to_constant (); 1378 = GET_MODE_NUNITS (vmode).to_constant ();
1101 aarch64_simd_lane_bounds (op[opc], 0, nunits, exp); 1379 aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
1102 /* Keep to GCC-vector-extension lane indices in the RTL. */ 1380 /* Keep to GCC-vector-extension lane indices in the RTL. */
1103 op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc])); 1381 op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
1104 } 1382 }
1105 /* Fall through - if the lane index isn't a constant then 1383 /* If the lane index isn't a constant then error out. */
1106 the next case will error. */ 1384 goto constant_arg;
1107 /* FALLTHRU */ 1385
1386 case SIMD_ARG_LANE_PAIR_INDEX:
1387 /* Must be a previous operand into which this is an index and
1388 index is restricted to nunits / 2. */
1389 gcc_assert (opc > 0);
1390 if (CONST_INT_P (op[opc]))
1391 {
1392 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
1393 unsigned int nunits
1394 = GET_MODE_NUNITS (vmode).to_constant ();
1395 aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp);
1396 /* Keep to GCC-vector-extension lane indices in the RTL. */
1397 int lane = INTVAL (op[opc]);
1398 op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
1399 SImode);
1400 }
1401 /* If the lane index isn't a constant then error out. */
1402 goto constant_arg;
1403 case SIMD_ARG_LANE_QUADTUP_INDEX:
1404 /* Must be a previous operand into which this is an index and
1405 index is restricted to nunits / 4. */
1406 gcc_assert (opc > 0);
1407 if (CONST_INT_P (op[opc]))
1408 {
1409 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
1410 unsigned int nunits
1411 = GET_MODE_NUNITS (vmode).to_constant ();
1412 aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp);
1413 /* Keep to GCC-vector-extension lane indices in the RTL. */
1414 int lane = INTVAL (op[opc]);
1415 op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane),
1416 SImode);
1417 }
1418 /* If the lane index isn't a constant then error out. */
1419 goto constant_arg;
1108 case SIMD_ARG_CONSTANT: 1420 case SIMD_ARG_CONSTANT:
1109 constant_arg: 1421 constant_arg:
1110 if (!(*insn_data[icode].operand[opc].predicate) 1422 if (!(*insn_data[icode].operand[opc].predicate)
1111 (op[opc], mode)) 1423 (op[opc], mode))
1112 { 1424 {
1213 int operands_k = k - is_void; 1525 int operands_k = k - is_void;
1214 int expr_args_k = k - 1; 1526 int expr_args_k = k - 1;
1215 1527
1216 if (d->qualifiers[qualifiers_k] & qualifier_lane_index) 1528 if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
1217 args[k] = SIMD_ARG_LANE_INDEX; 1529 args[k] = SIMD_ARG_LANE_INDEX;
1530 else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
1531 args[k] = SIMD_ARG_LANE_PAIR_INDEX;
1532 else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
1533 args[k] = SIMD_ARG_LANE_QUADTUP_INDEX;
1218 else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) 1534 else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
1219 args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX; 1535 args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
1220 else if (d->qualifiers[qualifiers_k] & qualifier_immediate) 1536 else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
1221 args[k] = SIMD_ARG_CONSTANT; 1537 args[k] = SIMD_ARG_CONSTANT;
1222 else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate) 1538 else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
1315 emit_insn (gen (target, op0)); 1631 emit_insn (gen (target, op0));
1316 1632
1317 return target; 1633 return target;
1318 } 1634 }
1319 1635
1320 /* Expand an expression EXP that calls a built-in function, 1636 /* Expand a FCMLA lane expression EXP with code FCODE and
1321 with result going to TARGET if that's convenient. */ 1637 result going to TARGET if that is convenient. */
1638
1322 rtx 1639 rtx
1323 aarch64_expand_builtin (tree exp, 1640 aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
1324 rtx target, 1641 {
1325 rtx subtarget ATTRIBUTE_UNUSED, 1642 int bcode = fcode - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE - 1;
1326 machine_mode mode ATTRIBUTE_UNUSED, 1643 aarch64_fcmla_laneq_builtin_datum* d
1327 int ignore ATTRIBUTE_UNUSED) 1644 = &aarch64_fcmla_lane_builtin_data[bcode];
1328 { 1645 machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
1329 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 1646 rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0)));
1330 int fcode = DECL_FUNCTION_CODE (fndecl); 1647 rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1)));
1648 rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2)));
1649 tree tmp = CALL_EXPR_ARG (exp, 3);
1650 rtx lane_idx = expand_expr (tmp, NULL_RTX, VOIDmode, EXPAND_INITIALIZER);
1651
1652 /* Validate that the lane index is a constant. */
1653 if (!CONST_INT_P (lane_idx))
1654 {
1655 error ("%Kargument %d must be a constant immediate", exp, 4);
1656 return const0_rtx;
1657 }
1658
1659 /* Validate that the index is within the expected range. */
1660 int nunits = GET_MODE_NUNITS (quadmode).to_constant ();
1661 aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp);
1662
1663 /* Generate the correct register and mode. */
1664 int lane = INTVAL (lane_idx);
1665
1666 if (lane < nunits / 4)
1667 op2 = simplify_gen_subreg (d->mode, op2, quadmode,
1668 subreg_lowpart_offset (d->mode, quadmode));
1669 else
1670 {
1671 /* Select the upper 64 bits, either a V2SF or V4HF. This is however
1672 quite messy: the operation required, even though simple, doesn't
1673 have a simple RTL pattern and is quite hard to define using a
1674 single RTL pattern. The target-generic version gen_highpart_mode
1675 generates code that isn't optimal. */
1676 rtx temp1 = gen_reg_rtx (d->mode);
1677 rtx temp2 = gen_reg_rtx (DImode);
1678 temp1 = simplify_gen_subreg (d->mode, op2, quadmode,
1679 subreg_lowpart_offset (d->mode, quadmode));
1680 temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0);
1681 if (BYTES_BIG_ENDIAN)
1682 emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx));
1683 else
1684 emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx));
1685 op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0);
1686
1687 /* And recalculate the index. */
1688 lane -= nunits / 4;
1689 }
1690
1691 /* Keep to GCC-vector-extension lane indices in the RTL; only nunits / 4
1692 (the max nunits in the range check) are valid, which means only 0-1,
1693 so we only need to know the order in a V2 mode. */
1694 lane_idx = aarch64_endian_lane_rtx (V2DImode, lane);
1695
1696 if (!target)
1697 target = gen_reg_rtx (d->mode);
1698 else
1699 target = force_reg (d->mode, target);
1700
1701 rtx pat = NULL_RTX;
1702
1703 if (d->lane)
1704 pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx);
1705 else
1706 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
1707
1708 if (!pat)
1709 return NULL_RTX;
1710
1711 emit_insn (pat);
1712 return target;
1713 }
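This expander ultimately services the vcmla*_laneq intrinsics from arm_neon.h. A usage sketch, assuming a compiler and target that provide the Armv8.3-A complex-arithmetic intrinsics (e.g. -march=armv8.3-a):

    #include <arm_neon.h>

    /* acc += a * b[lane pair 1], with a 90-degree rotation in the complex
       plane; the lane pair is taken from the 128-bit operand.  */
    float32x2_t
    cmla_rot90 (float32x2_t acc, float32x2_t a, float32x4_t b)
    {
      return vcmla_rot90_laneq_f32 (acc, a, b, 1);
    }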
1714
1715 /* Function to expand an expression EXP which calls one of the Transactional
1716 Memory Extension (TME) builtins FCODE with the result going to TARGET. */
1717 static rtx
1718 aarch64_expand_builtin_tme (int fcode, tree exp, rtx target)
1719 {
1720 switch (fcode)
1721 {
1722 case AARCH64_TME_BUILTIN_TSTART:
1723 target = gen_reg_rtx (DImode);
1724 emit_insn (GEN_FCN (CODE_FOR_tstart) (target));
1725 break;
1726
1727 case AARCH64_TME_BUILTIN_TTEST:
1728 target = gen_reg_rtx (DImode);
1729 emit_insn (GEN_FCN (CODE_FOR_ttest) (target));
1730 break;
1731
1732 case AARCH64_TME_BUILTIN_TCOMMIT:
1733 emit_insn (GEN_FCN (CODE_FOR_tcommit) ());
1734 break;
1735
1736 case AARCH64_TME_BUILTIN_TCANCEL:
1737 {
1738 tree arg0 = CALL_EXPR_ARG (exp, 0);
1739 rtx op0 = expand_normal (arg0);
1740 if (CONST_INT_P (op0) && UINTVAL (op0) <= 65535)
1741 emit_insn (GEN_FCN (CODE_FOR_tcancel) (op0));
1742 else
1743 {
1744 error ("%Kargument must be a 16-bit constant immediate", exp);
1745 return const0_rtx;
1746 }
1747 }
1748 break;
1749
1750 default:
1751 gcc_unreachable ();
1752 }
1753 return target;
1754 }
1755
1756 /* Expand a random number builtin EXP with code FCODE, putting the result
1757 int TARGET. If IGNORE is true the return value is ignored. */
1758
1759 rtx
1760 aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
1761 {
1762 rtx pat;
1763 enum insn_code icode;
1764 if (fcode == AARCH64_BUILTIN_RNG_RNDR)
1765 icode = CODE_FOR_aarch64_rndr;
1766 else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS)
1767 icode = CODE_FOR_aarch64_rndrrs;
1768 else
1769 gcc_unreachable ();
1770
1771 rtx rand = gen_reg_rtx (DImode);
1772 pat = GEN_FCN (icode) (rand);
1773 if (!pat)
1774 return NULL_RTX;
1775
1776 tree arg0 = CALL_EXPR_ARG (exp, 0);
1777 rtx res_addr = expand_normal (arg0);
1778 res_addr = convert_memory_address (Pmode, res_addr);
1779 rtx res_mem = gen_rtx_MEM (DImode, res_addr);
1780 emit_insn (pat);
1781 emit_move_insn (res_mem, rand);
1782 /* If the status result is unused don't generate the CSET code. */
1783 if (ignore)
1784 return target;
1785
1786 rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
1787 rtx cmp_rtx = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx);
1788 emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg));
1789 return target;
1790 }
1791
1792 /* Expand an expression EXP that calls a MEMTAG built-in FCODE
1793 with result going to TARGET. */
1794 static rtx
1795 aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
1796 {
1797 if (TARGET_ILP32)
1798 {
1799 error ("Memory Tagging Extension does not support %<-mabi=ilp32%>");
1800 return const0_rtx;
1801 }
1802
1803 rtx pat = NULL;
1804 enum insn_code icode = aarch64_memtag_builtin_data[fcode -
1805 AARCH64_MEMTAG_BUILTIN_START - 1].icode;
1806
1807 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
1808 machine_mode mode0 = GET_MODE (op0);
1809 op0 = force_reg (mode0 == VOIDmode ? DImode : mode0, op0);
1810 op0 = convert_to_mode (DImode, op0, true);
1811
1812 switch (fcode)
1813 {
1814 case AARCH64_MEMTAG_BUILTIN_IRG:
1815 case AARCH64_MEMTAG_BUILTIN_GMI:
1816 case AARCH64_MEMTAG_BUILTIN_SUBP:
1817 case AARCH64_MEMTAG_BUILTIN_INC_TAG:
1818 {
1819 if (! target
1820 || GET_MODE (target) != DImode
1821 || ! (*insn_data[icode].operand[0].predicate) (target, DImode))
1822 target = gen_reg_rtx (DImode);
1823
1824 if (fcode == AARCH64_MEMTAG_BUILTIN_INC_TAG)
1825 {
1826 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
1827
1828 if ((*insn_data[icode].operand[3].predicate) (op1, QImode))
1829 {
1830 pat = GEN_FCN (icode) (target, op0, const0_rtx, op1);
1831 break;
1832 }
1833 error ("%Kargument %d must be a constant immediate "
1834 "in range [0,15]", exp, 2);
1835 return const0_rtx;
1836 }
1837 else
1838 {
1839 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
1840 machine_mode mode1 = GET_MODE (op1);
1841 op1 = force_reg (mode1 == VOIDmode ? DImode : mode1, op1);
1842 op1 = convert_to_mode (DImode, op1, true);
1843 pat = GEN_FCN (icode) (target, op0, op1);
1844 }
1845 break;
1846 }
1847 case AARCH64_MEMTAG_BUILTIN_GET_TAG:
1848 target = op0;
1849 pat = GEN_FCN (icode) (target, op0, const0_rtx);
1850 break;
1851 case AARCH64_MEMTAG_BUILTIN_SET_TAG:
1852 pat = GEN_FCN (icode) (op0, op0, const0_rtx);
1853 break;
1854 default:
1855 gcc_unreachable ();
1856 }
1857
1858 if (!pat)
1859 return NULL_RTX;
1860
1861 emit_insn (pat);
1862 return target;
1863 }
1864
1865 /* Expand an expression EXP that calls built-in function FCODE,
1866 with result going to TARGET if that's convenient. IGNORE is true
1867 if the result of the builtin is ignored. */
1868 rtx
1869 aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
1870 int ignore)
1871 {
1331 int icode; 1872 int icode;
1332 rtx pat, op0; 1873 rtx pat, op0;
1333 tree arg0; 1874 tree arg0;
1334 1875
1335 switch (fcode) 1876 switch (fcode)
1358 emit_insn (pat); 1899 emit_insn (pat);
1359 return target; 1900 return target;
1360 1901
1361 case AARCH64_PAUTH_BUILTIN_AUTIA1716: 1902 case AARCH64_PAUTH_BUILTIN_AUTIA1716:
1362 case AARCH64_PAUTH_BUILTIN_PACIA1716: 1903 case AARCH64_PAUTH_BUILTIN_PACIA1716:
1904 case AARCH64_PAUTH_BUILTIN_AUTIB1716:
1905 case AARCH64_PAUTH_BUILTIN_PACIB1716:
1363 case AARCH64_PAUTH_BUILTIN_XPACLRI: 1906 case AARCH64_PAUTH_BUILTIN_XPACLRI:
1364 arg0 = CALL_EXPR_ARG (exp, 0); 1907 arg0 = CALL_EXPR_ARG (exp, 0);
1365 op0 = force_reg (Pmode, expand_normal (arg0)); 1908 op0 = force_reg (Pmode, expand_normal (arg0));
1366 1909
1367 if (!target) 1910 if (!target)
1381 } 1924 }
1382 else 1925 else
1383 { 1926 {
1384 tree arg1 = CALL_EXPR_ARG (exp, 1); 1927 tree arg1 = CALL_EXPR_ARG (exp, 1);
1385 rtx op1 = force_reg (Pmode, expand_normal (arg1)); 1928 rtx op1 = force_reg (Pmode, expand_normal (arg1));
1386 icode = (fcode == AARCH64_PAUTH_BUILTIN_PACIA1716 1929 switch (fcode)
1387 ? CODE_FOR_paci1716 : CODE_FOR_auti1716); 1930 {
1931 case AARCH64_PAUTH_BUILTIN_AUTIA1716:
1932 icode = CODE_FOR_autia1716;
1933 break;
1934 case AARCH64_PAUTH_BUILTIN_AUTIB1716:
1935 icode = CODE_FOR_autib1716;
1936 break;
1937 case AARCH64_PAUTH_BUILTIN_PACIA1716:
1938 icode = CODE_FOR_pacia1716;
1939 break;
1940 case AARCH64_PAUTH_BUILTIN_PACIB1716:
1941 icode = CODE_FOR_pacib1716;
1942 break;
1943 default:
1944 icode = 0;
1945 gcc_unreachable ();
1946 }
1388 1947
1389 rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM); 1948 rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM);
1390 rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM); 1949 rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM);
1391 emit_move_insn (x17_reg, op0); 1950 emit_move_insn (x17_reg, op0);
1392 emit_move_insn (x16_reg, op1); 1951 emit_move_insn (x16_reg, op1);
1393 emit_insn (GEN_FCN (icode) ()); 1952 emit_insn (GEN_FCN (icode) ());
1394 emit_move_insn (target, x17_reg); 1953 emit_move_insn (target, x17_reg);
1395 } 1954 }
1396 1955
1397 return target; 1956 return target;
1957
1958 case AARCH64_JSCVT:
1959 arg0 = CALL_EXPR_ARG (exp, 0);
1960 op0 = force_reg (DFmode, expand_normal (arg0));
1961 if (!target)
1962 target = gen_reg_rtx (SImode);
1963 else
1964 target = force_reg (SImode, target);
1965 emit_insn (GEN_FCN (CODE_FOR_aarch64_fjcvtzs) (target, op0));
1966 return target;
1967
1968 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF:
1969 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF:
1970 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF:
1971 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF:
1972 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF:
1973 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF:
1974 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF:
1975 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF:
1976 return aarch64_expand_fcmla_builtin (exp, target, fcode);
1977 case AARCH64_BUILTIN_RNG_RNDR:
1978 case AARCH64_BUILTIN_RNG_RNDRRS:
1979 return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
1398 } 1980 }
1399 1981
1400 if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX) 1982 if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
1401 return aarch64_simd_expand_builtin (fcode, exp, target); 1983 return aarch64_simd_expand_builtin (fcode, exp, target);
1402 else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX) 1984 else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX)
1407 || fcode == AARCH64_BUILTIN_RSQRT_V2DF 1989 || fcode == AARCH64_BUILTIN_RSQRT_V2DF
1408 || fcode == AARCH64_BUILTIN_RSQRT_V2SF 1990 || fcode == AARCH64_BUILTIN_RSQRT_V2SF
1409 || fcode == AARCH64_BUILTIN_RSQRT_V4SF) 1991 || fcode == AARCH64_BUILTIN_RSQRT_V4SF)
1410 return aarch64_expand_builtin_rsqrt (fcode, exp, target); 1992 return aarch64_expand_builtin_rsqrt (fcode, exp, target);
1411 1993
1994 if (fcode == AARCH64_TME_BUILTIN_TSTART
1995 || fcode == AARCH64_TME_BUILTIN_TCOMMIT
1996 || fcode == AARCH64_TME_BUILTIN_TTEST
1997 || fcode == AARCH64_TME_BUILTIN_TCANCEL)
1998 return aarch64_expand_builtin_tme (fcode, exp, target);
1999
2000 if (fcode >= AARCH64_MEMTAG_BUILTIN_START
2001 && fcode <= AARCH64_MEMTAG_BUILTIN_END)
2002 return aarch64_expand_builtin_memtag (fcode, exp, target);
2003
1412 gcc_unreachable (); 2004 gcc_unreachable ();
1413 } 2005 }
1414 2006
1415 tree 2007 tree
1416 aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, 2008 aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
1417 tree type_in) 2009 tree type_in)
1418 { 2010 {
1419 machine_mode in_mode, out_mode; 2011 machine_mode in_mode, out_mode;
1420 unsigned HOST_WIDE_INT in_n, out_n;
1421 2012
1422 if (TREE_CODE (type_out) != VECTOR_TYPE 2013 if (TREE_CODE (type_out) != VECTOR_TYPE
1423 || TREE_CODE (type_in) != VECTOR_TYPE) 2014 || TREE_CODE (type_in) != VECTOR_TYPE)
1424 return NULL_TREE; 2015 return NULL_TREE;
1425 2016
1426 out_mode = TYPE_MODE (TREE_TYPE (type_out)); 2017 out_mode = TYPE_MODE (type_out);
1427 in_mode = TYPE_MODE (TREE_TYPE (type_in)); 2018 in_mode = TYPE_MODE (type_in);
1428 if (!TYPE_VECTOR_SUBPARTS (type_out).is_constant (&out_n)
1429 || !TYPE_VECTOR_SUBPARTS (type_in).is_constant (&in_n))
1430 return NULL_TREE;
1431 2019
1432 #undef AARCH64_CHECK_BUILTIN_MODE 2020 #undef AARCH64_CHECK_BUILTIN_MODE
1433 #define AARCH64_CHECK_BUILTIN_MODE(C, N) 1 2021 #define AARCH64_CHECK_BUILTIN_MODE(C, N) 1
1434 #define AARCH64_FIND_FRINT_VARIANT(N) \ 2022 #define AARCH64_FIND_FRINT_VARIANT(N) \
1435 (AARCH64_CHECK_BUILTIN_MODE (2, D) \ 2023 (AARCH64_CHECK_BUILTIN_MODE (2, D) \
1441 : NULL_TREE))) 2029 : NULL_TREE)))
1442 switch (fn) 2030 switch (fn)
1443 { 2031 {
1444 #undef AARCH64_CHECK_BUILTIN_MODE 2032 #undef AARCH64_CHECK_BUILTIN_MODE
1445 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ 2033 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
1446 (out_mode == N##Fmode && out_n == C \ 2034 (out_mode == V##C##N##Fmode && in_mode == V##C##N##Fmode)
1447 && in_mode == N##Fmode && in_n == C)
1448 CASE_CFN_FLOOR: 2035 CASE_CFN_FLOOR:
1449 return AARCH64_FIND_FRINT_VARIANT (floor); 2036 return AARCH64_FIND_FRINT_VARIANT (floor);
1450 CASE_CFN_CEIL: 2037 CASE_CFN_CEIL:
1451 return AARCH64_FIND_FRINT_VARIANT (ceil); 2038 return AARCH64_FIND_FRINT_VARIANT (ceil);
1452 CASE_CFN_TRUNC: 2039 CASE_CFN_TRUNC:
1457 return AARCH64_FIND_FRINT_VARIANT (nearbyint); 2044 return AARCH64_FIND_FRINT_VARIANT (nearbyint);
1458 CASE_CFN_SQRT: 2045 CASE_CFN_SQRT:
1459 return AARCH64_FIND_FRINT_VARIANT (sqrt); 2046 return AARCH64_FIND_FRINT_VARIANT (sqrt);
1460 #undef AARCH64_CHECK_BUILTIN_MODE 2047 #undef AARCH64_CHECK_BUILTIN_MODE
1461 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ 2048 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
1462 (out_mode == SImode && out_n == C \ 2049 (out_mode == V##C##SImode && in_mode == V##C##N##Imode)
1463 && in_mode == N##Imode && in_n == C)
1464 CASE_CFN_CLZ: 2050 CASE_CFN_CLZ:
1465 { 2051 {
1466 if (AARCH64_CHECK_BUILTIN_MODE (4, S)) 2052 if (AARCH64_CHECK_BUILTIN_MODE (4, S))
1467 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si]; 2053 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si];
1468 return NULL_TREE; 2054 return NULL_TREE;
1475 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si]; 2061 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si];
1476 return NULL_TREE; 2062 return NULL_TREE;
1477 } 2063 }
1478 #undef AARCH64_CHECK_BUILTIN_MODE 2064 #undef AARCH64_CHECK_BUILTIN_MODE
1479 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \ 2065 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
1480 (out_mode == N##Imode && out_n == C \ 2066 (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode)
1481 && in_mode == N##Fmode && in_n == C)
1482 CASE_CFN_IFLOOR: 2067 CASE_CFN_IFLOOR:
1483 CASE_CFN_LFLOOR: 2068 CASE_CFN_LFLOOR:
1484 CASE_CFN_LLFLOOR: 2069 CASE_CFN_LLFLOOR:
1485 { 2070 {
1486 enum aarch64_builtins builtin; 2071 enum aarch64_builtins builtin;
1525 else 2110 else
1526 return NULL_TREE; 2111 return NULL_TREE;
1527 2112
1528 return aarch64_builtin_decls[builtin]; 2113 return aarch64_builtin_decls[builtin];
1529 } 2114 }
1530 case CFN_BUILT_IN_BSWAP16:
1531 #undef AARCH64_CHECK_BUILTIN_MODE
1532 #define AARCH64_CHECK_BUILTIN_MODE(C, N) \
1533 (out_mode == N##Imode && out_n == C \
1534 && in_mode == N##Imode && in_n == C)
1535 if (AARCH64_CHECK_BUILTIN_MODE (4, H))
1536 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi];
1537 else if (AARCH64_CHECK_BUILTIN_MODE (8, H))
1538 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi];
1539 else
1540 return NULL_TREE;
1541 case CFN_BUILT_IN_BSWAP32:
1542 if (AARCH64_CHECK_BUILTIN_MODE (2, S))
1543 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si];
1544 else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
1545 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si];
1546 else
1547 return NULL_TREE;
1548 case CFN_BUILT_IN_BSWAP64:
1549 if (AARCH64_CHECK_BUILTIN_MODE (2, D))
1550 return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di];
1551 else
1552 return NULL_TREE;
1553 default: 2115 default:
1554 return NULL_TREE; 2116 return NULL_TREE;
1555 } 2117 }
1556 2118
1557 return NULL_TREE; 2119 return NULL_TREE;
1558 } 2120 }
1559 2121
1560 /* Return builtin for reciprocal square root. */ 2122 /* Return builtin for reciprocal square root. */
1561 2123
1562 tree 2124 tree
1563 aarch64_builtin_rsqrt (unsigned int fn) 2125 aarch64_general_builtin_rsqrt (unsigned int fn)
1564 { 2126 {
1565 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df) 2127 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df)
1566 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF]; 2128 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];
1567 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf) 2129 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf)
1568 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF]; 2130 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];
1573 2135
1574 #undef VAR1 2136 #undef VAR1
1575 #define VAR1(T, N, MAP, A) \ 2137 #define VAR1(T, N, MAP, A) \
1576 case AARCH64_SIMD_BUILTIN_##T##_##N##A: 2138 case AARCH64_SIMD_BUILTIN_##T##_##N##A:
1577 2139
2140 /* Try to fold a call to the built-in function with subcode FCODE. The
2141 function is passed the N_ARGS arguments in ARGS and it returns a value
2142 of type TYPE. Return the new expression on success and NULL_TREE on
2143 failure. */
1578 tree 2144 tree
1579 aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, 2145 aarch64_general_fold_builtin (unsigned int fcode, tree type,
1580 bool ignore ATTRIBUTE_UNUSED) 2146 unsigned int n_args ATTRIBUTE_UNUSED, tree *args)
1581 { 2147 {
1582 int fcode = DECL_FUNCTION_CODE (fndecl);
1583 tree type = TREE_TYPE (TREE_TYPE (fndecl));
1584
1585 switch (fcode) 2148 switch (fcode)
1586 { 2149 {
1587 BUILTIN_VDQF (UNOP, abs, 2) 2150 BUILTIN_VDQF (UNOP, abs, 2)
1588 return fold_build1 (ABS_EXPR, type, args[0]); 2151 return fold_build1 (ABS_EXPR, type, args[0]);
1589 VAR1 (UNOP, floatv2si, 2, v2sf) 2152 VAR1 (UNOP, floatv2si, 2, v2sf)
1595 } 2158 }
1596 2159
1597 return NULL_TREE; 2160 return NULL_TREE;
1598 } 2161 }
1599 2162
1600 bool 2163 /* Try to fold STMT, given that it's a call to the built-in function with
1601 aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) 2164 subcode FCODE. Return the new statement on success and null on
1602 { 2165 failure. */
1603 bool changed = false; 2166 gimple *
1604 gimple *stmt = gsi_stmt (*gsi); 2167 aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt)
1605 tree call = gimple_call_fn (stmt); 2168 {
1606 tree fndecl;
1607 gimple *new_stmt = NULL; 2169 gimple *new_stmt = NULL;
1608 2170 unsigned nargs = gimple_call_num_args (stmt);
1609 if (call) 2171 tree *args = (nargs > 0
2172 ? gimple_call_arg_ptr (stmt, 0)
2173 : &error_mark_node);
2174
2175 /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int
2176 and unsigned int; it will distinguish according to the types of
2177 the arguments to the __builtin. */
2178 switch (fcode)
1610 { 2179 {
1611 fndecl = gimple_call_fndecl (stmt); 2180 BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
1612 if (fndecl) 2181 new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
2182 1, args[0]);
2183 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
2184 break;
2185 BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
2186 BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
2187 new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
2188 1, args[0]);
2189 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
2190 break;
2191 BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
2192 BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
2193 new_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
2194 1, args[0]);
2195 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
2196 break;
2197 BUILTIN_GPF (BINOP, fmulx, 0)
1613 { 2198 {
1614 int fcode = DECL_FUNCTION_CODE (fndecl); 2199 gcc_assert (nargs == 2);
1615 unsigned nargs = gimple_call_num_args (stmt); 2200 bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST;
1616 tree *args = (nargs > 0 2201 bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST;
1617 ? gimple_call_arg_ptr (stmt, 0) 2202 if (a0_cst_p || a1_cst_p)
1618 : &error_mark_node);
1619
1620 /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int
1621 and unsigned int; it will distinguish according to the types of
1622 the arguments to the __builtin. */
1623 switch (fcode)
1624 { 2203 {
1625 BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) 2204 if (a0_cst_p && a1_cst_p)
1626 new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
1627 1, args[0]);
1628 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
1629 break;
1630 BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
1631 BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
1632 new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
1633 1, args[0]);
1634 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
1635 break;
1636 BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
1637 BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
1638 new_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
1639 1, args[0]);
1640 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
1641 break;
1642 BUILTIN_GPF (BINOP, fmulx, 0)
1643 { 2205 {
1644 gcc_assert (nargs == 2); 2206 tree t0 = TREE_TYPE (args[0]);
1645 bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST; 2207 real_value a0 = (TREE_REAL_CST (args[0]));
1646 bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST; 2208 real_value a1 = (TREE_REAL_CST (args[1]));
1647 if (a0_cst_p || a1_cst_p) 2209 if (real_equal (&a1, &dconst0))
2210 std::swap (a0, a1);
2211 /* According to real_equal (), +0 equals -0. */
2212 if (real_equal (&a0, &dconst0) && real_isinf (&a1))
1648 { 2213 {
1649 if (a0_cst_p && a1_cst_p) 2214 real_value res = dconst2;
1650 { 2215 res.sign = a0.sign ^ a1.sign;
1651 tree t0 = TREE_TYPE (args[0]); 2216 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1652 real_value a0 = (TREE_REAL_CST (args[0])); 2217 REAL_CST,
1653 real_value a1 = (TREE_REAL_CST (args[1])); 2218 build_real (t0, res));
1654 if (real_equal (&a1, &dconst0))
1655 std::swap (a0, a1);
1656 /* According to real_equal (), +0 equals -0. */
1657 if (real_equal (&a0, &dconst0) && real_isinf (&a1))
1658 {
1659 real_value res = dconst2;
1660 res.sign = a0.sign ^ a1.sign;
1661 new_stmt =
1662 gimple_build_assign (gimple_call_lhs (stmt),
1663 REAL_CST,
1664 build_real (t0, res));
1665 }
1666 else
1667 new_stmt =
1668 gimple_build_assign (gimple_call_lhs (stmt),
1669 MULT_EXPR,
1670 args[0], args[1]);
1671 }
1672 else /* a0_cst_p ^ a1_cst_p. */
1673 {
1674 real_value const_part = a0_cst_p
1675 ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]);
1676 if (!real_equal (&const_part, &dconst0)
1677 && !real_isinf (&const_part))
1678 new_stmt =
1679 gimple_build_assign (gimple_call_lhs (stmt),
1680 MULT_EXPR, args[0], args[1]);
1681 }
1682 } 2219 }
1683 if (new_stmt) 2220 else
1684 { 2221 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
1685 gimple_set_vuse (new_stmt, gimple_vuse (stmt)); 2222 MULT_EXPR,
1686 gimple_set_vdef (new_stmt, gimple_vdef (stmt)); 2223 args[0], args[1]);
1687 }
1688 break;
1689 } 2224 }
1690 default: 2225 else /* a0_cst_p ^ a1_cst_p. */
1691 break; 2226 {
2227 real_value const_part = a0_cst_p
2228 ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]);
2229 if (!real_equal (&const_part, &dconst0)
2230 && !real_isinf (&const_part))
2231 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
2232 MULT_EXPR, args[0],
2233 args[1]);
2234 }
1692 } 2235 }
2236 if (new_stmt)
2237 {
2238 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
2239 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
2240 }
2241 break;
1693 } 2242 }
2243 default:
2244 break;
1694 } 2245 }
1695 2246 return new_stmt;
1696 if (new_stmt)
1697 {
1698 gsi_replace (gsi, new_stmt, true);
1699 changed = true;
1700 }
1701
1702 return changed;
1703 } 2247 }
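The effect of the reduction folds above, shown as a gimple before/after sketch (the builtin name is the one generated from the BUILTIN_VALL table; internal functions print with a leading dot in dumps):

    before:  _5 = __builtin_aarch64_reduc_plus_scal_v4sf (v_3);
    after:   _5 = .REDUC_PLUS (v_3);

Lowering to the internal function lets the middle end treat the operation as a generic reduction instead of an opaque target builtin.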
1704 2248
1705 void 2249 void
1706 aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) 2250 aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
1707 { 2251 {
1799 *update = build2 (COMPOUND_EXPR, void_type_node, 2343 *update = build2 (COMPOUND_EXPR, void_type_node,
1800 build2 (COMPOUND_EXPR, void_type_node, 2344 build2 (COMPOUND_EXPR, void_type_node,
1801 reload_fenv, restore_fnenv), update_call); 2345 reload_fenv, restore_fnenv), update_call);
1802 } 2346 }
1803 2347
2348 /* Resolve overloaded MEMTAG built-in functions. */
2349 #define AARCH64_BUILTIN_SUBCODE(F) \
2350 (DECL_MD_FUNCTION_CODE (F) >> AARCH64_BUILTIN_SHIFT)
2351
2352 static tree
2353 aarch64_resolve_overloaded_memtag (location_t loc,
2354 tree fndecl, void *pass_params)
2355 {
2356 vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (pass_params);
2357 unsigned param_num = params ? params->length() : 0;
2358 unsigned int fcode = AARCH64_BUILTIN_SUBCODE (fndecl);
2359 tree inittype = aarch64_memtag_builtin_data[
2360 fcode - AARCH64_MEMTAG_BUILTIN_START - 1].ftype;
2361 unsigned arg_num = list_length (TYPE_ARG_TYPES (inittype)) - 1;
2362
2363 if (param_num != arg_num)
2364 {
2365 TREE_TYPE (fndecl) = inittype;
2366 return NULL_TREE;
2367 }
2368 tree retype = NULL;
2369
2370 if (fcode == AARCH64_MEMTAG_BUILTIN_SUBP)
2371 {
2372 tree t0 = TREE_TYPE ((*params)[0]);
2373 tree t1 = TREE_TYPE ((*params)[1]);
2374
2375 if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
2376 t0 = ptr_type_node;
2377 if (t1 == error_mark_node || TREE_CODE (t1) != POINTER_TYPE)
2378 t1 = ptr_type_node;
2379
2380 if (TYPE_MODE (t0) != DImode)
2381 warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
2382 (int)tree_to_shwi (DECL_SIZE ((*params)[0])));
2383
2384 if (TYPE_MODE (t1) != DImode)
2385 warning_at (loc, 1, "expected 64-bit address but argument 2 is %d-bit",
2386 (int)tree_to_shwi (DECL_SIZE ((*params)[1])));
2387
2388 retype = build_function_type_list (ptrdiff_type_node, t0, t1, NULL);
2389 }
2390 else
2391 {
2392 tree t0 = TREE_TYPE ((*params)[0]);
2393
2394 if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
2395 {
2396 TREE_TYPE (fndecl) = inittype;
2397 return NULL_TREE;
2398 }
2399
2400 if (TYPE_MODE (t0) != DImode)
2401 warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
2402 (int)tree_to_shwi (DECL_SIZE ((*params)[0])));
2403
2404 switch (fcode)
2405 {
2406 case AARCH64_MEMTAG_BUILTIN_IRG:
2407 retype = build_function_type_list (t0, t0, uint64_type_node, NULL);
2408 break;
2409 case AARCH64_MEMTAG_BUILTIN_GMI:
2410 retype = build_function_type_list (uint64_type_node, t0,
2411 uint64_type_node, NULL);
2412 break;
2413 case AARCH64_MEMTAG_BUILTIN_INC_TAG:
2414 retype = build_function_type_list (t0, t0, unsigned_type_node, NULL);
2415 break;
2416 case AARCH64_MEMTAG_BUILTIN_SET_TAG:
2417 retype = build_function_type_list (void_type_node, t0, NULL);
2418 break;
2419 case AARCH64_MEMTAG_BUILTIN_GET_TAG:
2420 retype = build_function_type_list (t0, t0, NULL);
2421 break;
2422 default:
2423 return NULL_TREE;
2424 }
2425 }
2426
2427 if (!retype || retype == error_mark_node)
2428 TREE_TYPE (fndecl) = inittype;
2429 else
2430 TREE_TYPE (fndecl) = retype;
2431
2432 return NULL_TREE;
2433 }
2434
2435 /* Called from aarch64_resolve_overloaded_builtin in aarch64-c.c. */
2436 tree
2437 aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
2438 void *pass_params)
2439 {
2440 unsigned int fcode = AARCH64_BUILTIN_SUBCODE (function);
2441
2442 if (fcode >= AARCH64_MEMTAG_BUILTIN_START
2443 && fcode <= AARCH64_MEMTAG_BUILTIN_END)
2444 return aarch64_resolve_overloaded_memtag (loc, function, pass_params);
2445
2446 return NULL_TREE;
2447 }
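What the resolution above buys: the MEMTAG builtins keep the pointer type of their argument rather than forcing void *. A sketch of the user-visible effect, assuming the inc_tag builtin registered earlier:

    void
    bump_tag (int *p)
    {
      int *q = __builtin_aarch64_memtag_inc_tag (p, 1); /* still int *  */
      (void) q;
    }

aarch64_resolve_overloaded_memtag rewrites TREE_TYPE (fndecl) so the front end type-checks the call against int * instead of the generic prototype.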
1804 2448
1805 #undef AARCH64_CHECK_BUILTIN_MODE 2449 #undef AARCH64_CHECK_BUILTIN_MODE
1806 #undef AARCH64_FIND_FRINT_VARIANT 2450 #undef AARCH64_FIND_FRINT_VARIANT
1807 #undef CF0 2451 #undef CF0
1808 #undef CF1 2452 #undef CF1