Mercurial > hg > CbC > CbC_gcc
diff gcc/config/aarch64/aarch64.md @ 145:1830386684a0
gcc-9.2.0
| field    | value                             |
|----------|-----------------------------------|
| author   | anatofuz                          |
| date     | Thu, 13 Feb 2020 11:34:05 +0900   |
| parents  | 84e7813d76e9                      |
| children |                                   |
line wrap: on
line diff
--- a/gcc/config/aarch64/aarch64.md Thu Oct 25 07:37:49 2018 +0900 +++ b/gcc/config/aarch64/aarch64.md Thu Feb 13 11:34:05 2020 +0900 @@ -1,5 +1,5 @@ ;; Machine description for AArch64 architecture. -;; Copyright (C) 2009-2018 Free Software Foundation, Inc. +;; Copyright (C) 2009-2020 Free Software Foundation, Inc. ;; Contributed by ARM Ltd. ;; ;; This file is part of GCC. @@ -38,9 +38,7 @@ (R14_REGNUM 14) (R15_REGNUM 15) (R16_REGNUM 16) - (IP0_REGNUM 16) (R17_REGNUM 17) - (IP1_REGNUM 17) (R18_REGNUM 18) (R19_REGNUM 19) (R20_REGNUM 20) @@ -54,7 +52,6 @@ (R28_REGNUM 28) (R29_REGNUM 29) (R30_REGNUM 30) - (LR_REGNUM 30) (SP_REGNUM 31) (V0_REGNUM 32) (V1_REGNUM 33) @@ -88,7 +85,6 @@ (V29_REGNUM 61) (V30_REGNUM 62) (V31_REGNUM 63) - (LAST_SAVED_REGNUM 63) (SFP_REGNUM 64) (AP_REGNUM 65) (CC_REGNUM 66) @@ -110,16 +106,35 @@ (P13_REGNUM 81) (P14_REGNUM 82) (P15_REGNUM 83) + (LAST_SAVED_REGNUM 83) + (FFR_REGNUM 84) + ;; "FFR token": a fake register used for representing the scheduling + ;; restrictions on FFR-related operations. + (FFRT_REGNUM 85) + ;; Scratch register used by stack clash protection to calculate + ;; SVE CFA offsets during probing. + (STACK_CLASH_SVE_CFA_REGNUM 11) + ;; Scratch registers for prologue/epilogue use. + (EP0_REGNUM 12) + (EP1_REGNUM 13) ;; A couple of call-clobbered registers that we need to reserve when ;; tracking speculation this is not ABI, so is subject to change. + (SPECULATION_SCRATCH_REGNUM 14) (SPECULATION_TRACKER_REGNUM 15) - (SPECULATION_SCRATCH_REGNUM 14) + ;; Scratch registers used in frame layout. 
+ (IP0_REGNUM 16) + (IP1_REGNUM 17) + (FP_REGNUM 29) + (LR_REGNUM 30) ] ) (define_c_enum "unspec" [ - UNSPEC_AUTI1716 - UNSPEC_AUTISP + UNSPEC_AUTIA1716 + UNSPEC_AUTIB1716 + UNSPEC_AUTIASP + UNSPEC_AUTIBSP + UNSPEC_CALLEE_ABI UNSPEC_CASESI UNSPEC_CRC32B UNSPEC_CRC32CB @@ -131,6 +146,11 @@ UNSPEC_CRC32X UNSPEC_FCVTZS UNSPEC_FCVTZU + UNSPEC_FJCVTZS + UNSPEC_FRINT32Z + UNSPEC_FRINT32X + UNSPEC_FRINT64Z + UNSPEC_FRINT64X UNSPEC_URECPE UNSPEC_FRECPE UNSPEC_FRECPS @@ -162,8 +182,10 @@ UNSPEC_LD4_LANE UNSPEC_MB UNSPEC_NOP - UNSPEC_PACI1716 - UNSPEC_PACISP + UNSPEC_PACIA1716 + UNSPEC_PACIB1716 + UNSPEC_PACIASP + UNSPEC_PACIBSP UNSPEC_PRLG_STK UNSPEC_REV UNSPEC_RBIT @@ -194,6 +216,7 @@ UNSPEC_UCVTF UNSPEC_USHL_2S UNSPEC_VSTRUCTDUMMY + UNSPEC_SSP_SYSREG UNSPEC_SP_SET UNSPEC_SP_TEST UNSPEC_RSQRT @@ -203,25 +226,61 @@ UNSPEC_XPACLRI UNSPEC_LD1_SVE UNSPEC_ST1_SVE + UNSPEC_LDNT1_SVE + UNSPEC_STNT1_SVE UNSPEC_LD1RQ UNSPEC_LD1_GATHER + UNSPEC_LDFF1_GATHER + UNSPEC_LDNT1_GATHER UNSPEC_ST1_SCATTER - UNSPEC_MERGE_PTRUE - UNSPEC_PTEST_PTRUE + UNSPEC_STNT1_SCATTER + UNSPEC_PRED_X + UNSPEC_PRED_Z + UNSPEC_PTEST + UNSPEC_PTRUE UNSPEC_UNPACKSHI UNSPEC_UNPACKUHI UNSPEC_UNPACKSLO UNSPEC_UNPACKULO UNSPEC_PACK - UNSPEC_FLOAT_CONVERT - UNSPEC_WHILE_LO + UNSPEC_WHILEGE + UNSPEC_WHILEGT + UNSPEC_WHILEHI + UNSPEC_WHILEHS + UNSPEC_WHILELE + UNSPEC_WHILELO + UNSPEC_WHILELS + UNSPEC_WHILELT + UNSPEC_WHILERW + UNSPEC_WHILEWR UNSPEC_LDN UNSPEC_STN UNSPEC_INSR + UNSPEC_CLASTA UNSPEC_CLASTB UNSPEC_FADDA UNSPEC_REV_SUBREG + UNSPEC_REINTERPRET UNSPEC_SPECULATION_TRACKER + UNSPEC_SPECULATION_TRACKER_REV + UNSPEC_COPYSIGN + UNSPEC_TTEST ; Represent transaction test. + UNSPEC_UPDATE_FFR + UNSPEC_UPDATE_FFRT + UNSPEC_RDFFR + UNSPEC_WRFFR + ;; Represents an SVE-style lane index, in which the indexing applies + ;; within the containing 128-bit block. 
+ UNSPEC_SVE_LANE_SELECT + UNSPEC_SVE_CNT_PAT + UNSPEC_SVE_PREFETCH + UNSPEC_SVE_PREFETCH_GATHER + UNSPEC_SVE_COMPACT + UNSPEC_SVE_SPLICE + UNSPEC_GEN_TAG ; Generate a 4-bit MTE tag. + UNSPEC_GEN_TAG_RND ; Generate a random 4-bit MTE tag. + UNSPEC_TAG_SPACE ; Translate address to MTE tag address space. + UNSPEC_LD1RO ]) (define_c_enum "unspecv" [ @@ -233,9 +292,39 @@ UNSPECV_BLOCKAGE ; Represent a blockage UNSPECV_PROBE_STACK_RANGE ; Represent stack range probing. UNSPECV_SPECULATION_BARRIER ; Represent speculation barrier. + UNSPECV_BTI_NOARG ; Represent BTI. + UNSPECV_BTI_C ; Represent BTI c. + UNSPECV_BTI_J ; Represent BTI j. + UNSPECV_BTI_JC ; Represent BTI jc. + UNSPECV_TSTART ; Represent transaction start. + UNSPECV_TCOMMIT ; Represent transaction commit. + UNSPECV_TCANCEL ; Represent transaction cancel. + UNSPEC_RNDR ; Represent RNDR + UNSPEC_RNDRRS ; Represent RNDRRS ] ) +;; These constants are used as a const_int in various SVE unspecs +;; to indicate whether the governing predicate is known to be a PTRUE. +(define_constants + [; Indicates that the predicate might not be a PTRUE. + (SVE_MAYBE_NOT_PTRUE 0) + + ; Indicates that the predicate is known to be a PTRUE. + (SVE_KNOWN_PTRUE 1)]) + +;; These constants are used as a const_int in predicated SVE FP arithmetic +;; to indicate whether the operation is allowed to make additional lanes +;; active without worrying about the effect on faulting behavior. +(define_constants + [; Indicates either that all lanes are active or that the instruction may + ; operate on inactive inputs even if doing so could induce a fault. + (SVE_RELAXED_GP 0) + + ; Indicates that some lanes might be inactive and that the instruction + ; must not operate on inactive inputs if doing so could induce a fault. + (SVE_STRICT_GP 1)]) + ;; If further include files are added the defintion of MD_INCLUDES ;; must be updated. 
@@ -344,9 +433,11 @@ (include "../arm/cortex-a57.md") (include "../arm/exynos-m1.md") (include "falkor.md") +(include "saphira.md") (include "thunderx.md") (include "../arm/xgene1.md") (include "thunderx2t99.md") +(include "tsv110.md") ;; ------------------------------------------------------------------- ;; Jumps and other miscellaneous insns @@ -368,8 +459,8 @@ (define_expand "cbranch<mode>4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" - [(match_operand:GPI 1 "register_operand" "") - (match_operand:GPI 2 "aarch64_plus_operand" "")]) + [(match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "aarch64_plus_operand")]) (label_ref (match_operand 3 "" "")) (pc)))] "" @@ -382,8 +473,8 @@ (define_expand "cbranch<mode>4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" - [(match_operand:GPF 1 "register_operand" "") - (match_operand:GPF 2 "aarch64_fp_compare_operand" "")]) + [(match_operand:GPF 1 "register_operand") + (match_operand:GPF 2 "aarch64_fp_compare_operand")]) (label_ref (match_operand 3 "" "")) (pc)))] "" @@ -397,23 +488,25 @@ (define_expand "cbranchcc4" [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" - [(match_operand 1 "cc_register" "") + [(match_operand 1 "cc_register") (match_operand 2 "const0_operand")]) (label_ref (match_operand 3 "" "")) (pc)))] "" "") -(define_insn "ccmp<mode>" - [(set (match_operand:CC 1 "cc_register" "") - (if_then_else:CC +(define_insn "@ccmp<CC_ONLY:mode><GPI:mode>" + [(set (match_operand:CC_ONLY 1 "cc_register" "") + (if_then_else:CC_ONLY (match_operator 4 "aarch64_comparison_operator" [(match_operand 0 "cc_register" "") (const_int 0)]) - (compare:CC + (compare:CC_ONLY (match_operand:GPI 2 "register_operand" "r,r,r") (match_operand:GPI 3 "aarch64_ccmp_operand" "r,Uss,Usn")) - (unspec:CC [(match_operand 5 "immediate_operand")] UNSPEC_NZCV)))] + (unspec:CC_ONLY + [(match_operand 5 "immediate_operand")] + UNSPEC_NZCV)))] "" "@ ccmp\\t%<w>2, %<w>3, 
%k5, %m4 @@ -422,33 +515,57 @@ [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] ) -(define_insn "fccmp<mode>" - [(set (match_operand:CCFP 1 "cc_register" "") - (if_then_else:CCFP +(define_insn "@ccmp<CCFP_CCFPE:mode><GPF:mode>" + [(set (match_operand:CCFP_CCFPE 1 "cc_register" "") + (if_then_else:CCFP_CCFPE + (match_operator 4 "aarch64_comparison_operator" + [(match_operand 0 "cc_register" "") + (const_int 0)]) + (compare:CCFP_CCFPE + (match_operand:GPF 2 "register_operand" "w") + (match_operand:GPF 3 "register_operand" "w")) + (unspec:CCFP_CCFPE + [(match_operand 5 "immediate_operand")] + UNSPEC_NZCV)))] + "TARGET_FLOAT" + "fccmp<e>\\t%<s>2, %<s>3, %k5, %m4" + [(set_attr "type" "fccmp<s>")] +) + +(define_insn "@ccmp<CC_ONLY:mode><GPI:mode>_rev" + [(set (match_operand:CC_ONLY 1 "cc_register" "") + (if_then_else:CC_ONLY (match_operator 4 "aarch64_comparison_operator" [(match_operand 0 "cc_register" "") (const_int 0)]) - (compare:CCFP - (match_operand:GPF 2 "register_operand" "w") - (match_operand:GPF 3 "register_operand" "w")) - (unspec:CCFP [(match_operand 5 "immediate_operand")] UNSPEC_NZCV)))] - "TARGET_FLOAT" - "fccmp\\t%<s>2, %<s>3, %k5, %m4" - [(set_attr "type" "fccmp<s>")] -) - -(define_insn "fccmpe<mode>" - [(set (match_operand:CCFPE 1 "cc_register" "") - (if_then_else:CCFPE + (unspec:CC_ONLY + [(match_operand 5 "immediate_operand")] + UNSPEC_NZCV) + (compare:CC_ONLY + (match_operand:GPI 2 "register_operand" "r,r,r") + (match_operand:GPI 3 "aarch64_ccmp_operand" "r,Uss,Usn"))))] + "" + "@ + ccmp\\t%<w>2, %<w>3, %k5, %M4 + ccmp\\t%<w>2, %3, %k5, %M4 + ccmn\\t%<w>2, #%n3, %k5, %M4" + [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] +) + +(define_insn "@ccmp<CCFP_CCFPE:mode><GPF:mode>_rev" + [(set (match_operand:CCFP_CCFPE 1 "cc_register" "") + (if_then_else:CCFP_CCFPE (match_operator 4 "aarch64_comparison_operator" [(match_operand 0 "cc_register" "") - (const_int 0)]) - (compare:CCFPE + (const_int 0)]) + (unspec:CCFP_CCFPE + [(match_operand 5 
"immediate_operand")] + UNSPEC_NZCV) + (compare:CCFP_CCFPE (match_operand:GPF 2 "register_operand" "w") - (match_operand:GPF 3 "register_operand" "w")) - (unspec:CCFPE [(match_operand 5 "immediate_operand")] UNSPEC_NZCV)))] + (match_operand:GPF 3 "register_operand" "w"))))] "TARGET_FLOAT" - "fccmpe\\t%<s>2, %<s>3, %k5, %m4" + "fccmp<e>\\t%<s>2, %<s>3, %k5, %M4" [(set_attr "type" "fccmp<s>")] ) @@ -460,9 +577,9 @@ ;; csneg x0, x0, x1, mi (define_expand "mod<mode>3" - [(match_operand:GPI 0 "register_operand" "") - (match_operand:GPI 1 "register_operand" "") - (match_operand:GPI 2 "const_int_operand" "")] + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "const_int_operand")] "" { HOST_WIDE_INT val = INTVAL (operands[2]); @@ -515,10 +632,14 @@ (pc)))] "" { + /* GCC's traditional style has been to use "beq" instead of "b.eq", etc., + but the "." is required for SVE conditions. */ + bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode; if (get_attr_length (insn) == 8) - return aarch64_gen_far_branch (operands, 2, "Lbcond", "b%M0\\t"); + return aarch64_gen_far_branch (operands, 2, "Lbcond", + use_dot_p ? "b.%M0\\t" : "b%M0\\t"); else - return "b%m0\\t%l2"; + return use_dot_p ? 
"b.%m0\\t%l2" : "b%m0\\t%l2"; } [(set_attr "type" "branch") (set (attr "length") @@ -543,14 +664,14 @@ ;; sub x0, x1, #(CST & 0xfff000) ;; subs x0, x0, #(CST & 0x000fff) ;; b<ne,eq> .Label -(define_insn_and_split "*compare_condjump<mode>" +(define_insn_and_split "*compare_condjump<GPI:mode>" [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") (match_operand:GPI 1 "aarch64_imm24" "n")) (label_ref:P (match_operand 2 "" "")) (pc)))] - "!aarch64_move_imm (INTVAL (operands[1]), <MODE>mode) - && !aarch64_plus_operand (operands[1], <MODE>mode) + "!aarch64_move_imm (INTVAL (operands[1]), <GPI:MODE>mode) + && !aarch64_plus_operand (operands[1], <GPI:MODE>mode) && !reload_completed" "#" "&& true" @@ -558,20 +679,21 @@ { HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff; HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000; - rtx tmp = gen_reg_rtx (<MODE>mode); - emit_insn (gen_add<mode>3 (tmp, operands[0], GEN_INT (-hi_imm))); - emit_insn (gen_add<mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm))); + rtx tmp = gen_reg_rtx (<GPI:MODE>mode); + emit_insn (gen_add<GPI:mode>3 (tmp, operands[0], GEN_INT (-hi_imm))); + emit_insn (gen_add<GPI:mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm))); rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); - rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <MODE>mode, cc_reg, const0_rtx); + rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <GPI:MODE>mode, + cc_reg, const0_rtx); emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[2])); DONE; } ) (define_expand "casesi" - [(match_operand:SI 0 "register_operand" "") ; Index - (match_operand:SI 1 "const_int_operand" "") ; Lower bound - (match_operand:SI 2 "const_int_operand" "") ; Total range + [(match_operand:SI 0 "register_operand") ; Index + (match_operand:SI 1 "const_int_operand") ; Lower bound + (match_operand:SI 2 "const_int_operand") ; Total range (match_operand:DI 3 "" "") ; Table label (match_operand:DI 4 "" "")] ; Out of range label "" @@ -608,13 +730,27 @@ operands[0], operands[2], 
operands[4])); operands[2] = force_reg (DImode, gen_rtx_LABEL_REF (DImode, operands[3])); - emit_jump_insn (gen_casesi_dispatch (operands[2], operands[0], - operands[3])); + operands[2] + = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, operands[2], operands[0]), + UNSPEC_CASESI); + operands[2] = gen_rtx_MEM (DImode, operands[2]); + MEM_READONLY_P (operands[2]) = 1; + MEM_NOTRAP_P (operands[2]) = 1; + emit_jump_insn (gen_casesi_dispatch (operands[2], operands[3])); DONE; } ) -(define_insn "casesi_dispatch" +(define_expand "casesi_dispatch" + [(parallel + [(set (pc) (match_operand:DI 0 "")) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:DI 2)) + (clobber (match_scratch:DI 3)) + (use (label_ref:DI (match_operand 1 "")))])] + "") + +(define_insn "*casesi_dispatch" [(parallel [(set (pc) (mem:DI (unspec [(match_operand:DI 0 "register_operand" "r") @@ -623,7 +759,7 @@ (clobber (reg:CC CC_REGNUM)) (clobber (match_scratch:DI 3 "=r")) (clobber (match_scratch:DI 4 "=r")) - (use (label_ref (match_operand 2 "" "")))])] + (use (label_ref:DI (match_operand 2 "" "")))])] "" "* return aarch64_output_casesi (operands); @@ -710,8 +846,12 @@ if (aarch64_return_address_signing_enabled () && TARGET_ARMV8_3 && !crtl->calls_eh_return) - return "retaa"; - + { + if (aarch64_ra_sign_key == AARCH64_KEY_B) + return "retab"; + else + return "retaa"; + } return "ret"; } [(set_attr "type" "branch")] @@ -839,14 +979,15 @@ ;; ------------------------------------------------------------------- (define_expand "call" - [(parallel [(call (match_operand 0 "memory_operand" "") - (match_operand 1 "general_operand" "")) - (use (match_operand 2 "" "")) - (clobber (reg:DI LR_REGNUM))])] + [(parallel + [(call (match_operand 0 "memory_operand") + (match_operand 1 "general_operand")) + (unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI) + (clobber (reg:DI LR_REGNUM))])] "" " { - aarch64_expand_call (NULL_RTX, operands[0], false); + aarch64_expand_call (NULL_RTX, operands[0], operands[2], 
false); DONE; }" ) @@ -854,6 +995,7 @@ (define_insn "*call_insn" [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "r, Usf")) (match_operand 1 "" "")) + (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI) (clobber (reg:DI LR_REGNUM))] "" "@ @@ -863,15 +1005,16 @@ ) (define_expand "call_value" - [(parallel [(set (match_operand 0 "" "") - (call (match_operand 1 "memory_operand" "") - (match_operand 2 "general_operand" ""))) - (use (match_operand 3 "" "")) - (clobber (reg:DI LR_REGNUM))])] + [(parallel + [(set (match_operand 0 "") + (call (match_operand 1 "memory_operand") + (match_operand 2 "general_operand"))) + (unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI) + (clobber (reg:DI LR_REGNUM))])] "" " { - aarch64_expand_call (operands[0], operands[1], false); + aarch64_expand_call (operands[0], operands[1], operands[3], false); DONE; }" ) @@ -880,6 +1023,7 @@ [(set (match_operand 0 "" "") (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "r, Usf")) (match_operand 2 "" ""))) + (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI) (clobber (reg:DI LR_REGNUM))] "" "@ @@ -889,33 +1033,36 @@ ) (define_expand "sibcall" - [(parallel [(call (match_operand 0 "memory_operand" "") - (match_operand 1 "general_operand" "")) - (return) - (use (match_operand 2 "" ""))])] + [(parallel + [(call (match_operand 0 "memory_operand") + (match_operand 1 "general_operand")) + (unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI) + (return)])] "" { - aarch64_expand_call (NULL_RTX, operands[0], true); + aarch64_expand_call (NULL_RTX, operands[0], operands[2], true); DONE; } ) (define_expand "sibcall_value" - [(parallel [(set (match_operand 0 "" "") - (call (match_operand 1 "memory_operand" "") - (match_operand 2 "general_operand" ""))) - (return) - (use (match_operand 3 "" ""))])] + [(parallel + [(set (match_operand 0 "") + (call (match_operand 1 "memory_operand") + (match_operand 2 
"general_operand"))) + (unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI) + (return)])] "" { - aarch64_expand_call (operands[0], operands[1], true); + aarch64_expand_call (operands[0], operands[1], operands[3], true); DONE; } ) (define_insn "*sibcall_insn" [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf")) - (match_operand 1 "" "")) + (match_operand 1 "")) + (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI) (return)] "SIBLING_CALL_P (insn)" "@ @@ -925,10 +1072,11 @@ ) (define_insn "*sibcall_value_insn" - [(set (match_operand 0 "" "") + [(set (match_operand 0 "") (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "Ucs, Usf")) - (match_operand 2 "" ""))) + (match_operand 2 ""))) + (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI) (return)] "SIBLING_CALL_P (insn)" "@ @@ -948,7 +1096,9 @@ { int i; - emit_call_insn (gen_call (operands[0], const0_rtx, NULL)); + /* Untyped calls always use the default ABI. It's only possible to use + ABI variants if we know the type of the target function. 
*/ + emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx)); for (i = 0; i < XVECLEN (operands[2], 0); i++) { @@ -969,8 +1119,8 @@ ;; ------------------------------------------------------------------- (define_expand "mov<mode>" - [(set (match_operand:SHORT 0 "nonimmediate_operand" "") - (match_operand:SHORT 1 "general_operand" ""))] + [(set (match_operand:SHORT 0 "nonimmediate_operand") + (match_operand:SHORT 1 "general_operand"))] "" " if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) @@ -1026,12 +1176,12 @@ ) (define_expand "mov<mode>" - [(set (match_operand:GPI 0 "nonimmediate_operand" "") - (match_operand:GPI 1 "general_operand" ""))] + [(set (match_operand:GPI 0 "nonimmediate_operand") + (match_operand:GPI 1 "general_operand"))] "" " - if (MEM_P (operands[0]) && CONST_INT_P (operands[1]) - && <MODE>mode == DImode + if (MEM_P (operands[0]) && !MEM_VOLATILE_P (operands[0]) + && CONST_INT_P (operands[1]) && <MODE>mode == DImode && aarch64_split_dimode_const_store (operands[0], operands[1])) DONE; @@ -1132,9 +1282,27 @@ [(set_attr "type" "mov_imm")] ) +;; Match MOVK as a normal AND and IOR operation. 
+(define_insn "aarch64_movk<mode>" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0") + (match_operand:GPI 2 "const_int_operand")) + (match_operand:GPI 3 "const_int_operand")))] + "aarch64_movk_shift (rtx_mode_t (operands[2], <MODE>mode), + rtx_mode_t (operands[3], <MODE>mode)) >= 0" + { + int shift = aarch64_movk_shift (rtx_mode_t (operands[2], <MODE>mode), + rtx_mode_t (operands[3], <MODE>mode)); + operands[2] = gen_int_mode (UINTVAL (operands[3]) >> shift, SImode); + operands[3] = gen_int_mode (shift, SImode); + return "movk\\t%<w>0, #%X2, lsl %3"; + } + [(set_attr "type" "mov_imm")] +) + (define_expand "movti" - [(set (match_operand:TI 0 "nonimmediate_operand" "") - (match_operand:TI 1 "general_operand" ""))] + [(set (match_operand:TI 0 "nonimmediate_operand") + (match_operand:TI 1 "general_operand"))] "" " if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) @@ -1188,8 +1356,8 @@ }) (define_expand "mov<mode>" - [(set (match_operand:GPF_TF_F16 0 "nonimmediate_operand" "") - (match_operand:GPF_TF_F16 1 "general_operand" ""))] + [(set (match_operand:GPF_TF_F16_MOV 0 "nonimmediate_operand") + (match_operand:GPF_TF_F16_MOV 1 "general_operand"))] "" { if (!TARGET_FLOAT) @@ -1205,11 +1373,11 @@ } ) -(define_insn "*movhf_aarch64" - [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r") - (match_operand:HF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))] - "TARGET_FLOAT && (register_operand (operands[0], HFmode) - || aarch64_reg_or_fp_zero (operands[1], HFmode))" +(define_insn "*mov<mode>_aarch64" + [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r") + (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))] + "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode) + || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))" "@ movi\\t%0.4h, #0 fmov\\t%h0, %w1 @@ -1334,17 +1502,17 @@ ;; 0 is dst ;; 1 
is src -;; 2 is size of move in bytes +;; 2 is size of copy in bytes ;; 3 is alignment -(define_expand "movmemdi" +(define_expand "cpymemdi" [(match_operand:BLK 0 "memory_operand") (match_operand:BLK 1 "memory_operand") (match_operand:DI 2 "immediate_operand") (match_operand:DI 3 "immediate_operand")] "!STRICT_ALIGNMENT" { - if (aarch64_expand_movmem (operands)) + if (aarch64_expand_cpymem (operands)) DONE; FAIL; } @@ -1385,6 +1553,21 @@ (set_attr "arch" "*,fp")] ) +(define_insn "load_pair_dw_tftf" + [(set (match_operand:TF 0 "register_operand" "=w") + (match_operand:TF 1 "aarch64_mem_pair_operand" "Ump")) + (set (match_operand:TF 2 "register_operand" "=w") + (match_operand:TF 3 "memory_operand" "m"))] + "TARGET_SIMD + && rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (TFmode)))" + "ldp\\t%q0, %q2, %1" + [(set_attr "type" "neon_ldp_q") + (set_attr "fp" "yes")] +) + ;; Operands 0 and 2 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. (define_insn "store_pair_sw_<SX:mode><SX2:mode>" @@ -1420,6 +1603,21 @@ (set_attr "arch" "*,fp")] ) +(define_insn "store_pair_dw_tftf" + [(set (match_operand:TF 0 "aarch64_mem_pair_operand" "=Ump") + (match_operand:TF 1 "register_operand" "w")) + (set (match_operand:TF 2 "memory_operand" "=m") + (match_operand:TF 3 "register_operand" "w"))] + "TARGET_SIMD && + rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (TFmode)))" + "stp\\t%q1, %q3, %0" + [(set_attr "type" "neon_stp_q") + (set_attr "fp" "yes")] +) + ;; Load pair with post-index writeback. This is primarily used in function ;; epilogues. 
(define_insn "loadwb_pair<GPI:mode>_<P:mode>" @@ -1433,8 +1631,8 @@ (mem:GPI (plus:P (match_dup 1) (match_operand:P 5 "const_int_operand" "n"))))])] "INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)" - "ldp\\t%<w>2, %<w>3, [%1], %4" - [(set_attr "type" "load_<ldpstp_sz>")] + "ldp\\t%<GPI:w>2, %<GPI:w>3, [%1], %4" + [(set_attr "type" "load_<GPI:ldpstp_sz>")] ) (define_insn "loadwb_pair<GPF:mode>_<P:mode>" @@ -1448,10 +1646,25 @@ (mem:GPF (plus:P (match_dup 1) (match_operand:P 5 "const_int_operand" "n"))))])] "INTVAL (operands[5]) == GET_MODE_SIZE (<GPF:MODE>mode)" - "ldp\\t%<w>2, %<w>3, [%1], %4" + "ldp\\t%<GPF:w>2, %<GPF:w>3, [%1], %4" [(set_attr "type" "neon_load1_2reg")] ) +(define_insn "loadwb_pair<TX:mode>_<P:mode>" + [(parallel + [(set (match_operand:P 0 "register_operand" "=k") + (plus:P (match_operand:P 1 "register_operand" "0") + (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) + (set (match_operand:TX 2 "register_operand" "=w") + (mem:TX (match_dup 1))) + (set (match_operand:TX 3 "register_operand" "=w") + (mem:TX (plus:P (match_dup 1) + (match_operand:P 5 "const_int_operand" "n"))))])] + "TARGET_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (<TX:MODE>mode)" + "ldp\\t%q2, %q3, [%1], %4" + [(set_attr "type" "neon_ldp_q")] +) + ;; Store pair with pre-index writeback. This is primarily used in function ;; prologues. (define_insn "storewb_pair<GPI:mode>_<P:mode>" @@ -1466,8 +1679,8 @@ (match_operand:P 5 "const_int_operand" "n"))) (match_operand:GPI 3 "register_operand" "r"))])] "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)" - "stp\\t%<w>2, %<w>3, [%0, %4]!" - [(set_attr "type" "store_<ldpstp_sz>")] + "stp\\t%<GPI:w>2, %<GPI:w>3, [%0, %4]!" 
+ [(set_attr "type" "store_<GPI:ldpstp_sz>")] ) (define_insn "storewb_pair<GPF:mode>_<P:mode>" @@ -1482,10 +1695,28 @@ (match_operand:P 5 "const_int_operand" "n"))) (match_operand:GPF 3 "register_operand" "w"))])] "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPF:MODE>mode)" - "stp\\t%<w>2, %<w>3, [%0, %4]!" + "stp\\t%<GPF:w>2, %<GPF:w>3, [%0, %4]!" [(set_attr "type" "neon_store1_2reg<q>")] ) +(define_insn "storewb_pair<TX:mode>_<P:mode>" + [(parallel + [(set (match_operand:P 0 "register_operand" "=&k") + (plus:P (match_operand:P 1 "register_operand" "0") + (match_operand:P 4 "aarch64_mem_pair_offset" "n"))) + (set (mem:TX (plus:P (match_dup 0) + (match_dup 4))) + (match_operand:TX 2 "register_operand" "w")) + (set (mem:TX (plus:P (match_dup 0) + (match_operand:P 5 "const_int_operand" "n"))) + (match_operand:TX 3 "register_operand" "w"))])] + "TARGET_SIMD + && INTVAL (operands[5]) + == INTVAL (operands[4]) + GET_MODE_SIZE (<TX:MODE>mode)" + "stp\\t%q2, %q3, [%0, %4]!" + [(set_attr "type" "neon_stp_q")] +) + ;; ------------------------------------------------------------------- ;; Sign/Zero extension ;; ------------------------------------------------------------------- @@ -1530,7 +1761,7 @@ ldr\t%s0, %1 fmov\t%w0, %s1 fmov\t%s0, %s1" - [(set_attr "type" "extend,load_4,f_mcr,f_loads,f_mrc,fmov") + [(set_attr "type" "mov_reg,load_4,f_mcr,f_loads,f_mrc,fmov") (set_attr "arch" "*,*,fp,fp,fp,fp")] ) @@ -1610,9 +1841,9 @@ (define_expand "add<mode>3" [(set - (match_operand:GPI 0 "register_operand" "") - (plus:GPI (match_operand:GPI 1 "register_operand" "") - (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "")))] + (match_operand:GPI 0 "register_operand") + (plus:GPI (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand")))] "" { /* If operands[1] is a subreg extract the inner RTX. 
*/ @@ -1621,6 +1852,7 @@ /* If the constant is too large for a single instruction and isn't frame based, split off the immediate so it is available for CSE. */ if (!aarch64_plus_immediate (operands[2], <MODE>mode) + && !(TARGET_SVE && aarch64_sve_plus_immediate (operands[2], <MODE>mode)) && can_create_pseudo_p () && (!REG_P (op1) || !REGNO_PTR_FRAME_P (REGNO (op1)))) @@ -1638,10 +1870,10 @@ (define_insn "*add<mode>3_aarch64" [(set - (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,rk") + (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk") (plus:GPI - (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,rk") - (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uav")))] + (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk") + (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav")))] "" "@ add\\t%<w>0, %<w>1, %2 @@ -1649,10 +1881,11 @@ add\\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas> sub\\t%<w>0, %<w>1, #%n2 # - * return aarch64_output_sve_addvl_addpl (operands[0], operands[1], operands[2]);" - ;; The "alu_imm" type for ADDVL/ADDPL is just a placeholder. - [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm") - (set_attr "arch" "*,*,simd,*,*,*")] + * return aarch64_output_sve_scalar_inc_dec (operands[2]); + * return aarch64_output_sve_addvl_addpl (operands[2]);" + ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders. + [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,alu_imm") + (set_attr "arch" "*,*,simd,*,*,sve,sve")] ) ;; zero_extend version of above @@ -1731,17 +1964,18 @@ ;; this pattern. 
(define_insn_and_split "*add<mode>3_poly_1" [(set - (match_operand:GPI 0 "register_operand" "=r,r,r,r,r,&r") + (match_operand:GPI 0 "register_operand" "=r,r,r,r,r,r,&r") (plus:GPI - (match_operand:GPI 1 "register_operand" "%rk,rk,rk,rk,rk,rk") - (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "I,r,J,Uaa,Uav,Uat")))] + (match_operand:GPI 1 "register_operand" "%rk,rk,rk,rk,rk,0,rk") + (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "I,r,J,Uaa,Uav,Uai,Uat")))] "TARGET_SVE && operands[0] != stack_pointer_rtx" "@ add\\t%<w>0, %<w>1, %2 add\\t%<w>0, %<w>1, %<w>2 sub\\t%<w>0, %<w>1, #%n2 # - * return aarch64_output_sve_addvl_addpl (operands[0], operands[1], operands[2]); + * return aarch64_output_sve_scalar_inc_dec (operands[2]); + * return aarch64_output_sve_addvl_addpl (operands[2]); #" "&& epilogue_completed && !reg_overlap_mentioned_p (operands[0], operands[1]) @@ -1752,8 +1986,8 @@ operands[2], operands[0], NULL_RTX); DONE; } - ;; The "alu_imm" type for ADDVL/ADDPL is just a placeholder. - [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple,alu_imm,multiple")] + ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders. 
+ [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple,alu_imm,alu_imm,multiple")] ) (define_split @@ -1777,11 +2011,15 @@ (define_expand "addv<mode>4" [(match_operand:GPI 0 "register_operand") (match_operand:GPI 1 "register_operand") - (match_operand:GPI 2 "register_operand") + (match_operand:GPI 2 "aarch64_plus_operand") (label_ref (match_operand 3 "" ""))] "" { - emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2])); + if (CONST_INT_P (operands[2])) + emit_insn (gen_add<mode>3_compareV_imm (operands[0], operands[1], + operands[2])); + else + emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2])); aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); DONE; @@ -1795,15 +2033,15 @@ "" { emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2])); - aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); + aarch64_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]); DONE; }) (define_expand "addti3" - [(set (match_operand:TI 0 "register_operand" "") - (plus:TI (match_operand:TI 1 "register_operand" "") - (match_operand:TI 2 "aarch64_reg_or_imm" "")))] + [(set (match_operand:TI 0 "register_operand") + (plus:TI (match_operand:TI 1 "register_operand") + (match_operand:TI 2 "aarch64_reg_or_imm")))] "" { rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; @@ -1834,9 +2072,9 @@ }) (define_expand "addvti4" - [(match_operand:TI 0 "register_operand" "") - (match_operand:TI 1 "register_operand" "") - (match_operand:TI 2 "aarch64_reg_or_imm" "") + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "register_operand") + (match_operand:TI 2 "aarch64_reg_or_imm") (label_ref (match_operand 3 "" ""))] "" { @@ -1868,9 +2106,9 @@ }) (define_expand "uaddvti4" - [(match_operand:TI 0 "register_operand" "") - (match_operand:TI 1 "register_operand" "") - (match_operand:TI 2 "aarch64_reg_or_imm" "") + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "register_operand") + (match_operand:TI 2 
"aarch64_reg_or_imm") (label_ref (match_operand 3 "" ""))] "" { @@ -1897,14 +2135,14 @@ emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest); emit_move_insn (gen_highpart (DImode, operands[0]), high_dest); - aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); + aarch64_gen_unlikely_cbranch (GEU, CC_ADCmode, operands[3]); DONE; }) (define_insn "add<mode>3_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r,r") + (plus:GPI (match_operand:GPI 1 "register_operand" "%rk,rk,rk") (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J")) (const_int 0))) (set (match_operand:GPI 0 "register_operand" "=r,r,r") @@ -1921,7 +2159,7 @@ (define_insn "*addsi3_compare0_uxtw" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:SI (match_operand:SI 1 "register_operand" "%r,r,r") + (plus:SI (match_operand:SI 1 "register_operand" "%rk,rk,rk") (match_operand:SI 2 "aarch64_plus_operand" "r,I,J")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r,r,r") @@ -1934,69 +2172,36 @@ [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] ) -(define_insn "*add<mode>3_compareC_cconly_imm" - [(set (reg:CC_C CC_REGNUM) - (ne:CC_C - (plus:<DWI> - (zero_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r")) - (match_operand:<DWI> 2 "const_scalar_int_operand" "")) - (zero_extend:<DWI> - (plus:GPI - (match_dup 0) - (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")))))] - "aarch64_zero_extend_const_eq (<DWI>mode, operands[2], - <MODE>mode, operands[1])" - "@ - cmn\\t%<w>0, %1 - cmp\\t%<w>0, #%n1" - [(set_attr "type" "alus_imm")] -) - (define_insn "*add<mode>3_compareC_cconly" [(set (reg:CC_C CC_REGNUM) - (ne:CC_C - (plus:<DWI> - (zero_extend:<DWI> (match_operand:GPI 0 "register_operand" "r")) - (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) - (zero_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] - "" - "cmn\\t%<w>0, %<w>1" - [(set_attr "type" "alus_sreg")] -) - -(define_insn 
"*add<mode>3_compareC_imm" - [(set (reg:CC_C CC_REGNUM) - (ne:CC_C - (plus:<DWI> - (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r,r")) - (match_operand:<DWI> 3 "const_scalar_int_operand" "")) - (zero_extend:<DWI> - (plus:GPI - (match_dup 1) - (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))))) - (set (match_operand:GPI 0 "register_operand" "=r,r") - (plus:GPI (match_dup 1) (match_dup 2)))] - "aarch64_zero_extend_const_eq (<DWI>mode, operands[3], - <MODE>mode, operands[2])" + (compare:CC_C + (plus:GPI + (match_operand:GPI 0 "register_operand" "r,r,r") + (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")) + (match_dup 0)))] + "" "@ - adds\\t%<w>0, %<w>1, %2 - subs\\t%<w>0, %<w>1, #%n2" - [(set_attr "type" "alus_imm")] + cmn\\t%<w>0, %<w>1 + cmn\\t%<w>0, %1 + cmp\\t%<w>0, #%n1" + [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] ) (define_insn "add<mode>3_compareC" [(set (reg:CC_C CC_REGNUM) (compare:CC_C - (plus:<DWI> - (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) - (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) - (zero_extend:<DWI> - (plus:GPI (match_dup 1) (match_dup 2))))) - (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI + (match_operand:GPI 1 "register_operand" "rk,rk,rk") + (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J")) + (match_dup 1))) + (set (match_operand:GPI 0 "register_operand" "=r,r,r") (plus:GPI (match_dup 1) (match_dup 2)))] "" - "adds\\t%<w>0, %<w>1, %<w>2" - [(set_attr "type" "alus_sreg")] + "@ + adds\\t%<w>0, %<w>1, %<w>2 + adds\\t%<w>0, %<w>1, %2 + subs\\t%<w>0, %<w>1, #%n2" + [(set_attr "type" "alus_sreg,alus_imm,alus_imm")] ) (define_insn "*add<mode>3_compareV_cconly_imm" @@ -2028,12 +2233,12 @@ [(set_attr "type" "alus_sreg")] ) -(define_insn "*add<mode>3_compareV_imm" +(define_insn "add<mode>3_compareV_imm" [(set (reg:CC_V CC_REGNUM) (compare:CC_V (plus:<DWI> (sign_extend:<DWI> - (match_operand:GPI 1 "register_operand" "r,r")) + (match_operand:GPI 1 
"register_operand" "rk,rk")) (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")) (sign_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2))))) @@ -2050,7 +2255,7 @@ [(set (reg:CC_V CC_REGNUM) (compare:CC_V (plus:<DWI> - (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "rk")) (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) (sign_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2))))) (set (match_operand:GPI 0 "register_operand" "=r") @@ -2129,7 +2334,7 @@ (compare:CC_NZ (plus:GPI (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r")) - (match_operand:GPI 2 "register_operand" "r")) + (match_operand:GPI 2 "register_operand" "rk")) (const_int 0))) (set (match_operand:GPI 0 "register_operand" "=r") (plus:GPI (ANY_EXTEND:GPI (match_dup 1)) (match_dup 2)))] @@ -2141,7 +2346,7 @@ (define_insn "*subs_<optab><ALLX:mode>_<GPI:mode>" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (minus:GPI (match_operand:GPI 1 "register_operand" "rk") (ANY_EXTEND:GPI (match_operand:ALLX 2 "register_operand" "r"))) (const_int 0))) @@ -2159,7 +2364,7 @@ (ANY_EXTEND:GPI (match_operand:ALLX 1 "register_operand" "r")) (match_operand 2 "aarch64_imm3" "Ui3")) - (match_operand:GPI 3 "register_operand" "r")) + (match_operand:GPI 3 "register_operand" "rk")) (const_int 0))) (set (match_operand:GPI 0 "register_operand" "=rk") (plus:GPI (ashift:GPI (ANY_EXTEND:GPI (match_dup 1)) @@ -2173,7 +2378,7 @@ (define_insn "*subs_<optab><ALLX:mode>_shift_<GPI:mode>" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (minus:GPI (match_operand:GPI 1 "register_operand" "rk") (ashift:GPI (ANY_EXTEND:GPI (match_operand:ALLX 2 "register_operand" "r")) @@ -2196,7 +2401,7 @@ (match_operand 2 "aarch64_pwr_imm3" "Up3")) (match_operand 3 "const_int_operand" "n") (const_int 0)) - (match_operand:GPI 4 "register_operand" 
"r")) + (match_operand:GPI 4 "register_operand" "rk")) (const_int 0))) (set (match_operand:GPI 0 "register_operand" "=r") (plus:GPI (ANY_EXTRACT:GPI (mult:GPI (match_dup 1) (match_dup 2)) @@ -2211,7 +2416,7 @@ (define_insn "*subs_<optab><mode>_multp2" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (minus:GPI (match_operand:GPI 4 "register_operand" "r") + (minus:GPI (match_operand:GPI 4 "register_operand" "rk") (ANY_EXTRACT:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r") (match_operand 2 "aarch64_pwr_imm3" "Up3")) @@ -2394,7 +2599,7 @@ [(set (match_operand:GPI 0 "register_operand") (plus:GPI (plus:GPI - (ne:GPI (reg:CC_C CC_REGNUM) (const_int 0)) + (ltu:GPI (reg:CC_C CC_REGNUM) (const_int 0)) (match_operand:GPI 1 "aarch64_reg_or_zero")) (match_operand:GPI 2 "aarch64_reg_or_zero")))] "" @@ -2433,65 +2638,62 @@ (define_expand "add<mode>3_carryinC" [(parallel [(set (match_dup 3) - (compare:CC_C + (compare:CC_ADC (plus:<DWI> (plus:<DWI> (match_dup 4) (zero_extend:<DWI> - (match_operand:GPI 1 "register_operand" ""))) + (match_operand:GPI 1 "register_operand"))) (zero_extend:<DWI> - (match_operand:GPI 2 "register_operand" ""))) - (zero_extend:<DWI> - (plus:GPI - (plus:GPI (match_dup 5) (match_dup 1)) - (match_dup 2))))) + (match_operand:GPI 2 "register_operand"))) + (match_dup 6))) (set (match_operand:GPI 0 "register_operand") (plus:GPI (plus:GPI (match_dup 5) (match_dup 1)) (match_dup 2)))])] "" { - operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM); - operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx); - operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx); + operands[3] = gen_rtx_REG (CC_ADCmode, CC_REGNUM); + rtx ccin = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[4] = gen_rtx_LTU (<DWI>mode, ccin, const0_rtx); + operands[5] = gen_rtx_LTU (<MODE>mode, ccin, const0_rtx); + operands[6] = immed_wide_int_const (wi::shwi (1, <DWI>mode) + << GET_MODE_BITSIZE (<MODE>mode), + TImode); }) (define_insn "*add<mode>3_carryinC_zero" - [(set (reg:CC_C 
CC_REGNUM) - (compare:CC_C + [(set (reg:CC_ADC CC_REGNUM) + (compare:CC_ADC (plus:<DWI> (match_operand:<DWI> 2 "aarch64_carry_operation" "") (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) - (zero_extend:<DWI> - (plus:GPI - (match_operand:GPI 3 "aarch64_carry_operation" "") - (match_dup 1))))) + (match_operand 4 "const_scalar_int_operand" ""))) (set (match_operand:GPI 0 "register_operand" "=r") - (plus:GPI (match_dup 3) (match_dup 1)))] - "" + (plus:GPI (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1)))] + "rtx_mode_t (operands[4], <DWI>mode) + == (wi::shwi (1, <DWI>mode) << (unsigned) GET_MODE_BITSIZE (<MODE>mode))" "adcs\\t%<w>0, %<w>1, <w>zr" [(set_attr "type" "adc_reg")] ) (define_insn "*add<mode>3_carryinC" - [(set (reg:CC_C CC_REGNUM) - (compare:CC_C + [(set (reg:CC_ADC CC_REGNUM) + (compare:CC_ADC (plus:<DWI> (plus:<DWI> (match_operand:<DWI> 3 "aarch64_carry_operation" "") (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) - (zero_extend:<DWI> - (plus:GPI - (plus:GPI - (match_operand:GPI 4 "aarch64_carry_operation" "") - (match_dup 1)) - (match_dup 2))))) + (match_operand 5 "const_scalar_int_operand" ""))) (set (match_operand:GPI 0 "register_operand" "=r") (plus:GPI - (plus:GPI (match_dup 4) (match_dup 1)) + (plus:GPI (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) (match_dup 2)))] - "" + "rtx_mode_t (operands[5], <DWI>mode) + == (wi::shwi (1, <DWI>mode) << (unsigned) GET_MODE_BITSIZE (<MODE>mode))" "adcs\\t%<w>0, %<w>1, %<w>2" [(set_attr "type" "adc_reg")] ) @@ -2504,9 +2706,9 @@ (plus:<DWI> (match_dup 3) (sign_extend:<DWI> - (match_operand:GPI 1 "register_operand" ""))) + (match_operand:GPI 1 "register_operand"))) (sign_extend:<DWI> - (match_operand:GPI 2 "register_operand" ""))) + (match_operand:GPI 2 "register_operand"))) (sign_extend:<DWI> (plus:GPI (plus:GPI (match_dup 4) (match_dup 1)) @@ -2518,8 +2720,8 @@ "" { rtx 
cc = gen_rtx_REG (CC_Cmode, CC_REGNUM); - operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx); - operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx); + operands[3] = gen_rtx_LTU (<DWI>mode, cc, const0_rtx); + operands[4] = gen_rtx_LTU (<MODE>mode, cc, const0_rtx); }) (define_insn "*add<mode>3_carryinV_zero" @@ -2658,19 +2860,109 @@ (set_attr "arch" "*,simd")] ) -(define_expand "subv<mode>4" +(define_expand "subv<GPI:mode>4" [(match_operand:GPI 0 "register_operand") - (match_operand:GPI 1 "aarch64_reg_or_zero") - (match_operand:GPI 2 "aarch64_reg_or_zero") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "aarch64_plus_operand") (label_ref (match_operand 3 "" ""))] "" { - emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); + if (CONST_INT_P (operands[2])) + emit_insn (gen_subv<mode>_imm (operands[0], operands[1], operands[2])); + else + emit_insn (gen_subv<mode>_insn (operands[0], operands[1], operands[2])); aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); DONE; }) +(define_insn "subv<GPI:mode>_insn" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (sign_extend:<DWI> + (minus:GPI + (match_operand:GPI 1 "register_operand" "rk") + (match_operand:GPI 2 "register_operand" "r"))) + (minus:<DWI> (sign_extend:<DWI> (match_dup 1)) + (sign_extend:<DWI> (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI (match_dup 1) (match_dup 2)))] + "" + "subs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "subv<GPI:mode>_imm" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (sign_extend:<DWI> + (minus:GPI + (match_operand:GPI 1 "register_operand" "rk,rk") + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))) + (minus:<DWI> (sign_extend:<DWI> (match_dup 1)) + (match_dup 2)))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (minus:GPI (match_dup 1) (match_dup 2)))] + "" + "@ + subs\\t%<w>0, %<w>1, %2 + adds\\t%<w>0, %<w>1, #%n2" + [(set_attr "type" "alus_sreg")] +) + 
+(define_expand "negv<GPI:mode>3" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (label_ref (match_operand 2 "" ""))] + "" + { + emit_insn (gen_negv<mode>_insn (operands[0], operands[1])); + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]); + + DONE; + } +) + +(define_insn "negv<GPI:mode>_insn" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (sign_extend:<DWI> + (neg:GPI (match_operand:GPI 1 "register_operand" "r"))) + (neg:<DWI> (sign_extend:<DWI> (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (match_dup 1)))] + "" + "negs\\t%<w>0, %<w>1" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "negv<GPI:mode>_cmp_only" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (sign_extend:<DWI> + (neg:GPI (match_operand:GPI 0 "register_operand" "r"))) + (neg:<DWI> (sign_extend:<DWI> (match_dup 0)))))] + "" + "negs\\t%<w>zr, %<w>0" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "*cmpv<GPI:mode>_insn" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (sign_extend:<DWI> + (minus:GPI (match_operand:GPI 0 "register_operand" "r,r,r") + (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J"))) + (minus:<DWI> (sign_extend:<DWI> (match_dup 0)) + (sign_extend:<DWI> (match_dup 1)))))] + "" + "@ + cmp\\t%<w>0, %<w>1 + cmp\\t%<w>0, %1 + cmp\\t%<w>0, #%n1" + [(set_attr "type" "alus_sreg")] +) + (define_expand "usubv<mode>4" [(match_operand:GPI 0 "register_operand") (match_operand:GPI 1 "aarch64_reg_or_zero") @@ -2685,9 +2977,9 @@ }) (define_expand "subti3" - [(set (match_operand:TI 0 "register_operand" "") - (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "") - (match_operand:TI 2 "register_operand" "")))] + [(set (match_operand:TI 0 "register_operand") + (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 2 "register_operand")))] "" { rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; @@ -2706,7 +2998,7 @@ (define_expand "subvti4" [(match_operand:TI 0 "register_operand") 
- (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 1 "register_operand") (match_operand:TI 2 "aarch64_reg_or_imm") (label_ref (match_operand 3 "" ""))] "" @@ -2717,7 +3009,7 @@ &low_dest, &op1_low, &op2_low, &high_dest, &op1_high, &op2_high); aarch64_expand_subvti (operands[0], low_dest, op1_low, op2_low, - high_dest, op1_high, op2_high); + high_dest, op1_high, op2_high, false); aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); DONE; @@ -2725,7 +3017,7 @@ (define_expand "usubvti4" [(match_operand:TI 0 "register_operand") - (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 1 "register_operand") (match_operand:TI 2 "aarch64_reg_or_imm") (label_ref (match_operand 3 "" ""))] "" @@ -2736,15 +3028,59 @@ &low_dest, &op1_low, &op2_low, &high_dest, &op1_high, &op2_high); aarch64_expand_subvti (operands[0], low_dest, op1_low, op2_low, - high_dest, op1_high, op2_high); + high_dest, op1_high, op2_high, true); aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); DONE; }) +(define_expand "negvti3" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "register_operand") + (label_ref (match_operand 2 "" ""))] + "" + { + emit_insn (gen_negdi_carryout (gen_lowpart (DImode, operands[0]), + gen_lowpart (DImode, operands[1]))); + emit_insn (gen_negvdi_carryinV (gen_highpart (DImode, operands[0]), + gen_highpart (DImode, operands[1]))); + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]); + + DONE; + } +) + +(define_insn "negdi_carryout" + [(set (reg:CC CC_REGNUM) + (compare:CC + (const_int 0) (match_operand:DI 1 "register_operand" "r"))) + (set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (match_dup 1)))] + "" + "negs\\t%0, %1" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "negvdi_carryinV" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (neg:TI (plus:TI + (ltu:TI (reg:CC CC_REGNUM) (const_int 0)) + (sign_extend:TI (match_operand:DI 1 "register_operand" "r")))) + (sign_extend:TI + (neg:DI (plus:DI (ltu:DI (reg:CC 
CC_REGNUM) (const_int 0)) + (match_dup 1)))))) + (set (match_operand:DI 0 "register_operand" "=r") + (neg:DI (plus:DI (ltu:DI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 1))))] + "" + "ngcs\\t%0, %1" + [(set_attr "type" "alus_sreg")] +) + (define_insn "*sub<mode>3_compare0" [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "r") + (compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "rk") (match_operand:GPI 2 "register_operand" "r")) (const_int 0))) (set (match_operand:GPI 0 "register_operand" "=r") @@ -2757,7 +3093,7 @@ ;; zero_extend version of above (define_insn "*subsi3_compare0_uxtw" [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ (minus:SI (match_operand:SI 1 "register_operand" "r") + (compare:CC_NZ (minus:SI (match_operand:SI 1 "register_operand" "rk") (match_operand:SI 2 "register_operand" "r")) (const_int 0))) (set (match_operand:DI 0 "register_operand" "=r") @@ -2767,10 +3103,10 @@ [(set_attr "type" "alus_sreg")] ) -(define_insn "*sub<mode>3_compare1_imm" +(define_insn "sub<mode>3_compare1_imm" [(set (reg:CC CC_REGNUM) (compare:CC - (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ,rZ") + (match_operand:GPI 1 "aarch64_reg_or_zero" "rkZ,rkZ") (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))) (set (match_operand:GPI 0 "register_operand" "=r,r") (plus:GPI @@ -2778,15 +3114,15 @@ (match_operand:GPI 3 "aarch64_plus_immediate" "J,I")))] "UINTVAL (operands[2]) == -UINTVAL (operands[3])" "@ - subs\\t%<w>0, %<w>1, #%n3 - adds\\t%<w>0, %<w>1, %3" + subs\\t%<w>0, %<w>1, %2 + adds\\t%<w>0, %<w>1, #%n2" [(set_attr "type" "alus_imm")] ) (define_insn "sub<mode>3_compare1" [(set (reg:CC CC_REGNUM) (compare:CC - (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") + (match_operand:GPI 1 "aarch64_reg_or_zero" "rkZ") (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ"))) (set (match_operand:GPI 0 "register_operand" "=r") (minus:GPI (match_dup 1) (match_dup 2)))] @@ -2795,21 +3131,8 @@ [(set_attr "type" "alus_sreg")] ) 
-(define_insn "sub<mode>3_compare1_imm" - [(set (reg:CC CC_REGNUM) - (compare:CC - (match_operand:GPI 1 "register_operand" "r") - (match_operand:GPI 3 "const_int_operand" "n"))) - (set (match_operand:GPI 0 "register_operand" "=r") - (plus:GPI (match_dup 1) - (match_operand:GPI 2 "aarch64_sub_immediate" "J")))] - "INTVAL (operands[3]) == -INTVAL (operands[2])" - "subs\\t%<w>0, %<w>1, #%n2" - [(set_attr "type" "alus_sreg")] -) - (define_peephole2 - [(set (match_operand:GPI 0 "register_operand") + [(set (match_operand:GPI 0 "aarch64_general_reg") (minus:GPI (match_operand:GPI 1 "aarch64_reg_or_zero") (match_operand:GPI 2 "aarch64_reg_or_zero"))) (set (reg:CC CC_REGNUM) @@ -2834,7 +3157,7 @@ (compare:CC (match_operand:GPI 1 "aarch64_reg_or_zero") (match_operand:GPI 2 "aarch64_reg_or_zero"))) - (set (match_operand:GPI 0 "register_operand") + (set (match_operand:GPI 0 "aarch64_general_reg") (minus:GPI (match_dup 1) (match_dup 2)))] "" @@ -2847,9 +3170,9 @@ ) (define_peephole2 - [(set (match_operand:GPI 0 "register_operand") + [(set (match_operand:GPI 0 "aarch64_general_reg") (plus:GPI (match_operand:GPI 1 "register_operand") - (match_operand:GPI 2 "aarch64_sub_immediate"))) + (match_operand:GPI 2 "aarch64_plus_immediate"))) (set (reg:CC CC_REGNUM) (compare:CC (match_dup 1) @@ -2859,7 +3182,7 @@ [(const_int 0)] { emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1], - operands[2], operands[3])); + operands[3], operands[2])); DONE; } ) @@ -2872,14 +3195,14 @@ (compare:CC (match_operand:GPI 1 "register_operand") (match_operand:GPI 3 "const_int_operand"))) - (set (match_operand:GPI 0 "register_operand") + (set (match_operand:GPI 0 "aarch64_general_reg") (plus:GPI (match_dup 1) - (match_operand:GPI 2 "aarch64_sub_immediate")))] + (match_operand:GPI 2 "aarch64_plus_immediate")))] "INTVAL (operands[3]) == -INTVAL (operands[2])" [(const_int 0)] { emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1], - operands[2], operands[3])); + operands[3], 
operands[2])); DONE; } ) @@ -3099,41 +3422,29 @@ [(set_attr "type" "adc_reg")] ) -(define_expand "sub<mode>3_carryinCV" +(define_expand "usub<GPI:mode>3_carryinC" [(parallel [(set (reg:CC CC_REGNUM) (compare:CC - (sign_extend:<DWI> - (match_operand:GPI 1 "aarch64_reg_or_zero" "")) + (zero_extend:<DWI> + (match_operand:GPI 1 "aarch64_reg_or_zero")) (plus:<DWI> - (sign_extend:<DWI> - (match_operand:GPI 2 "register_operand" "")) + (zero_extend:<DWI> + (match_operand:GPI 2 "register_operand")) (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0))))) - (set (match_operand:GPI 0 "register_operand" "") + (set (match_operand:GPI 0 "register_operand") (minus:GPI (minus:GPI (match_dup 1) (match_dup 2)) (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] "" ) -(define_insn "*sub<mode>3_carryinCV_z1_z2" - [(set (reg:CC CC_REGNUM) - (compare:CC - (const_int 0) - (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) - (set (match_operand:GPI 0 "register_operand" "=r") - (neg:GPI (match_operand:GPI 1 "aarch64_borrow_operation" "")))] - "" - "sbcs\\t%<w>0, <w>zr, <w>zr" - [(set_attr "type" "adc_reg")] -) - -(define_insn "*sub<mode>3_carryinCV_z1" +(define_insn "*usub<GPI:mode>3_carryinC_z1" [(set (reg:CC CC_REGNUM) (compare:CC (const_int 0) (plus:<DWI> - (sign_extend:<DWI> + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))) (set (match_operand:GPI 0 "register_operand" "=r") @@ -3145,10 +3456,10 @@ [(set_attr "type" "adc_reg")] ) -(define_insn "*sub<mode>3_carryinCV_z2" +(define_insn "*usub<GPI:mode>3_carryinC_z2" [(set (reg:CC CC_REGNUM) (compare:CC - (sign_extend:<DWI> + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) (set (match_operand:GPI 0 "register_operand" "=r") @@ -3160,13 +3471,13 @@ [(set_attr "type" "adc_reg")] ) -(define_insn "*sub<mode>3_carryinCV" +(define_insn "*usub<GPI:mode>3_carryinC" [(set (reg:CC CC_REGNUM) (compare:CC - 
(sign_extend:<DWI> + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) (plus:<DWI> - (sign_extend:<DWI> + (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r")) (match_operand:<DWI> 3 "aarch64_borrow_operation" "")))) (set (match_operand:GPI 0 "register_operand" "=r") @@ -3178,6 +3489,69 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "sub<GPI:mode>3_carryinV" + [(parallel + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (minus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 1 "aarch64_reg_or_zero")) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand")) + (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0)))) + (sign_extend:<DWI> + (minus:GPI (match_dup 1) + (plus:GPI (ltu:GPI (reg:CC CC_REGNUM) (const_int 0)) + (match_dup 2)))))) + (set (match_operand:GPI 0 "register_operand") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] + "" +) + +(define_insn "*sub<mode>3_carryinV_z2" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (minus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) + (match_operand:<DWI> 2 "aarch64_borrow_operation" "")) + (sign_extend:<DWI> + (minus:GPI (match_dup 1) + (match_operand:GPI 3 "aarch64_borrow_operation" ""))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (match_dup 1) (match_dup 3)))] + "" + "sbcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinV" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (minus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r")) + (match_operand:<DWI> 3 "aarch64_borrow_operation" ""))) + (sign_extend:<DWI> + (minus:GPI + (match_dup 1) + (plus:GPI (match_operand:GPI 4 "aarch64_borrow_operation" "") + (match_dup 2)))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) 
+ (match_dup 4)))] + "" + "sbcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*sub_uxt<mode>_shift2" [(set (match_operand:GPI 0 "register_operand" "=rk") (minus:GPI (match_operand:GPI 4 "register_operand" "rk") @@ -3243,8 +3617,8 @@ ) (define_expand "abs<mode>2" - [(match_operand:GPI 0 "register_operand" "") - (match_operand:GPI 1 "register_operand" "")] + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand")] "" { rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx); @@ -3657,10 +4031,10 @@ ;; ------------------------------------------------------------------- (define_expand "cstore<mode>4" - [(set (match_operand:SI 0 "register_operand" "") + [(set (match_operand:SI 0 "register_operand") (match_operator:SI 1 "aarch64_comparison_operator" - [(match_operand:GPI 2 "register_operand" "") - (match_operand:GPI 3 "aarch64_plus_operand" "")]))] + [(match_operand:GPI 2 "register_operand") + (match_operand:GPI 3 "aarch64_plus_operand")]))] "" " operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], @@ -3682,10 +4056,10 @@ (define_expand "cstore<mode>4" - [(set (match_operand:SI 0 "register_operand" "") + [(set (match_operand:SI 0 "register_operand") (match_operator:SI 1 "aarch64_comparison_operator_mode" - [(match_operand:GPF 2 "register_operand" "") - (match_operand:GPF 3 "aarch64_fp_compare_operand" "")]))] + [(match_operand:GPF 2 "register_operand") + (match_operand:GPF 3 "aarch64_fp_compare_operand")]))] "" " operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], @@ -3770,13 +4144,13 @@ ) (define_expand "cmov<mode>6" - [(set (match_operand:GPI 0 "register_operand" "") + [(set (match_operand:GPI 0 "register_operand") (if_then_else:GPI (match_operator 1 "aarch64_comparison_operator" - [(match_operand:GPI 2 "register_operand" "") - (match_operand:GPI 3 "aarch64_plus_operand" "")]) - (match_operand:GPI 4 "register_operand" "") - (match_operand:GPI 5 
"register_operand" "")))] + [(match_operand:GPI 2 "register_operand") + (match_operand:GPI 3 "aarch64_plus_operand")]) + (match_operand:GPI 4 "register_operand") + (match_operand:GPI 5 "register_operand")))] "" " operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], @@ -3786,13 +4160,13 @@ ) (define_expand "cmov<mode>6" - [(set (match_operand:GPF 0 "register_operand" "") + [(set (match_operand:GPF 0 "register_operand") (if_then_else:GPF (match_operator 1 "aarch64_comparison_operator" - [(match_operand:GPF 2 "register_operand" "") - (match_operand:GPF 3 "aarch64_fp_compare_operand" "")]) - (match_operand:GPF 4 "register_operand" "") - (match_operand:GPF 5 "register_operand" "")))] + [(match_operand:GPF 2 "register_operand") + (match_operand:GPF 3 "aarch64_fp_compare_operand")]) + (match_operand:GPF 4 "register_operand") + (match_operand:GPF 5 "register_operand")))] "" " operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], @@ -3870,10 +4244,10 @@ ) (define_expand "mov<mode>cc" - [(set (match_operand:ALLI 0 "register_operand" "") - (if_then_else:ALLI (match_operand 1 "aarch64_comparison_operator" "") - (match_operand:ALLI 2 "register_operand" "") - (match_operand:ALLI 3 "register_operand" "")))] + [(set (match_operand:ALLI 0 "register_operand") + (if_then_else:ALLI (match_operand 1 "aarch64_comparison_operator") + (match_operand:ALLI 2 "register_operand") + (match_operand:ALLI 3 "register_operand")))] "" { rtx ccreg; @@ -3889,10 +4263,10 @@ ) (define_expand "mov<GPF:mode><GPI:mode>cc" - [(set (match_operand:GPI 0 "register_operand" "") - (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator" "") - (match_operand:GPF 2 "register_operand" "") - (match_operand:GPF 3 "register_operand" "")))] + [(set (match_operand:GPI 0 "register_operand") + (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator") + (match_operand:GPF 2 "register_operand") + (match_operand:GPF 3 "register_operand")))] "" { rtx ccreg; @@ 
-3908,10 +4282,10 @@ ) (define_expand "mov<mode>cc" - [(set (match_operand:GPF 0 "register_operand" "") - (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator" "") - (match_operand:GPF 2 "register_operand" "") - (match_operand:GPF 3 "register_operand" "")))] + [(set (match_operand:GPF 0 "register_operand") + (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator") + (match_operand:GPF 2 "register_operand") + (match_operand:GPF 3 "register_operand")))] "" { rtx ccreg; @@ -3927,10 +4301,10 @@ ) (define_expand "<neg_not_op><mode>cc" - [(set (match_operand:GPI 0 "register_operand" "") - (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator" "") - (NEG_NOT:GPI (match_operand:GPI 2 "register_operand" "")) - (match_operand:GPI 3 "register_operand" "")))] + [(set (match_operand:GPI 0 "register_operand") + (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator") + (NEG_NOT:GPI (match_operand:GPI 2 "register_operand")) + (match_operand:GPI 3 "register_operand")))] "" { rtx ccreg; @@ -4474,7 +4848,6 @@ { rtx v = gen_reg_rtx (V8QImode); rtx v1 = gen_reg_rtx (V8QImode); - rtx r = gen_reg_rtx (QImode); rtx in = operands[1]; rtx out = operands[0]; if(<MODE>mode == SImode) @@ -4488,8 +4861,7 @@ } emit_move_insn (v, gen_lowpart (V8QImode, in)); emit_insn (gen_popcountv8qi2 (v1, v)); - emit_insn (gen_reduc_plus_scal_v8qi (r, v1)); - emit_insn (gen_zero_extendqi<mode>2 (out, r)); + emit_insn (gen_aarch64_zero_extend<mode>_reduc_plus_v8qi (out, v1)); DONE; }) @@ -4537,7 +4909,7 @@ [(set_attr "type" "alus_imm")] ) -(define_insn "*ands<mode>_compare0" +(define_insn "*ands<GPI:mode>_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ (zero_extend:GPI (match_operand:SHORT 1 "register_operand" "r")) @@ -5159,7 +5531,7 @@ ;; ------------------------------------------------------------------- (define_expand "<optab>" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand") (ANY_EXTRACT:DI (match_operand:DI 1 
"register_operand") (match_operand 2 "aarch64_simd_shift_imm_offset_di") @@ -5259,6 +5631,107 @@ [(set_attr "type" "bfm")] ) +;; Match a bfi instruction where the shift of OP3 means that we are +;; actually copying the least significant bits of OP3 into OP0 by way +;; of the AND masks and the IOR instruction. A similar instruction +;; with the two parts of the IOR swapped around was never triggered +;; in a bootstrap build and test of GCC so it was not included. + +(define_insn "*aarch64_bfi<GPI:mode>5_shift" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0") + (match_operand:GPI 2 "const_int_operand" "n")) + (and:GPI (ashift:GPI + (match_operand:GPI 3 "register_operand" "r") + (match_operand:GPI 4 "aarch64_simd_shift_imm_<mode>" "n")) + (match_operand:GPI 5 "const_int_operand" "n"))))] + "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]), + UINTVAL (operands[4]), + UINTVAL(operands[5]))" + "bfi\t%<GPI:w>0, %<GPI:w>3, %4, %P5" + [(set_attr "type" "bfm")] +) + +(define_insn "*aarch64_bfi<GPI:mode>5_shift_alt" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (ashift:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "aarch64_simd_shift_imm_<mode>" "n")) + (match_operand:GPI 3 "const_int_operand" "n")) + (and:GPI (match_operand:GPI 4 "register_operand" "0") + (match_operand:GPI 5 "const_int_operand" "n"))))] + "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[5]), + UINTVAL (operands[2]), + UINTVAL(operands[3]))" + "bfi\t%<GPI:w>0, %<GPI:w>1, %2, %P3" + [(set_attr "type" "bfm")] +) + +;; Like *aarch64_bfi<GPI:mode>5_shift but with no and of the ashift because +;; the shift is large enough to remove the need for an AND instruction. 
+ +(define_insn "*aarch64_bfi<GPI:mode>4_noand" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0") + (match_operand:GPI 2 "const_int_operand" "n")) + (ashift:GPI + (match_operand:GPI 3 "register_operand" "r") + (match_operand:GPI 4 "aarch64_simd_shift_imm_<mode>" "n"))))] + "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]), + UINTVAL (operands[4]), + HOST_WIDE_INT_M1U << UINTVAL (operands[4]) )" +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - UINTVAL (operands[4])); + return "bfi\t%<GPI:w>0, %<GPI:w>3, %4, %5"; +} + [(set_attr "type" "bfm")] +) + +(define_insn "*aarch64_bfi<GPI:mode>4_noand_alt" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (ashift:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "aarch64_simd_shift_imm_<mode>" "n")) + (and:GPI (match_operand:GPI 3 "register_operand" "0") + (match_operand:GPI 4 "const_int_operand" "n"))))] + "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[4]), + UINTVAL (operands[2]), + HOST_WIDE_INT_M1U << UINTVAL (operands[2]) )" +{ + operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - UINTVAL (operands[2])); + return "bfi\t%<GPI:w>0, %<GPI:w>1, %2, %5"; +} + [(set_attr "type" "bfm")] +) + +;; Like *aarch64_bfi<GPI:mode>5_shift but with no shifting, we are just +;; copying the least significant bits of OP3 to OP0. We need two versions +;; of the instruction to handle different checks on the constant values. 
+ +(define_insn "*aarch64_bfi<GPI:mode>4_noshift" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0") + (match_operand:GPI 2 "const_int_operand" "n")) + (and:GPI (match_operand:GPI 3 "register_operand" "r") + (match_operand:GPI 4 "const_int_operand" "n"))))] + "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]), 0, + UINTVAL (operands[4]))" + "bfi\t%<GPI:w>0, %<GPI:w>3, 0, %P4" + [(set_attr "type" "bfm")] +) + +(define_insn "*aarch64_bfi<GPI:mode>4_noshift_alt" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (match_operand:GPI 3 "register_operand" "r") + (match_operand:GPI 4 "const_int_operand" "n")) + (and:GPI (match_operand:GPI 1 "register_operand" "0") + (match_operand:GPI 2 "const_int_operand" "n"))))] + "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]), 0, + UINTVAL (operands[4]))" + "bfi\t%<GPI:w>0, %<GPI:w>3, 0, %P4" + [(set_attr "type" "bfm")] +) + (define_insn "*extr_insv_lower_reg<mode>" [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r") (match_operand 1 "const_int_operand" "n") @@ -5314,6 +5787,21 @@ [(set_attr "type" "bfx")] ) +(define_insn "*ashiftsi_extvdi_bfiz" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashift:SI + (match_operator:SI 4 "subreg_lowpart_operator" + [(sign_extract:DI + (match_operand:DI 1 "register_operand" "r") + (match_operand 2 "aarch64_simd_shift_imm_offset_si") + (const_int 0))]) + (match_operand 3 "aarch64_simd_shift_imm_si")))] + "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]), + 1, GET_MODE_BITSIZE (SImode) - 1)" + "sbfiz\\t%w0, %w1, %3, %2" + [(set_attr "type" "bfx")] +) + ;; When the bit position and width of the equivalent extraction add up to 32 ;; we can use a W-reg LSL instruction taking advantage of the implicit ;; zero-extension of the X-reg. @@ -5675,6 +6163,44 @@ [(set_attr "type" "f_cvtf2i")] ) +;; Equal width integer to fp and multiply combine. 
+(define_insn "*aarch64_<su_optab>cvtf<fcvt_target><GPF:mode>2_mult" + [(set (match_operand:GPF 0 "register_operand" "=w,w") + (mult:GPF (FLOATUORS:GPF + (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r")) + (match_operand:GPF 2 "aarch64_fp_pow2_recip" "Dt,Dt")))] + "TARGET_FLOAT" + { + operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2])); + switch (which_alternative) + { + case 0: + return "<su_optab>cvtf\t%<GPF:s>0, %<s>1, #%2"; + case 1: + return "<su_optab>cvtf\t%<GPF:s>0, %<w1>1, #%2"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "neon_int_to_fp_<Vetype>,f_cvti2f") + (set_attr "arch" "simd,fp")] +) + +;; Unequal width integer to fp and multiply combine. +(define_insn "*aarch64_<su_optab>cvtf<fcvt_iesize><GPF:mode>2_mult" + [(set (match_operand:GPF 0 "register_operand" "=w") + (mult:GPF (FLOATUORS:GPF + (match_operand:<FCVT_IESIZE> 1 "register_operand" "r")) + (match_operand:GPF 2 "aarch64_fp_pow2_recip" "Dt")))] + "TARGET_FLOAT" + { + operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2])); + return "<su_optab>cvtf\t%<GPF:s>0, %<w2>1, #%2"; + } + [(set_attr "type" "f_cvti2f")] +) + +;; Equal width integer to fp conversion. (define_insn "<optab><fcvt_target><GPF:mode>2" [(set (match_operand:GPF 0 "register_operand" "=w,w") (FLOATUORS:GPF (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r")))] @@ -5686,6 +6212,7 @@ (set_attr "arch" "simd,fp")] ) +;; Unequal width integer to fp conversions. 
(define_insn "<optab><fcvt_iesize><GPF:mode>2" [(set (match_operand:GPF 0 "register_operand" "=w") (FLOATUORS:GPF (match_operand:<FCVT_IESIZE> 1 "register_operand" "r")))] @@ -5908,8 +6435,8 @@ ) (define_expand "sqrt<mode>2" - [(set (match_operand:GPF_F16 0 "register_operand" "=w") - (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))] + [(set (match_operand:GPF_F16 0 "register_operand") + (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand")))] "TARGET_FLOAT" { if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) @@ -5971,7 +6498,7 @@ [(match_operand:GPI 0 "register_operand") (match_operand:GPF 1 "register_operand")] "TARGET_FLOAT - && ((GET_MODE_SIZE (<GPF:MODE>mode) <= GET_MODE_SIZE (<GPI:MODE>mode)) + && ((GET_MODE_BITSIZE (<GPF:MODE>mode) <= LONG_TYPE_SIZE) || !flag_trapping_math || flag_fp_int_builtin_inexact)" { rtx cvt = gen_reg_rtx (<GPF:MODE>mode); @@ -5986,48 +6513,46 @@ ;; LDR d2, #(1 << 63) ;; BSL v2.8b, [y], [x] ;; -;; or another, equivalent, sequence using one of BSL/BIT/BIF. -;; aarch64_simd_bsldf will select the best suited of these instructions -;; to generate based on register allocation, and knows how to partially -;; constant fold based on the values of X and Y, so expand through that. - -(define_expand "copysigndf3" - [(match_operand:DF 0 "register_operand") - (match_operand:DF 1 "register_operand") - (match_operand:DF 2 "register_operand")] +;; or another, equivalent, sequence using one of BSL/BIT/BIF. Because +;; we expect these operations to nearly always operate on +;; floating-point values, we do not want the operation to be +;; simplified into a bit-field insert operation that operates on the +;; integer side, since typically that would involve three inter-bank +;; register copies. As we do not expect copysign to be followed by +;; other logical operations on the result, it seems preferable to keep +;; this as an unspec operation, rather than exposing the underlying +;; logic to the compiler. 
+ +(define_expand "copysign<GPF:mode>3" + [(match_operand:GPF 0 "register_operand") + (match_operand:GPF 1 "register_operand") + (match_operand:GPF 2 "register_operand")] "TARGET_FLOAT && TARGET_SIMD" { - rtx mask = gen_reg_rtx (DImode); - emit_move_insn (mask, GEN_INT (HOST_WIDE_INT_1U << 63)); - emit_insn (gen_aarch64_simd_bsldf (operands[0], mask, - operands[2], operands[1])); + rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode); + emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U + << (GET_MODE_BITSIZE (<MODE>mode) - 1))); + emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2], + bitmask)); DONE; } ) -;; As above, but we must first get to a 64-bit value if we wish to use -;; aarch64_simd_bslv2sf. - -(define_expand "copysignsf3" - [(match_operand:SF 0 "register_operand") - (match_operand:SF 1 "register_operand") - (match_operand:SF 2 "register_operand")] +(define_insn "copysign<GPF:mode>3_insn" + [(set (match_operand:GPF 0 "register_operand" "=w,w,w,r") + (unspec:GPF [(match_operand:GPF 1 "register_operand" "w,0,w,r") + (match_operand:GPF 2 "register_operand" "w,w,0,0") + (match_operand:<V_INT_EQUIV> 3 "register_operand" "0,w,w,X")] + UNSPEC_COPYSIGN))] "TARGET_FLOAT && TARGET_SIMD" -{ - rtx v_bitmask = gen_reg_rtx (V2SImode); - - /* Juggle modes to get us in to a vector mode for BSL. 
*/ - rtx op1 = lowpart_subreg (DImode, operands[1], SFmode); - rtx op2 = lowpart_subreg (V2SFmode, operands[2], SFmode); - rtx tmp = gen_reg_rtx (V2SFmode); - emit_move_insn (v_bitmask, - aarch64_simd_gen_const_vector_dup (V2SImode, - HOST_WIDE_INT_M1U << 31)); - emit_insn (gen_aarch64_simd_bslv2sf (tmp, v_bitmask, op2, op1)); - emit_move_insn (operands[0], lowpart_subreg (SFmode, tmp, V2SFmode)); - DONE; -} -) + "@ + bsl\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype> + bit\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> + bif\\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype> + bfxil\\t%<w1>0, %<w1>1, #0, <sizem1>" + [(set_attr "type" "neon_bsl<q>,neon_bsl<q>,neon_bsl<q>,bfm")] +) + ;; For xorsign (x, y), we want to generate: ;; @@ -6070,6 +6595,7 @@ ;; ------------------------------------------------------------------- ;; Reload Scalar Floating point modes from constant pool. ;; The AArch64 port doesn't have __int128 constant move support. +;; The patterns need constraints due to TARGET_SECONDARY_RELOAD hook. (define_expand "@aarch64_reload_movcp<GPF_TF:mode><P:mode>" [(set (match_operand:GPF_TF 0 "register_operand" "=w") (mem:GPF_TF (match_operand 1 "aarch64_constant_pool_symref" "S"))) @@ -6170,9 +6696,9 @@ ;; rodata section. (define_expand "add_losym" - [(set (match_operand 0 "register_operand" "=r") - (lo_sum (match_operand 1 "register_operand" "r") - (match_operand 2 "aarch64_valid_symref" "S")))] + [(set (match_operand 0 "register_operand") + (lo_sum (match_operand 1 "register_operand") + (match_operand 2 "aarch64_valid_symref")))] "" { machine_mode mode = GET_MODE (operands[0]); @@ -6261,9 +6787,10 @@ ;; instructions in the TLS stubs, in order to enable linker relaxation. ;; Therefore we treat the stubs as an atomic sequence. 
(define_expand "tlsgd_small_<mode>" - [(parallel [(set (match_operand 0 "register_operand" "") + [(parallel [(set (match_operand:PTR 0 "register_operand") (call (mem:DI (match_dup 2)) (const_int 1))) - (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "")] UNSPEC_GOTSMALLTLS) + (unspec:DI [(const_int 0)] UNSPEC_CALLEE_ABI) + (unspec:DI [(match_operand 1 "aarch64_valid_symref")] UNSPEC_GOTSMALLTLS) (clobber (reg:DI LR_REGNUM))])] "" { @@ -6271,9 +6798,10 @@ }) (define_insn "*tlsgd_small_<mode>" - [(set (match_operand 0 "register_operand" "") + [(set (match_operand:PTR 0 "register_operand" "") (call (mem:DI (match_operand:DI 2 "" "")) (const_int 1))) - (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "S")] UNSPEC_GOTSMALLTLS) + (unspec:DI [(const_int 0)] UNSPEC_CALLEE_ABI) + (unspec:DI [(match_operand 1 "aarch64_valid_symref" "S")] UNSPEC_GOTSMALLTLS) (clobber (reg:DI LR_REGNUM)) ] "" @@ -6373,7 +6901,12 @@ "TARGET_TLS_DESC" { if (TARGET_SVE) - emit_insn (gen_tlsdesc_small_sve_<mode> (operands[0])); + { + rtx abi = gen_int_mode (aarch64_tlsdesc_abi_id (), DImode); + rtx_insn *call + = emit_call_insn (gen_tlsdesc_small_sve_<mode> (operands[0], abi)); + RTL_CONST_CALL_P (call) = 1; + } else emit_insn (gen_tlsdesc_small_advsimd_<mode> (operands[0])); DONE; @@ -6388,72 +6921,27 @@ UNSPEC_TLSDESC)) (clobber (reg:DI LR_REGNUM)) (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:DI 1 "=r"))] + (clobber (match_scratch:DI 1 "=r")) + (use (reg:DI FP_REGNUM))] "TARGET_TLS_DESC && !TARGET_SVE" "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" [(set_attr "type" "call") (set_attr "length" "16")]) -;; For SVE, model tlsdesc calls as clobbering the lower 128 bits of -;; all vector registers, and clobber all predicate registers, on -;; top of the usual R0 and LR. +;; For SVE, model tlsdesc calls as normal calls, with the callee ABI +;; describing the extra call-preserved guarantees. 
This would work +;; for non-SVE too, but avoiding a call is probably better if we can. (define_insn "tlsdesc_small_sve_<mode>" [(set (reg:PTR R0_REGNUM) - (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")] - UNSPEC_TLSDESC)) + (call (mem:DI (unspec:PTR + [(match_operand 0 "aarch64_valid_symref")] + UNSPEC_TLSDESC)) + (const_int 0))) + (unspec:DI [(match_operand:DI 1 "const_int_operand")] UNSPEC_CALLEE_ABI) (clobber (reg:DI LR_REGNUM)) - (clobber (reg:CC CC_REGNUM)) - (clobber_high (reg:TI V0_REGNUM)) - (clobber_high (reg:TI V1_REGNUM)) - (clobber_high (reg:TI V2_REGNUM)) - (clobber_high (reg:TI V3_REGNUM)) - (clobber_high (reg:TI V4_REGNUM)) - (clobber_high (reg:TI V5_REGNUM)) - (clobber_high (reg:TI V6_REGNUM)) - (clobber_high (reg:TI V7_REGNUM)) - (clobber_high (reg:TI V8_REGNUM)) - (clobber_high (reg:TI V9_REGNUM)) - (clobber_high (reg:TI V10_REGNUM)) - (clobber_high (reg:TI V11_REGNUM)) - (clobber_high (reg:TI V12_REGNUM)) - (clobber_high (reg:TI V13_REGNUM)) - (clobber_high (reg:TI V14_REGNUM)) - (clobber_high (reg:TI V15_REGNUM)) - (clobber_high (reg:TI V16_REGNUM)) - (clobber_high (reg:TI V17_REGNUM)) - (clobber_high (reg:TI V18_REGNUM)) - (clobber_high (reg:TI V19_REGNUM)) - (clobber_high (reg:TI V20_REGNUM)) - (clobber_high (reg:TI V21_REGNUM)) - (clobber_high (reg:TI V22_REGNUM)) - (clobber_high (reg:TI V23_REGNUM)) - (clobber_high (reg:TI V24_REGNUM)) - (clobber_high (reg:TI V25_REGNUM)) - (clobber_high (reg:TI V26_REGNUM)) - (clobber_high (reg:TI V27_REGNUM)) - (clobber_high (reg:TI V28_REGNUM)) - (clobber_high (reg:TI V29_REGNUM)) - (clobber_high (reg:TI V30_REGNUM)) - (clobber_high (reg:TI V31_REGNUM)) - (clobber (reg:VNx2BI P0_REGNUM)) - (clobber (reg:VNx2BI P1_REGNUM)) - (clobber (reg:VNx2BI P2_REGNUM)) - (clobber (reg:VNx2BI P3_REGNUM)) - (clobber (reg:VNx2BI P4_REGNUM)) - (clobber (reg:VNx2BI P5_REGNUM)) - (clobber (reg:VNx2BI P6_REGNUM)) - (clobber (reg:VNx2BI P7_REGNUM)) - (clobber (reg:VNx2BI P8_REGNUM)) - (clobber (reg:VNx2BI 
P9_REGNUM)) - (clobber (reg:VNx2BI P10_REGNUM)) - (clobber (reg:VNx2BI P11_REGNUM)) - (clobber (reg:VNx2BI P12_REGNUM)) - (clobber (reg:VNx2BI P13_REGNUM)) - (clobber (reg:VNx2BI P14_REGNUM)) - (clobber (reg:VNx2BI P15_REGNUM)) - (clobber (match_scratch:DI 1 "=r"))] + (clobber (match_scratch:DI 2 "=r"))] "TARGET_TLS_DESC && TARGET_SVE" - "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" + "adrp\\tx0, %A0\;ldr\\t%<w>2, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%2" [(set_attr "type" "call") (set_attr "length" "16")]) @@ -6467,6 +6955,15 @@ [(set_attr "length" "0")] ) +(define_insn "aarch64_fjcvtzs" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:DF 1 "register_operand" "w")] + UNSPEC_FJCVTZS))] + "TARGET_JSCVT" + "fjcvtzs\\t%w0, %d1" + [(set_attr "type" "f_cvtf2i")] +) + ;; Pointer authentication patterns are always provided. In architecture ;; revisions prior to ARMv8.3-A these HINT instructions operate as NOPs. ;; This lets the user write portable software which authenticates pointers @@ -6480,7 +6977,7 @@ [(set (reg:DI R30_REGNUM) (unspec:DI [(reg:DI R30_REGNUM) (reg:DI SP_REGNUM)] PAUTH_LR_SP))] "" - "hint\t<pauth_hint_num_a> // <pauth_mnem_prefix>asp"; + "hint\t<pauth_hint_num> // <pauth_mnem_prefix>sp"; ) ;; Signing/Authenticating X17 using X16 as the salt. @@ -6489,7 +6986,7 @@ [(set (reg:DI R17_REGNUM) (unspec:DI [(reg:DI R17_REGNUM) (reg:DI R16_REGNUM)] PAUTH_17_16))] "" - "hint\t<pauth_hint_num_a> // <pauth_mnem_prefix>a1716"; + "hint\t<pauth_hint_num> // <pauth_mnem_prefix>1716"; ) ;; Stripping the signature in R30. @@ -6544,7 +7041,7 @@ ;; Named pattern for expanding thread pointer reference. 
(define_expand "get_thread_pointerdi" - [(match_operand:DI 0 "register_operand" "=r")] + [(match_operand:DI 0 "register_operand")] "" { rtx tmp = aarch64_load_tp (operands[0]); @@ -6560,20 +7057,55 @@ "" { machine_mode mode = GET_MODE (operands[0]); - + if (aarch64_stack_protector_guard != SSP_GLOBAL) + { + /* Generate access through the system register. */ + rtx tmp_reg = gen_reg_rtx (mode); + if (mode == DImode) + { + emit_insn (gen_reg_stack_protect_address_di (tmp_reg)); + emit_insn (gen_adddi3 (tmp_reg, tmp_reg, + GEN_INT (aarch64_stack_protector_guard_offset))); + } + else + { + emit_insn (gen_reg_stack_protect_address_si (tmp_reg)); + emit_insn (gen_addsi3 (tmp_reg, tmp_reg, + GEN_INT (aarch64_stack_protector_guard_offset))); + + } + operands[1] = gen_rtx_MEM (mode, tmp_reg); + } + emit_insn ((mode == DImode ? gen_stack_protect_set_di : gen_stack_protect_set_si) (operands[0], operands[1])); DONE; }) +(define_insn "reg_stack_protect_address_<mode>" + [(set (match_operand:PTR 0 "register_operand" "=r") + (unspec:PTR [(const_int 0)] + UNSPEC_SSP_SYSREG))] + "aarch64_stack_protector_guard != SSP_GLOBAL" + { + char buf[150]; + snprintf (buf, 150, "mrs\\t%%<w>0, %s", + aarch64_stack_protector_guard_reg_str); + output_asm_insn (buf, operands); + return ""; + } + [(set_attr "type" "mrs")]) + +;; DO NOT SPLIT THIS PATTERN. It is important for security reasons that the +;; canary value does not live beyond the life of this sequence. 
(define_insn "stack_protect_set_<mode>" [(set (match_operand:PTR 0 "memory_operand" "=m") (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")] UNSPEC_SP_SET)) (set (match_scratch:PTR 2 "=&r") (const_int 0))] "" - "ldr\\t%<w>2, %1\;str\\t%<w>2, %0\;mov\t%<w>2,0" + "ldr\\t%<w>2, %1\;str\\t%<w>2, %0\;mov\t%<w>2, 0" [(set_attr "length" "12") (set_attr "type" "multiple")]) @@ -6587,12 +7119,34 @@ machine_mode mode = GET_MODE (operands[0]); result = gen_reg_rtx(mode); - + if (aarch64_stack_protector_guard != SSP_GLOBAL) + { + /* Generate access through the system register. The + sequence we want here is the access + of the stack offset to come with + mrs scratch_reg, <system_register> + add scratch_reg, scratch_reg, :lo12:offset. */ + rtx tmp_reg = gen_reg_rtx (mode); + if (mode == DImode) + { + emit_insn (gen_reg_stack_protect_address_di (tmp_reg)); + emit_insn (gen_adddi3 (tmp_reg, tmp_reg, + GEN_INT (aarch64_stack_protector_guard_offset))); + } + else + { + emit_insn (gen_reg_stack_protect_address_si (tmp_reg)); + emit_insn (gen_addsi3 (tmp_reg, tmp_reg, + GEN_INT (aarch64_stack_protector_guard_offset))); + + } + operands[1] = gen_rtx_MEM (mode, tmp_reg); + } emit_insn ((mode == DImode - ? gen_stack_protect_test_di - : gen_stack_protect_test_si) (result, - operands[0], - operands[1])); + ? gen_stack_protect_test_di + : gen_stack_protect_test_si) (result, + operands[0], + operands[1])); if (mode == DImode) emit_jump_insn (gen_cbranchdi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx), @@ -6686,7 +7240,7 @@ ;; SPECULATION_TRACKER_REGNUM is reserved for this purpose when necessary. (define_insn "speculation_tracker" [(set (reg:DI SPECULATION_TRACKER_REGNUM) - (unspec [(reg:DI SPECULATION_TRACKER_REGNUM) (match_operand 0)] + (unspec:DI [(reg:DI SPECULATION_TRACKER_REGNUM) (match_operand 0)] UNSPEC_SPECULATION_TRACKER))] "" { @@ -6697,11 +7251,48 @@ [(set_attr "type" "csel")] ) -;; Helper for aarch64.c code. 
-(define_expand "set_clobber_cc" - [(parallel [(set (match_operand 0) - (match_operand 1)) - (clobber (reg:CC CC_REGNUM))])]) +;; Like speculation_tracker, but track the inverse condition. +(define_insn "speculation_tracker_rev" + [(set (reg:DI SPECULATION_TRACKER_REGNUM) + (unspec:DI [(reg:DI SPECULATION_TRACKER_REGNUM) (match_operand 0)] + UNSPEC_SPECULATION_TRACKER_REV))] + "" + { + operands[1] = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM); + output_asm_insn ("csel\\t%1, %1, xzr, %M0", operands); + return ""; + } + [(set_attr "type" "csel")] +) + +;; BTI <target> instructions +(define_insn "bti_noarg" + [(unspec_volatile [(const_int 0)] UNSPECV_BTI_NOARG)] + "" + "hint\t32 // bti" + [(set_attr "type" "no_insn")] +) + +(define_insn "bti_c" + [(unspec_volatile [(const_int 0)] UNSPECV_BTI_C)] + "" + "hint\t34 // bti c" + [(set_attr "type" "no_insn")] +) + +(define_insn "bti_j" + [(unspec_volatile [(const_int 0)] UNSPECV_BTI_J)] + "" + "hint\t36 // bti j" + [(set_attr "type" "no_insn")] +) + +(define_insn "bti_jc" + [(unspec_volatile [(const_int 0)] UNSPECV_BTI_JC)] + "" + "hint\t38 // bti jc" + [(set_attr "type" "no_insn")] +) ;; Hard speculation barrier. (define_insn "speculation_barrier" @@ -6717,10 +7308,10 @@ ;; tracking enabled. Use the speculation tracker to decide whether to ;; copy operand 1 to the target, or to copy the fail value (operand 2). 
(define_expand "@despeculate_copy<ALLI_TI:mode>" - [(set (match_operand:ALLI_TI 0 "register_operand" "=r") + [(set (match_operand:ALLI_TI 0 "register_operand") (unspec_volatile:ALLI_TI - [(match_operand:ALLI_TI 1 "register_operand" "r") - (match_operand:ALLI_TI 2 "aarch64_reg_or_zero" "rZ") + [(match_operand:ALLI_TI 1 "register_operand") + (match_operand:ALLI_TI 2 "aarch64_reg_or_zero") (use (reg:DI SPECULATION_TRACKER_REGNUM)) (clobber (reg:CC CC_REGNUM))] UNSPECV_SPECULATION_BARRIER))] "" @@ -6810,6 +7401,160 @@ (set_attr "speculation_barrier" "true")] ) +(define_insn "aarch64_<frintnzs_op><mode>" + [(set (match_operand:VSFDF 0 "register_operand" "=w") + (unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")] + FRINTNZX))] + "TARGET_FRINT && TARGET_FLOAT + && !(VECTOR_MODE_P (<MODE>mode) && !TARGET_SIMD)" + "<frintnzs_op>\\t%<v>0<Vmtype>, %<v>1<Vmtype>" + [(set_attr "type" "f_rint<stype>")] +) + +;; Transactional Memory Extension (TME) instructions. + +(define_insn "tstart" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_TSTART)) + (clobber (mem:BLK (scratch)))] + "TARGET_TME" + "tstart\\t%0" + [(set_attr "type" "tme")] +) + +(define_insn "ttest" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPEC_TTEST)) + (clobber (mem:BLK (scratch)))] + "TARGET_TME" + "ttest\\t%0" + [(set_attr "type" "tme")] +) + +(define_insn "tcommit" + [(unspec_volatile:BLK [(const_int 0)] UNSPECV_TCOMMIT) + (clobber (mem:BLK (scratch)))] + "TARGET_TME" + "tcommit" + [(set_attr "type" "tme")] +) + +(define_insn "tcancel" + [(unspec_volatile:BLK + [(match_operand 0 "const_int_operand" "n")] UNSPECV_TCANCEL) + (clobber (mem:BLK (scratch)))] + "TARGET_TME && (UINTVAL (operands[0]) <= 65535)" + "tcancel\\t#%0" + [(set_attr "type" "tme")] +) + +(define_insn "aarch64_rndr" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPEC_RNDR)) + (set (reg:CC_Z 
CC_REGNUM) + (unspec_volatile:CC_Z [(const_int 0)] UNSPEC_RNDR))] + "TARGET_RNG" + "mrs\t%0, RNDR" + [(set_attr "type" "mrs")] +) + +(define_insn "aarch64_rndrrs" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPEC_RNDRRS)) + (set (reg:CC_Z CC_REGNUM) + (unspec_volatile:CC_Z [(const_int 0)] UNSPEC_RNDRRS))] + "TARGET_RNG" + "mrs\t%0, RNDRRS" + [(set_attr "type" "mrs")] +) + +;; Memory Tagging Extension (MTE) instructions. + +(define_insn "irg" + [(set (match_operand:DI 0 "register_operand" "=rk") + (ior:DI + (and:DI (match_operand:DI 1 "register_operand" "rk") + (const_int -1080863910568919041)) ;; 0xf0ff... + (ashift:DI (unspec:QI [(match_operand:DI 2 "register_operand" "r")] + UNSPEC_GEN_TAG_RND) + (const_int 56))))] + "TARGET_MEMTAG" + "irg\\t%0, %1, %2" + [(set_attr "type" "memtag")] +) + +(define_insn "gmi" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (ashift:DI + (const_int 1) + (and:QI (lshiftrt:DI + (match_operand:DI 1 "register_operand" "rk") + (const_int 56)) (const_int 15))) + (match_operand:DI 2 "register_operand" "r")))] + "TARGET_MEMTAG" + "gmi\\t%0, %1, %2" + [(set_attr "type" "memtag")] +) + +(define_insn "addg" + [(set (match_operand:DI 0 "register_operand" "=rk") + (ior:DI + (and:DI (plus:DI (match_operand:DI 1 "register_operand" "rk") + (match_operand:DI 2 "aarch64_granule16_uimm6" "i")) + (const_int -1080863910568919041)) ;; 0xf0ff... + (ashift:DI + (unspec:QI + [(and:QI (lshiftrt:DI (match_dup 1) (const_int 56)) (const_int 15)) + (match_operand:QI 3 "aarch64_memtag_tag_offset" "i")] + UNSPEC_GEN_TAG) + (const_int 56))))] + "TARGET_MEMTAG" + "addg\\t%0, %1, #%2, #%3" + [(set_attr "type" "memtag")] +) + +(define_insn "subp" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (and:DI (match_operand:DI 1 "register_operand" "rk") + (const_int 72057594037927935)) ;; 0x00ff... 
+ (and:DI (match_operand:DI 2 "register_operand" "rk") + (const_int 72057594037927935))))] ;; 0x00ff... + "TARGET_MEMTAG" + "subp\\t%0, %1, %2" + [(set_attr "type" "memtag")] +) + +;; LDG will use the 16-byte aligned value of the address. +(define_insn "ldg" + [(set (match_operand:DI 0 "register_operand" "+r") + (ior:DI + (and:DI (match_dup 0) (const_int -1080863910568919041)) ;; 0xf0ff... + (ashift:DI + (mem:QI (unspec:DI + [(and:DI (plus:DI (match_operand:DI 1 "register_operand" "rk") + (match_operand:DI 2 "aarch64_granule16_simm9" "i")) + (const_int -16))] UNSPEC_TAG_SPACE)) + (const_int 56))))] + "TARGET_MEMTAG" + "ldg\\t%0, [%1, #%2]" + [(set_attr "type" "memtag")] +) + +;; STG doesn't align the address but aborts with alignment fault +;; when the address is not 16-byte aligned. +(define_insn "stg" + [(set (mem:QI (unspec:DI + [(plus:DI (match_operand:DI 1 "register_operand" "rk") + (match_operand:DI 2 "aarch64_granule16_simm9" "i"))] + UNSPEC_TAG_SPACE)) + (and:QI (lshiftrt:DI (match_operand:DI 0 "register_operand" "rk") + (const_int 56)) (const_int 15)))] + "TARGET_MEMTAG" + "stg\\t%0, [%1, #%2]" + [(set_attr "type" "memtag")] +) + ;; AdvSIMD Stuff (include "aarch64-simd.md") @@ -6821,3 +7566,6 @@ ;; SVE. (include "aarch64-sve.md") + +;; SVE2. +(include "aarch64-sve2.md")