Mercurial > hg > CbC > CbC_gcc
diff gcc/config/aarch64/aarch64.md @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
line wrap: on
line diff
--- a/gcc/config/aarch64/aarch64.md Fri Oct 27 22:46:09 2017 +0900 +++ b/gcc/config/aarch64/aarch64.md Thu Oct 25 07:37:49 2018 +0900 @@ -1,5 +1,5 @@ ;; Machine description for AArch64 architecture. -;; Copyright (C) 2009-2017 Free Software Foundation, Inc. +;; Copyright (C) 2009-2018 Free Software Foundation, Inc. ;; Contributed by ARM Ltd. ;; ;; This file is part of GCC. @@ -57,12 +57,63 @@ (LR_REGNUM 30) (SP_REGNUM 31) (V0_REGNUM 32) + (V1_REGNUM 33) + (V2_REGNUM 34) + (V3_REGNUM 35) + (V4_REGNUM 36) + (V5_REGNUM 37) + (V6_REGNUM 38) + (V7_REGNUM 39) + (V8_REGNUM 40) + (V9_REGNUM 41) + (V10_REGNUM 42) + (V11_REGNUM 43) + (V12_REGNUM 44) + (V13_REGNUM 45) + (V14_REGNUM 46) (V15_REGNUM 47) + (V16_REGNUM 48) + (V17_REGNUM 49) + (V18_REGNUM 50) + (V19_REGNUM 51) + (V20_REGNUM 52) + (V21_REGNUM 53) + (V22_REGNUM 54) + (V23_REGNUM 55) + (V24_REGNUM 56) + (V25_REGNUM 57) + (V26_REGNUM 58) + (V27_REGNUM 59) + (V28_REGNUM 60) + (V29_REGNUM 61) + (V30_REGNUM 62) (V31_REGNUM 63) (LAST_SAVED_REGNUM 63) (SFP_REGNUM 64) (AP_REGNUM 65) (CC_REGNUM 66) + ;; Defined only to make the DWARF description simpler. + (VG_REGNUM 67) + (P0_REGNUM 68) + (P1_REGNUM 69) + (P2_REGNUM 70) + (P3_REGNUM 71) + (P4_REGNUM 72) + (P5_REGNUM 73) + (P6_REGNUM 74) + (P7_REGNUM 75) + (P8_REGNUM 76) + (P9_REGNUM 77) + (P10_REGNUM 78) + (P11_REGNUM 79) + (P12_REGNUM 80) + (P13_REGNUM 81) + (P14_REGNUM 82) + (P15_REGNUM 83) + ;; A couple of call-clobbered registers that we need to reserve when + ;; tracking speculation; this is not ABI, so it is subject to change. 
+ (SPECULATION_TRACKER_REGNUM 15) + (SPECULATION_SCRATCH_REGNUM 14) ] ) @@ -114,7 +165,11 @@ UNSPEC_PACI1716 UNSPEC_PACISP UNSPEC_PRLG_STK + UNSPEC_REV UNSPEC_RBIT + UNSPEC_SABAL + UNSPEC_SABDL2 + UNSPEC_SADALP UNSPEC_SCVTF UNSPEC_SISD_NEG UNSPEC_SISD_SSHL @@ -133,6 +188,9 @@ UNSPEC_TLSLE24 UNSPEC_TLSLE32 UNSPEC_TLSLE48 + UNSPEC_UABAL + UNSPEC_UABDL2 + UNSPEC_UADALP UNSPEC_UCVTF UNSPEC_USHL_2S UNSPEC_VSTRUCTDUMMY @@ -143,6 +201,27 @@ UNSPEC_RSQRTS UNSPEC_NZCV UNSPEC_XPACLRI + UNSPEC_LD1_SVE + UNSPEC_ST1_SVE + UNSPEC_LD1RQ + UNSPEC_LD1_GATHER + UNSPEC_ST1_SCATTER + UNSPEC_MERGE_PTRUE + UNSPEC_PTEST_PTRUE + UNSPEC_UNPACKSHI + UNSPEC_UNPACKUHI + UNSPEC_UNPACKSLO + UNSPEC_UNPACKULO + UNSPEC_PACK + UNSPEC_FLOAT_CONVERT + UNSPEC_WHILE_LO + UNSPEC_LDN + UNSPEC_STN + UNSPEC_INSR + UNSPEC_CLASTB + UNSPEC_FADDA + UNSPEC_REV_SUBREG + UNSPEC_SPECULATION_TRACKER ]) (define_c_enum "unspecv" [ @@ -153,6 +232,7 @@ UNSPECV_SET_FPSR ; Represent assign of FPSR content. UNSPECV_BLOCKAGE ; Represent a blockage UNSPECV_PROBE_STACK_RANGE ; Represent stack range probing. + UNSPECV_SPECULATION_BARRIER ; Represent speculation barrier. ] ) @@ -179,38 +259,54 @@ ;; FP or SIMD registers then the pattern predicate should include TARGET_FLOAT ;; or TARGET_SIMD. +;; Attributes of the architecture required to support the instruction (or +;; alternative). This attribute is used to compute attribute "enabled", use type +;; "any" to enable an alternative in all cases. + +(define_enum "arches" [ any rcpc8_4 fp simd sve fp16]) + +(define_enum_attr "arch" "arches" (const_string "any")) + +;; [For compatibility with Arm in pipeline models] ;; Attribute that specifies whether or not the instruction touches fp -;; registers. When this is set to yes for an alternative, that alternative -;; will be disabled when !TARGET_FLOAT. -(define_attr "fp" "no,yes" (const_string "no")) - -;; Attribute that specifies whether or not the instruction touches half -;; precision fp registers. 
When this is set to yes for an alternative, -;; that alternative will be disabled when !TARGET_FP_F16INST. -(define_attr "fp16" "no,yes" (const_string "no")) - -;; Attribute that specifies whether or not the instruction touches simd -;; registers. When this is set to yes for an alternative, that alternative -;; will be disabled when !TARGET_SIMD. -(define_attr "simd" "no,yes" (const_string "no")) - -(define_attr "length" "" - (const_int 4)) +;; registers. +;; Note that this attribute is not used anywhere in either the arm or aarch64 +;; backends except in the scheduling description for xgene1. In that +;; scheduling description this attribute is used to subclass the load_4 and +;; load_8 types. +(define_attr "fp" "no,yes" + (if_then_else + (eq_attr "arch" "fp") + (const_string "yes") + (const_string "no"))) + +(define_attr "arch_enabled" "no,yes" + (if_then_else + (ior + (eq_attr "arch" "any") + + (and (eq_attr "arch" "rcpc8_4") + (match_test "AARCH64_ISA_RCPC8_4")) + + (and (eq_attr "arch" "fp") + (match_test "TARGET_FLOAT")) + + (and (eq_attr "arch" "simd") + (match_test "TARGET_SIMD")) + + (and (eq_attr "arch" "fp16") + (match_test "TARGET_FP_F16INST")) + + (and (eq_attr "arch" "sve") + (match_test "TARGET_SVE"))) + (const_string "yes") + (const_string "no"))) ;; Attribute that controls whether an alternative is enabled or not. ;; Currently it is only used to disable alternatives which touch fp or simd -;; registers when -mgeneral-regs-only is specified. -(define_attr "enabled" "no,yes" - (cond [(ior - (ior - (and (eq_attr "fp" "yes") - (eq (symbol_ref "TARGET_FLOAT") (const_int 0))) - (and (eq_attr "simd" "yes") - (eq (symbol_ref "TARGET_SIMD") (const_int 0)))) - (and (eq_attr "fp16" "yes") - (eq (symbol_ref "TARGET_FP_F16INST") (const_int 0)))) - (const_string "no") - ] (const_string "yes"))) +;; registers when -mgeneral-regs-only is specified or to require a special +;; architecture support. 
+(define_attr "enabled" "no,yes" (attr "arch_enabled")) ;; Attribute that specifies whether we are dealing with a branch to a ;; label that is far away, i.e. further away than the maximum/minimum @@ -219,10 +315,23 @@ ;; 1 :=: yes (define_attr "far_branch" "" (const_int 0)) +;; Attribute that specifies whether the alternative uses MOVPRFX. +(define_attr "movprfx" "no,yes" (const_string "no")) + +(define_attr "length" "" + (cond [(eq_attr "movprfx" "yes") + (const_int 8) + ] (const_int 4))) + ;; Strictly for compatibility with AArch32 in pipeline models, since AArch64 has ;; no predicated insns. (define_attr "predicated" "yes,no" (const_string "no")) +;; Set to true on an insn that requires the speculation tracking state to be +;; in the tracking register before the insn issues. Otherwise the compiler +;; may choose to hold the tracking state encoded in SP. +(define_attr "speculation_barrier" "true,false" (const_string "false")) + ;; ------------------------------------------------------------------- ;; Pipeline descriptions and scheduling ;; ------------------------------------------------------------------- @@ -626,7 +735,7 @@ (const_int 0)) (label_ref (match_operand 1 "" "")) (pc)))] - "" + "!aarch64_track_speculation" { if (get_attr_length (insn) == 8) return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, "); @@ -656,7 +765,7 @@ (label_ref (match_operand 2 "" "")) (pc))) (clobber (reg:CC CC_REGNUM))] - "" + "!aarch64_track_speculation" { if (get_attr_length (insn) == 8) { @@ -692,7 +801,7 @@ (label_ref (match_operand 1 "" "")) (pc))) (clobber (reg:CC CC_REGNUM))] - "" + "!aarch64_track_speculation" { if (get_attr_length (insn) == 8) { @@ -749,7 +858,7 @@ "" "@ blr\\t%0 - bl\\t%a0" + bl\\t%c0" [(set_attr "type" "call, call")] ) @@ -775,7 +884,7 @@ "" "@ blr\\t%1 - bl\\t%a1" + bl\\t%c1" [(set_attr "type" "call, call")] ) @@ -811,7 +920,7 @@ "SIBLING_CALL_P (insn)" "@ br\\t%0 - b\\t%a0" + b\\t%c0" [(set_attr "type" "branch, branch")] ) @@ -824,7 
+933,7 @@ "SIBLING_CALL_P (insn)" "@ br\\t%1 - b\\t%a1" + b\\t%c1" [(set_attr "type" "branch, branch")] ) @@ -866,12 +975,18 @@ " if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) operands[1] = force_reg (<MODE>mode, operands[1]); + + if (GET_CODE (operands[1]) == CONST_POLY_INT) + { + aarch64_expand_mov_immediate (operands[0], operands[1]); + DONE; + } " ) (define_insn "*mov<mode>_aarch64" - [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, *w,r,*w, m, m, r,*w,*w") - (match_operand:SHORT 1 "general_operand" " r,M,D<hq>,m, m,rZ,*w,*w, r,*w"))] + [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w") + (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,r,w"))] "(register_operand (operands[0], <MODE>mode) || aarch64_reg_or_zero (operands[1], <MODE>mode))" { @@ -885,26 +1000,29 @@ return aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode); case 3: + return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]); + case 4: return "ldr<size>\t%w0, %1"; - case 4: + case 5: return "ldr\t%<size>0, %1"; - case 5: - return "str<size>\t%w1, %0"; case 6: + return "str<size>\t%w1, %0"; + case 7: return "str\t%<size>1, %0"; - case 7: + case 8: return "umov\t%w0, %1.<v>[0]"; - case 8: + case 9: return "dup\t%0.<Vallxd>, %w1"; - case 9: + case 10: return "dup\t%<Vetype>0, %1.<v>[0]"; default: gcc_unreachable (); } } - [(set_attr "type" "mov_reg,mov_imm,neon_move,load_4,load_4,store_4,store_4,\ - neon_to_gp<q>,neon_from_gp<q>,neon_dup") - (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")] + ;; The "mov_imm" type for CNT is just a placeholder. 
+ [(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4, + store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup") + (set_attr "arch" "*,*,simd,sve,*,*,*,*,simd,simd,simd")] ) (define_expand "mov<mode>" @@ -932,8 +1050,8 @@ ) (define_insn_and_split "*movsi_aarch64" - [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r,w, m, m, r, r, w,r,w, w") - (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,m,m,rZ,*w,Usa,Ush,rZ,w,w,Ds"))] + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, w,r,w, w") + (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usa,Ush,rZ,w,w,Ds"))] "(register_operand (operands[0], SImode) || aarch64_reg_or_zero (operands[1], SImode))" "@ @@ -942,11 +1060,12 @@ mov\\t%w0, %w1 mov\\t%w0, %1 # + * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]); ldr\\t%w0, %1 ldr\\t%s0, %1 str\\t%w1, %0 str\\t%s1, %0 - adr\\t%x0, %a1 + adr\\t%x0, %c1 adrp\\t%x0, %A1 fmov\\t%s0, %w1 fmov\\t%w0, %s1 @@ -959,15 +1078,15 @@ aarch64_expand_mov_immediate (operands[0], operands[1]); DONE; }" - [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load_4,load_4,store_4,store_4,\ - adr,adr,f_mcr,f_mrc,fmov,neon_move") - (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") - (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] + ;; The "mov_imm" type for CNT is just a placeholder. 
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4, + load_4,store_4,store_4,adr,adr,f_mcr,f_mrc,fmov,neon_move") + (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,fp,fp,fp,simd")] ) (define_insn_and_split "*movdi_aarch64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r,w, m,m, r, r, w,r,w, w") - (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,m,m,rZ,w,Usa,Ush,rZ,w,w,Dd"))] + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m, r, r, w,r,w, w") + (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,Usv,m,m,rZ,w,Usa,Ush,rZ,w,w,Dd"))] "(register_operand (operands[0], DImode) || aarch64_reg_or_zero (operands[1], DImode))" "@ @@ -977,11 +1096,12 @@ mov\\t%x0, %1 mov\\t%w0, %1 # + * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]); ldr\\t%x0, %1 ldr\\t%d0, %1 str\\t%x1, %0 str\\t%d1, %0 - adr\\t%x0, %a1 + adr\\t%x0, %c1 adrp\\t%x0, %A1 fmov\\t%d0, %x1 fmov\\t%x0, %d1 @@ -994,10 +1114,11 @@ aarch64_expand_mov_immediate (operands[0], operands[1]); DONE; }" - [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_8,\ - load_8,store_8,store_8,adr,adr,f_mcr,f_mrc,fmov,neon_move") - (set_attr "fp" "*,*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") - (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] + ;; The "mov_imm" type for CNTD is just a placeholder. 
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm, + load_8,load_8,store_8,store_8,adr,adr,f_mcr,f_mrc,fmov, + neon_move") + (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,fp,fp,fp,simd")] ) (define_insn "insv_imm<mode>" @@ -1018,14 +1139,22 @@ " if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) operands[1] = force_reg (TImode, operands[1]); + + if (GET_CODE (operands[1]) == CONST_POLY_INT) + { + emit_move_insn (gen_lowpart (DImode, operands[0]), + gen_lowpart (DImode, operands[1])); + emit_move_insn (gen_highpart (DImode, operands[0]), const0_rtx); + DONE; + } " ) (define_insn "*movti_aarch64" [(set (match_operand:TI 0 - "nonimmediate_operand" "=r, w,r,w,r,m,m,w,m") + "nonimmediate_operand" "= r,w, r,w,r,m,m,w,m") (match_operand:TI 1 - "aarch64_movti_operand" " rn,r,w,w,m,r,Z,m,w"))] + "aarch64_movti_operand" " rUti,r, w,w,m,r,Z,m,w"))] "(register_operand (operands[0], TImode) || aarch64_reg_or_zero (operands[1], TImode))" "@ @@ -1042,8 +1171,7 @@ load_16,store_16,store_16,\ load_16,store_16") (set_attr "length" "8,8,8,4,4,4,4,4,4") - (set_attr "simd" "*,*,*,yes,*,*,*,*,*") - (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")] + (set_attr "arch" "*,*,*,simd,*,*,*,fp,fp")] ) ;; Split a TImode register-register or register-immediate move into @@ -1066,7 +1194,7 @@ { if (!TARGET_FLOAT) { - aarch64_err_no_fpadvsimd (<MODE>mode, "code"); + aarch64_err_no_fpadvsimd (<MODE>mode); FAIL; } @@ -1078,26 +1206,26 @@ ) (define_insn "*movhf_aarch64" - [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r") - (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))] + [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r") + (match_operand:HF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))] "TARGET_FLOAT && (register_operand (operands[0], HFmode) || aarch64_reg_or_fp_zero (operands[1], HFmode))" "@ movi\\t%0.4h, #0 fmov\\t%h0, %w1 + dup\\t%w0.4h, %w1 umov\\t%w0, %1.h[0] 
mov\\t%0.h[0], %1.h[0] fmov\\t%h0, %1 - * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode); + * return aarch64_output_scalar_simd_mov_immediate (operands[1], HImode); ldr\\t%h0, %1 str\\t%h1, %0 ldrh\\t%w0, %1 strh\\t%w1, %0 mov\\t%w0, %w1" - [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \ + [(set_attr "type" "neon_move,f_mcr,neon_move,neon_to_gp, neon_move,fconsts, \ neon_move,f_loads,f_stores,load_4,store_4,mov_reg") - (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*") - (set_attr "fp16" "*,yes,*,*,yes,*,*,*,*,*,*")] + (set_attr "arch" "simd,fp16,simd,simd,simd,fp16,simd,*,*,*,*,*")] ) (define_insn "*movsf_aarch64" @@ -1121,7 +1249,7 @@ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,neon_move,\ f_loads,f_stores,load_4,store_4,mov_reg,\ fconsts") - (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")] + (set_attr "arch" "simd,*,*,*,*,simd,*,*,*,*,*,*")] ) (define_insn "*movdf_aarch64" @@ -1145,7 +1273,7 @@ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\ f_loadd,f_stored,load_8,store_8,mov_reg,\ fconstd") - (set_attr "simd" "yes,*,*,*,*,yes,*,*,*,*,*,*")] + (set_attr "arch" "simd,*,*,*,*,simd,*,*,*,*,*,*")] ) (define_split @@ -1190,7 +1318,7 @@ [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\ f_loadd,f_stored,load_16,store_16,store_16") (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4") - (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")] + (set_attr "arch" "simd,*,*,*,simd,*,*,*,*,*,*")] ) (define_split @@ -1224,139 +1352,72 @@ ;; Operands 1 and 3 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. 
-(define_insn "load_pairsi" - [(set (match_operand:SI 0 "register_operand" "=r,*w") - (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:SI 2 "register_operand" "=r,*w") - (match_operand:SI 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (SImode)))" +(define_insn "load_pair_sw_<SX:mode><SX2:mode>" + [(set (match_operand:SX 0 "register_operand" "=r,w") + (match_operand:SX 1 "aarch64_mem_pair_operand" "Ump,Ump")) + (set (match_operand:SX2 2 "register_operand" "=r,w") + (match_operand:SX2 3 "memory_operand" "m,m"))] + "rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (<SX:MODE>mode)))" "@ ldp\\t%w0, %w2, %1 ldp\\t%s0, %s2, %1" [(set_attr "type" "load_8,neon_load1_2reg") - (set_attr "fp" "*,yes")] -) - -(define_insn "load_pairdi" - [(set (match_operand:DI 0 "register_operand" "=r,*w") - (match_operand:DI 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:DI 2 "register_operand" "=r,*w") - (match_operand:DI 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (DImode)))" + (set_attr "arch" "*,fp")] +) + +;; Loading different modes that can still be merged +(define_insn "load_pair_dw_<DX:mode><DX2:mode>" + [(set (match_operand:DX 0 "register_operand" "=r,w") + (match_operand:DX 1 "aarch64_mem_pair_operand" "Ump,Ump")) + (set (match_operand:DX2 2 "register_operand" "=r,w") + (match_operand:DX2 3 "memory_operand" "m,m"))] + "rtx_equal_p (XEXP (operands[3], 0), + plus_constant (Pmode, + XEXP (operands[1], 0), + GET_MODE_SIZE (<DX:MODE>mode)))" "@ ldp\\t%x0, %x2, %1 ldp\\t%d0, %d2, %1" [(set_attr "type" "load_16,neon_load1_2reg") - (set_attr "fp" "*,yes")] -) - + (set_attr "arch" "*,fp")] +) ;; Operands 0 and 2 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. 
-(define_insn "store_pairsi" - [(set (match_operand:SI 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:SI 1 "aarch64_reg_or_zero" "rZ,*w")) - (set (match_operand:SI 2 "memory_operand" "=m,m") - (match_operand:SI 3 "aarch64_reg_or_zero" "rZ,*w"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (SImode)))" +(define_insn "store_pair_sw_<SX:mode><SX2:mode>" + [(set (match_operand:SX 0 "aarch64_mem_pair_operand" "=Ump,Ump") + (match_operand:SX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w")) + (set (match_operand:SX2 2 "memory_operand" "=m,m") + (match_operand:SX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))] + "rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (<SX:MODE>mode)))" "@ stp\\t%w1, %w3, %0 stp\\t%s1, %s3, %0" [(set_attr "type" "store_8,neon_store1_2reg") - (set_attr "fp" "*,yes")] -) - -(define_insn "store_pairdi" - [(set (match_operand:DI 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:DI 1 "aarch64_reg_or_zero" "rZ,*w")) - (set (match_operand:DI 2 "memory_operand" "=m,m") - (match_operand:DI 3 "aarch64_reg_or_zero" "rZ,*w"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (DImode)))" + (set_attr "arch" "*,fp")] +) + +;; Storing different modes that can still be merged +(define_insn "store_pair_dw_<DX:mode><DX2:mode>" + [(set (match_operand:DX 0 "aarch64_mem_pair_operand" "=Ump,Ump") + (match_operand:DX 1 "aarch64_reg_zero_or_fp_zero" "rYZ,w")) + (set (match_operand:DX2 2 "memory_operand" "=m,m") + (match_operand:DX2 3 "aarch64_reg_zero_or_fp_zero" "rYZ,w"))] + "rtx_equal_p (XEXP (operands[2], 0), + plus_constant (Pmode, + XEXP (operands[0], 0), + GET_MODE_SIZE (<DX:MODE>mode)))" "@ stp\\t%x1, %x3, %0 stp\\t%d1, %d3, %0" [(set_attr "type" "store_16,neon_store1_2reg") - (set_attr "fp" "*,yes")] -) - -;; Operands 1 and 3 are tied together by the final condition; so we allow -;; fairly 
lax checking on the second memory operation. -(define_insn "load_pairsf" - [(set (match_operand:SF 0 "register_operand" "=w,*r") - (match_operand:SF 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:SF 2 "register_operand" "=w,*r") - (match_operand:SF 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (SFmode)))" - "@ - ldp\\t%s0, %s2, %1 - ldp\\t%w0, %w2, %1" - [(set_attr "type" "neon_load1_2reg,load_8") - (set_attr "fp" "yes,*")] -) - -(define_insn "load_pairdf" - [(set (match_operand:DF 0 "register_operand" "=w,*r") - (match_operand:DF 1 "aarch64_mem_pair_operand" "Ump,Ump")) - (set (match_operand:DF 2 "register_operand" "=w,*r") - (match_operand:DF 3 "memory_operand" "m,m"))] - "rtx_equal_p (XEXP (operands[3], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (DFmode)))" - "@ - ldp\\t%d0, %d2, %1 - ldp\\t%x0, %x2, %1" - [(set_attr "type" "neon_load1_2reg,load_16") - (set_attr "fp" "yes,*")] -) - -;; Operands 0 and 2 are tied together by the final condition; so we allow -;; fairly lax checking on the second memory operation. 
-(define_insn "store_pairsf" - [(set (match_operand:SF 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:SF 1 "aarch64_reg_or_fp_zero" "w,*rY")) - (set (match_operand:SF 2 "memory_operand" "=m,m") - (match_operand:SF 3 "aarch64_reg_or_fp_zero" "w,*rY"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (SFmode)))" - "@ - stp\\t%s1, %s3, %0 - stp\\t%w1, %w3, %0" - [(set_attr "type" "neon_store1_2reg,store_8") - (set_attr "fp" "yes,*")] -) - -(define_insn "store_pairdf" - [(set (match_operand:DF 0 "aarch64_mem_pair_operand" "=Ump,Ump") - (match_operand:DF 1 "aarch64_reg_or_fp_zero" "w,*rY")) - (set (match_operand:DF 2 "memory_operand" "=m,m") - (match_operand:DF 3 "aarch64_reg_or_fp_zero" "w,*rY"))] - "rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[0], 0), - GET_MODE_SIZE (DFmode)))" - "@ - stp\\t%d1, %d3, %0 - stp\\t%x1, %x3, %0" - [(set_attr "type" "neon_store1_2reg,store_16") - (set_attr "fp" "yes,*")] + (set_attr "arch" "*,fp")] ) ;; Load pair with post-index writeback. 
This is primarily used in function @@ -1459,26 +1520,34 @@ ) (define_insn "*zero_extendsidi2_aarch64" - [(set (match_operand:DI 0 "register_operand" "=r,r") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))] + [(set (match_operand:DI 0 "register_operand" "=r,r,w,w,r,w") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m,r,m,w,w")))] "" "@ uxtw\t%0, %w1 - ldr\t%w0, %1" - [(set_attr "type" "extend,load_4")] + ldr\t%w0, %1 + fmov\t%s0, %w1 + ldr\t%s0, %1 + fmov\t%w0, %s1 + fmov\t%s0, %s1" + [(set_attr "type" "extend,load_4,f_mcr,f_loads,f_mrc,fmov") + (set_attr "arch" "*,*,fp,fp,fp,fp")] ) (define_insn "*load_pair_zero_extendsidi2_aarch64" - [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump"))) - (set (match_operand:DI 2 "register_operand" "=r") - (zero_extend:DI (match_operand:SI 3 "memory_operand" "m")))] + [(set (match_operand:DI 0 "register_operand" "=r,w") + (zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump"))) + (set (match_operand:DI 2 "register_operand" "=r,w") + (zero_extend:DI (match_operand:SI 3 "memory_operand" "m,m")))] "rtx_equal_p (XEXP (operands[3], 0), plus_constant (Pmode, XEXP (operands[1], 0), GET_MODE_SIZE (SImode)))" - "ldp\\t%w0, %w2, %1" - [(set_attr "type" "load_8")] + "@ + ldp\t%w0, %w2, %1 + ldp\t%s0, %s2, %1" + [(set_attr "type" "load_8,neon_load1_2reg") + (set_attr "arch" "*,fp")] ) (define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2" @@ -1498,14 +1567,15 @@ ) (define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64" - [(set (match_operand:GPI 0 "register_operand" "=r,r,*w") + [(set (match_operand:GPI 0 "register_operand" "=r,r,w") (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m")))] "" "@ and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask> ldr<SHORT:size>\t%w0, %1 ldr\t%<SHORT:size>0, %1" - [(set_attr "type" "logic_imm,load_4,load_4")] + [(set_attr "type" "logic_imm,load_4,f_loads") + 
(set_attr "arch" "*,*,fp")] ) (define_expand "<optab>qihi2" @@ -1542,7 +1612,7 @@ [(set (match_operand:GPI 0 "register_operand" "") (plus:GPI (match_operand:GPI 1 "register_operand" "") - (match_operand:GPI 2 "aarch64_pluslong_operand" "")))] + (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "")))] "" { /* If operands[1] is a subreg extract the inner RTX. */ @@ -1555,23 +1625,34 @@ && (!REG_P (op1) || !REGNO_PTR_FRAME_P (REGNO (op1)))) operands[2] = force_reg (<MODE>mode, operands[2]); + /* Expand polynomial additions now if the destination is the stack + pointer, since we don't want to use that as a temporary. */ + else if (operands[0] == stack_pointer_rtx + && aarch64_split_add_offset_immediate (operands[2], <MODE>mode)) + { + aarch64_split_add_offset (<MODE>mode, operands[0], operands[1], + operands[2], NULL_RTX, NULL_RTX); + DONE; + } }) (define_insn "*add<mode>3_aarch64" [(set - (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r") + (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,rk") (plus:GPI - (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk") - (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Upl")))] + (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,rk") + (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uav")))] "" "@ add\\t%<w>0, %<w>1, %2 add\\t%<w>0, %<w>1, %<w>2 add\\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas> sub\\t%<w>0, %<w>1, #%n2 - #" - [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple") - (set_attr "simd" "*,*,yes,*,*")] + # + * return aarch64_output_sve_addvl_addpl (operands[0], operands[1], operands[2]);" + ;; The "alu_imm" type for ADDVL/ADDPL is just a placeholder. 
+ [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm") + (set_attr "arch" "*,*,simd,*,*,*")] ) ;; zero_extend version of above @@ -1580,7 +1661,7 @@ (match_operand:DI 0 "register_operand" "=rk,rk,rk,r") (zero_extend:DI (plus:SI (match_operand:SI 1 "register_operand" "%rk,rk,rk,rk") - (match_operand:SI 2 "aarch64_pluslong_operand" "I,r,J,Upl"))))] + (match_operand:SI 2 "aarch64_pluslong_operand" "I,r,J,Uaa"))))] "" "@ add\\t%w0, %w1, %2 @@ -1633,6 +1714,48 @@ } ) +;; Match addition of polynomial offsets that require one temporary, for which +;; we can use the early-clobbered destination register. This is a separate +;; pattern so that the early clobber doesn't affect register allocation +;; for other forms of addition. However, we still need to provide an +;; all-register alternative, in case the offset goes out of range after +;; elimination. For completeness we might as well provide all GPR-based +;; alternatives from the main pattern. +;; +;; We don't have a pattern for additions requiring two temporaries since at +;; present LRA doesn't allow new scratches to be added during elimination. +;; Such offsets should be rare anyway. +;; +;; ??? But if we added LRA support for new scratches, much of the ugliness +;; here would go away. We could just handle all polynomial constants in +;; this pattern. 
+(define_insn_and_split "*add<mode>3_poly_1" + [(set + (match_operand:GPI 0 "register_operand" "=r,r,r,r,r,&r") + (plus:GPI + (match_operand:GPI 1 "register_operand" "%rk,rk,rk,rk,rk,rk") + (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "I,r,J,Uaa,Uav,Uat")))] + "TARGET_SVE && operands[0] != stack_pointer_rtx" + "@ + add\\t%<w>0, %<w>1, %2 + add\\t%<w>0, %<w>1, %<w>2 + sub\\t%<w>0, %<w>1, #%n2 + # + * return aarch64_output_sve_addvl_addpl (operands[0], operands[1], operands[2]); + #" + "&& epilogue_completed + && !reg_overlap_mentioned_p (operands[0], operands[1]) + && aarch64_split_add_offset_immediate (operands[2], <MODE>mode)" + [(const_int 0)] + { + aarch64_split_add_offset (<MODE>mode, operands[0], operands[1], + operands[2], operands[0], NULL_RTX); + DONE; + } + ;; The "alu_imm" type for ADDVL/ADDPL is just a placeholder. + [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple,alu_imm,multiple")] +) + (define_split [(set (match_operand:DI 0 "register_operand") (zero_extend:DI @@ -1651,25 +1774,133 @@ } ) +(define_expand "addv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (label_ref (match_operand 3 "" ""))] + "" +{ + emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + + DONE; +}) + +(define_expand "uaddv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 2 "register_operand") + (label_ref (match_operand 3 "" ""))] + "" +{ + emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); + + DONE; +}) + (define_expand "addti3" [(set (match_operand:TI 0 "register_operand" "") (plus:TI (match_operand:TI 1 "register_operand" "") - (match_operand:TI 2 "register_operand" "")))] + (match_operand:TI 2 "aarch64_reg_or_imm" "")))] + "" +{ + rtx 
low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; + + aarch64_addti_scratch_regs (operands[1], operands[2], + &low_dest, &op1_low, &op2_low, + &high_dest, &op1_high, &op2_high); + + if (op2_low == const0_rtx) + { + low_dest = op1_low; + if (!aarch64_pluslong_operand (op2_high, DImode)) + op2_high = force_reg (DImode, op2_high); + emit_insn (gen_adddi3 (high_dest, op1_high, op2_high)); + } + else + { + emit_insn (gen_adddi3_compareC (low_dest, op1_low, + force_reg (DImode, op2_low))); + emit_insn (gen_adddi3_carryin (high_dest, op1_high, + force_reg (DImode, op2_high))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest); + emit_move_insn (gen_highpart (DImode, operands[0]), high_dest); + + DONE; +}) + +(define_expand "addvti4" + [(match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "register_operand" "") + (match_operand:TI 2 "aarch64_reg_or_imm" "") + (label_ref (match_operand 3 "" ""))] "" { - rtx low = gen_reg_rtx (DImode); - emit_insn (gen_adddi3_compareC (low, gen_lowpart (DImode, operands[1]), - gen_lowpart (DImode, operands[2]))); - - rtx high = gen_reg_rtx (DImode); - emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), - gen_highpart (DImode, operands[2]))); - - emit_move_insn (gen_lowpart (DImode, operands[0]), low); - emit_move_insn (gen_highpart (DImode, operands[0]), high); + rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; + + aarch64_addti_scratch_regs (operands[1], operands[2], + &low_dest, &op1_low, &op2_low, + &high_dest, &op1_high, &op2_high); + + if (op2_low == const0_rtx) + { + low_dest = op1_low; + emit_insn (gen_adddi3_compareV (high_dest, op1_high, + force_reg (DImode, op2_high))); + } + else + { + emit_insn (gen_adddi3_compareC (low_dest, op1_low, + force_reg (DImode, op2_low))); + emit_insn (gen_adddi3_carryinV (high_dest, op1_high, + force_reg (DImode, op2_high))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest); + emit_move_insn (gen_highpart 
(DImode, operands[0]), high_dest); + + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); DONE; }) +(define_expand "uaddvti4" + [(match_operand:TI 0 "register_operand" "") + (match_operand:TI 1 "register_operand" "") + (match_operand:TI 2 "aarch64_reg_or_imm" "") + (label_ref (match_operand 3 "" ""))] + "" +{ + rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; + + aarch64_addti_scratch_regs (operands[1], operands[2], + &low_dest, &op1_low, &op2_low, + &high_dest, &op1_high, &op2_high); + + if (op2_low == const0_rtx) + { + low_dest = op1_low; + emit_insn (gen_adddi3_compareC (high_dest, op1_high, + force_reg (DImode, op2_high))); + } + else + { + emit_insn (gen_adddi3_compareC (low_dest, op1_low, + force_reg (DImode, op2_low))); + emit_insn (gen_adddi3_carryinC (high_dest, op1_high, + force_reg (DImode, op2_high))); + } + + emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest); + emit_move_insn (gen_highpart (DImode, operands[0]), high_dest); + + aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); + DONE; + }) + (define_insn "add<mode>3_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ @@ -1755,7 +1986,7 @@ (define_insn "add<mode>3_compareC" [(set (reg:CC_C CC_REGNUM) - (ne:CC_C + (compare:CC_C (plus:<DWI> (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) @@ -1768,10 +1999,71 @@ [(set_attr "type" "alus_sreg")] ) +(define_insn "*add<mode>3_compareV_cconly_imm" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r")) + (match_operand:<DWI> 1 "const_scalar_int_operand" "")) + (sign_extend:<DWI> + (plus:GPI + (match_dup 0) + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")))))] + "INTVAL (operands[1]) == INTVAL (operands[2])" + "@ + cmn\\t%<w>0, %<w>1 + cmp\\t%<w>0, #%n1" + [(set_attr "type" "alus_imm")] +) + +(define_insn "*add<mode>3_compareV_cconly" + [(set (reg:CC_V CC_REGNUM) 
+ (compare:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 0 "register_operand" "r")) + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))] + "" + "cmn\\t%<w>0, %<w>1" + [(set_attr "type" "alus_sreg")] +) + +(define_insn "*add<mode>3_compareV_imm" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r,r")) + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")) + (sign_extend:<DWI> + (plus:GPI (match_dup 1) (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "@ + adds\\t%<w>0, %<w>1, %<w>2 + subs\\t%<w>0, %<w>1, #%n2" + [(set_attr "type" "alus_imm,alus_imm")] +) + +(define_insn "add<mode>3_compareV" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus:<DWI> + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")) + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> (plus:GPI (match_dup 1) (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (match_dup 1) (match_dup 2)))] + "" + "adds\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "alus_sreg")] +) + (define_insn "*adds_shift_imm_<mode>" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ - (plus:GPI (ASHIFT:GPI + (plus:GPI (ASHIFT:GPI (match_operand:GPI 1 "register_operand" "r") (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n")) (match_operand:GPI 3 "register_operand" "r")) @@ -2138,6 +2430,138 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "add<mode>3_carryinC" + [(parallel + [(set (match_dup 3) + (compare:CC_C + (plus:<DWI> + (plus:<DWI> + (match_dup 4) + (zero_extend:<DWI> + (match_operand:GPI 1 "register_operand" ""))) + (zero_extend:<DWI> + (match_operand:GPI 2 "register_operand" ""))) + (zero_extend:<DWI> + (plus:GPI + (plus:GPI (match_dup 5) (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 
"register_operand") + (plus:GPI + (plus:GPI (match_dup 5) (match_dup 1)) + (match_dup 2)))])] + "" +{ + operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx); + operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx); +}) + +(define_insn "*add<mode>3_carryinC_zero" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:<DWI> + (match_operand:<DWI> 2 "aarch64_carry_operation" "") + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (match_dup 3) (match_dup 1)))] + "" + "adcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add<mode>3_carryinC" + [(set (reg:CC_C CC_REGNUM) + (compare:CC_C + (plus:<DWI> + (plus:<DWI> + (match_operand:<DWI> 3 "aarch64_carry_operation" "") + (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (zero_extend:<DWI> + (plus:GPI + (plus:GPI + (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))] + "" + "adcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + +(define_expand "add<mode>3_carryinV" + [(parallel + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus:<DWI> + (plus:<DWI> + (match_dup 3) + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" ""))) + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" ""))) + (sign_extend:<DWI> + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))])] + "" +{ + rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM); + operands[3] = gen_rtx_NE 
(<DWI>mode, cc, const0_rtx); + operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx); +}) + +(define_insn "*add<mode>3_carryinV_zero" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus:<DWI> + (match_operand:<DWI> 2 "aarch64_carry_operation" "") + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (match_operand:GPI 3 "aarch64_carry_operation" "") + (match_dup 1))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (match_dup 3) (match_dup 1)))] + "" + "adcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*add<mode>3_carryinV" + [(set (reg:CC_V CC_REGNUM) + (compare:CC_V + (plus:<DWI> + (plus:<DWI> + (match_operand:<DWI> 3 "aarch64_carry_operation" "") + (sign_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))) + (sign_extend:<DWI> (match_operand:GPI 2 "register_operand" "r"))) + (sign_extend:<DWI> + (plus:GPI + (plus:GPI + (match_operand:GPI 4 "aarch64_carry_operation" "") + (match_dup 1)) + (match_dup 2))))) + (set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI + (plus:GPI (match_dup 4) (match_dup 1)) + (match_dup 2)))] + "" + "adcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*add_uxt<mode>_shift2" [(set (match_operand:GPI 0 "register_operand" "=rk") (plus:GPI (and:GPI @@ -2231,25 +2655,90 @@ sub\\t%x0, %x1, %x2 sub\\t%d0, %d1, %d2" [(set_attr "type" "alu_sreg, neon_sub") - (set_attr "simd" "*,yes")] -) + (set_attr "arch" "*,simd")] +) + +(define_expand "subv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero") + (label_ref (match_operand 3 "" ""))] + "" +{ + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + + DONE; +}) + +(define_expand "usubv<mode>4" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "aarch64_reg_or_zero") + 
(match_operand:GPI 2 "aarch64_reg_or_zero") + (label_ref (match_operand 3 "" ""))] + "" +{ + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); + + DONE; +}) (define_expand "subti3" [(set (match_operand:TI 0 "register_operand" "") - (minus:TI (match_operand:TI 1 "register_operand" "") + (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "") (match_operand:TI 2 "register_operand" "")))] "" { - rtx low = gen_reg_rtx (DImode); - emit_insn (gen_subdi3_compare1 (low, gen_lowpart (DImode, operands[1]), - gen_lowpart (DImode, operands[2]))); - - rtx high = gen_reg_rtx (DImode); - emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]), - gen_highpart (DImode, operands[2]))); - - emit_move_insn (gen_lowpart (DImode, operands[0]), low); - emit_move_insn (gen_highpart (DImode, operands[0]), high); + rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; + + aarch64_subvti_scratch_regs (operands[1], operands[2], + &low_dest, &op1_low, &op2_low, + &high_dest, &op1_high, &op2_high); + + emit_insn (gen_subdi3_compare1 (low_dest, op1_low, op2_low)); + emit_insn (gen_subdi3_carryin (high_dest, op1_high, op2_high)); + + emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest); + emit_move_insn (gen_highpart (DImode, operands[0]), high_dest); + DONE; +}) + +(define_expand "subvti4" + [(match_operand:TI 0 "register_operand") + (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 2 "aarch64_reg_or_imm") + (label_ref (match_operand 3 "" ""))] + "" +{ + rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; + + aarch64_subvti_scratch_regs (operands[1], operands[2], + &low_dest, &op1_low, &op2_low, + &high_dest, &op1_high, &op2_high); + aarch64_expand_subvti (operands[0], low_dest, op1_low, op2_low, + high_dest, op1_high, op2_high); + + aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE; +}) + +(define_expand "usubvti4" + [(match_operand:TI 0 
"register_operand") + (match_operand:TI 1 "aarch64_reg_or_zero") + (match_operand:TI 2 "aarch64_reg_or_imm") + (label_ref (match_operand 3 "" ""))] + "" +{ + rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; + + aarch64_subvti_scratch_regs (operands[1], operands[2], + &low_dest, &op1_low, &op2_low, + &high_dest, &op1_high, &op2_high); + aarch64_expand_subvti (operands[0], low_dest, op1_low, op2_low, + high_dest, op1_high, op2_high); + + aarch64_gen_unlikely_cbranch (LTU, CCmode, operands[3]); DONE; }) @@ -2278,6 +2767,22 @@ [(set_attr "type" "alus_sreg")] ) +(define_insn "*sub<mode>3_compare1_imm" + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ,rZ") + (match_operand:GPI 2 "aarch64_plus_immediate" "I,J"))) + (set (match_operand:GPI 0 "register_operand" "=r,r") + (plus:GPI + (match_dup 1) + (match_operand:GPI 3 "aarch64_plus_immediate" "J,I")))] + "UINTVAL (operands[2]) == -UINTVAL (operands[3])" + "@ + subs\\t%<w>0, %<w>1, #%n3 + adds\\t%<w>0, %<w>1, %3" + [(set_attr "type" "alus_imm")] +) + (define_insn "sub<mode>3_compare1" [(set (reg:CC CC_REGNUM) (compare:CC @@ -2321,6 +2826,26 @@ } ) +;; Same as the above peephole but with the compare and minus in +;; swapped order. The restriction on overlap between operand 0 +;; and operands 1 and 2 doesn't apply here. +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:GPI 1 "aarch64_reg_or_zero") + (match_operand:GPI 2 "aarch64_reg_or_zero"))) + (set (match_operand:GPI 0 "register_operand") + (minus:GPI (match_dup 1) + (match_dup 2)))] + "" + [(const_int 0)] + { + emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], + operands[2])); + DONE; + } +) + (define_peephole2 [(set (match_operand:GPI 0 "register_operand") (plus:GPI (match_operand:GPI 1 "register_operand") @@ -2339,6 +2864,26 @@ } ) +;; Same as the above peephole but with the compare and minus in +;; swapped order. 
The restriction on overlap between operand 0 +;; and operands 1 doesn't apply here. +(define_peephole2 + [(set (reg:CC CC_REGNUM) + (compare:CC + (match_operand:GPI 1 "register_operand") + (match_operand:GPI 3 "const_int_operand"))) + (set (match_operand:GPI 0 "register_operand") + (plus:GPI (match_dup 1) + (match_operand:GPI 2 "aarch64_sub_immediate")))] + "INTVAL (operands[3]) == -INTVAL (operands[2])" + [(const_int 0)] + { + emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } +) + (define_insn "*sub_<shift>_<mode>" [(set (match_operand:GPI 0 "register_operand" "=r") (minus:GPI (match_operand:GPI 3 "register_operand" "r") @@ -2554,6 +3099,85 @@ [(set_attr "type" "adc_reg")] ) +(define_expand "sub<mode>3_carryinCV" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "aarch64_reg_or_zero" "")) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "")) + (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0))))) + (set (match_operand:GPI 0 "register_operand" "") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] + "" +) + +(define_insn "*sub<mode>3_carryinCV_z1_z2" + [(set (reg:CC CC_REGNUM) + (compare:CC + (const_int 0) + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (neg:GPI (match_operand:GPI 1 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, <w>zr, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV_z1" + [(set (reg:CC CC_REGNUM) + (compare:CC + (const_int 0) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:<DWI> 2 "aarch64_borrow_operation" "")))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (neg:GPI (match_dup 1)) + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, <w>zr, %<w>1" + [(set_attr 
"type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV_z2" + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (match_dup 1) + (match_operand:GPI 3 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, %<w>1, <w>zr" + [(set_attr "type" "adc_reg")] +) + +(define_insn "*sub<mode>3_carryinCV" + [(set (reg:CC CC_REGNUM) + (compare:CC + (sign_extend:<DWI> + (match_operand:GPI 1 "register_operand" "r")) + (plus:<DWI> + (sign_extend:<DWI> + (match_operand:GPI 2 "register_operand" "r")) + (match_operand:<DWI> 3 "aarch64_borrow_operation" "")))) + (set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (minus:GPI (match_dup 1) (match_dup 2)) + (match_operand:GPI 4 "aarch64_borrow_operation" "")))] + "" + "sbcs\\t%<w>0, %<w>1, %<w>2" + [(set_attr "type" "adc_reg")] +) + (define_insn "*sub_uxt<mode>_shift2" [(set (match_operand:GPI 0 "register_operand" "=rk") (minus:GPI (match_operand:GPI 4 "register_operand" "rk") @@ -2638,7 +3262,7 @@ neg\\t%<w>0, %<w>1 neg\\t%<rtn>0<vas>, %<rtn>1<vas>" [(set_attr "type" "alu_sreg, neon_neg<q>") - (set_attr "simd" "*,yes")] + (set_attr "arch" "*,simd")] ) ;; zero_extend version of above @@ -2963,7 +3587,7 @@ (define_insn "cmp<mode>" [(set (reg:CC CC_REGNUM) - (compare:CC (match_operand:GPI 0 "register_operand" "r,r,r") + (compare:CC (match_operand:GPI 0 "register_operand" "rk,rk,rk") (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")))] "" "@ @@ -3092,7 +3716,8 @@ (define_insn_and_split "*compare_cstore<mode>_insn" [(set (match_operand:GPI 0 "register_operand" "=r") (EQL:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand:GPI 2 "aarch64_imm24" "n")))] + (match_operand:GPI 2 "aarch64_imm24" "n"))) + (clobber (reg:CC CC_REGNUM))] "!aarch64_move_imm (INTVAL (operands[2]), <MODE>mode) && !aarch64_plus_operand (operands[2], 
<MODE>mode) && !reload_completed" @@ -3328,7 +3953,7 @@ CRC))] "TARGET_CRC32" { - if (GET_MODE_BITSIZE (GET_MODE (operands[2])) >= 64) + if (GET_MODE_BITSIZE (<crc_mode>mode) >= 64) return "<crc_variant>\\t%w0, %w1, %x2"; else return "<crc_variant>\\t%w0, %w1, %w2"; @@ -3391,6 +4016,63 @@ [(set_attr "type" "csel")] ) +;; If X can be loaded by a single CNT[BHWD] instruction, +;; +;; A = UMAX (B, X) +;; +;; is equivalent to: +;; +;; TMP = UQDEC[BHWD] (B, X) +;; A = TMP + X +;; +;; Defining the pattern this way means that: +;; +;; A = UMAX (B, X) - X +;; +;; becomes: +;; +;; TMP1 = UQDEC[BHWD] (B, X) +;; TMP2 = TMP1 + X +;; A = TMP2 - X +;; +;; which combine can optimize to: +;; +;; A = UQDEC[BHWD] (B, X) +;; +;; We don't use match_operand predicates because the order of the operands +;; can vary: the CNT[BHWD] constant will come first if the other operand is +;; a simpler constant (such as a CONST_INT), otherwise it will come second. +(define_expand "umax<mode>3" + [(set (match_operand:GPI 0 "register_operand") + (umax:GPI (match_operand:GPI 1 "") + (match_operand:GPI 2 "")))] + "TARGET_SVE" + { + if (aarch64_sve_cnt_immediate (operands[1], <MODE>mode)) + std::swap (operands[1], operands[2]); + else if (!aarch64_sve_cnt_immediate (operands[2], <MODE>mode)) + FAIL; + rtx temp = gen_reg_rtx (<MODE>mode); + operands[1] = force_reg (<MODE>mode, operands[1]); + emit_insn (gen_aarch64_uqdec<mode> (temp, operands[1], operands[2])); + emit_insn (gen_add<mode>3 (operands[0], temp, operands[2])); + DONE; + } +) + +;; Saturating unsigned subtraction of a CNT[BHWD] immediate. 
+(define_insn "aarch64_uqdec<mode>" + [(set (match_operand:GPI 0 "register_operand" "=r") + (minus:GPI + (umax:GPI (match_operand:GPI 1 "register_operand" "0") + (match_operand:GPI 2 "aarch64_sve_cnt_immediate" "Usv")) + (match_dup 2)))] + "TARGET_SVE" + { + return aarch64_output_sve_cnt_immediate ("uqdec", "%<w>0", operands[2]); + } +) + ;; ------------------------------------------------------------------- ;; Logical operations ;; ------------------------------------------------------------------- @@ -3425,7 +4107,7 @@ <logical>\\t%<w>0, %<w>1, %2 <logical>\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>" [(set_attr "type" "logic_reg,logic_imm,neon_logic") - (set_attr "simd" "*,*,yes")] + (set_attr "arch" "*,*,simd")] ) ;; zero_extend version of above @@ -3559,7 +4241,7 @@ mvn\\t%<w>0, %<w>1 mvn\\t%0.8b, %1.8b" [(set_attr "type" "logic_reg,neon_logic") - (set_attr "simd" "*,yes")] + (set_attr "arch" "*,simd")] ) (define_insn "*one_cmpl_<optab><mode>2" @@ -3582,7 +4264,7 @@ <NLOGICAL:nlogical>\\t%<w>0, %<w>2, %<w>1 <NLOGICAL:nlogical>\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" [(set_attr "type" "logic_reg,neon_logic") - (set_attr "simd" "*,yes")] + (set_attr "arch" "*,simd")] ) (define_insn "*<NLOGICAL:optab>_one_cmplsidi3_ze" @@ -3622,7 +4304,7 @@ (set (match_dup 0) (not:GPI (match_dup 0)))] "" [(set_attr "type" "logic_reg,multiple") - (set_attr "simd" "*,yes")] + (set_attr "arch" "*,simd")] ) (define_insn "*and_one_cmpl<mode>3_compare0" @@ -3958,7 +4640,7 @@ (define_expand "<optab><mode>3" [(set (match_operand:GPI 0 "register_operand") (ASHIFT:GPI (match_operand:GPI 1 "register_operand") - (match_operand:QI 2 "nonmemory_operand")))] + (match_operand:QI 2 "aarch64_reg_or_imm")))] "" { if (CONST_INT_P (operands[2])) @@ -3994,7 +4676,7 @@ (define_expand "rotr<mode>3" [(set (match_operand:GPI 0 "register_operand") (rotatert:GPI (match_operand:GPI 1 "register_operand") - (match_operand:QI 2 "nonmemory_operand")))] + (match_operand:QI 2 "aarch64_reg_or_imm")))] "" { if 
(CONST_INT_P (operands[2])) @@ -4014,7 +4696,7 @@ (define_expand "rotl<mode>3" [(set (match_operand:GPI 0 "register_operand") (rotatert:GPI (match_operand:GPI 1 "register_operand") - (match_operand:QI 2 "nonmemory_operand")))] + (match_operand:QI 2 "aarch64_reg_or_imm")))] "" { /* (SZ - cnt) % SZ == -cnt % SZ */ @@ -4054,7 +4736,7 @@ [(set_attr "type" "shift_reg")] ) -(define_insn_and_split "*aarch64_reg_<mode>3_neg_mask2" +(define_insn_and_split "*aarch64_<optab>_reg_<mode>3_neg_mask2" [(set (match_operand:GPI 0 "register_operand" "=&r") (SHIFT:GPI (match_operand:GPI 1 "register_operand" "r") @@ -4067,7 +4749,7 @@ [(const_int 0)] { rtx tmp = (can_create_pseudo_p () ? gen_reg_rtx (SImode) - : operands[0]); + : lowpart_subreg (SImode, operands[0], <MODE>mode)); emit_insn (gen_negsi2 (tmp, operands[2])); rtx and_op = gen_rtx_AND (SImode, tmp, operands[3]); @@ -4078,7 +4760,7 @@ } ) -(define_insn_and_split "*aarch64_reg_<mode>3_minus_mask" +(define_insn_and_split "*aarch64_ashl_reg_<mode>3_minus_mask" [(set (match_operand:GPI 0 "register_operand" "=&r") (ashift:GPI (match_operand:GPI 1 "register_operand" "r") @@ -4112,8 +4794,8 @@ (match_operand:DI 1 "register_operand" "r") (match_operator 4 "subreg_lowpart_operator" [(and:SI (match_operand:SI 2 "register_operand" "r") - (match_operand 3 "aarch64_shift_imm_di" "Usd"))])))] - "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (DImode)-1)) == 0)" + (match_operand 3 "const_int_operand" "n"))])))] + "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (DImode) - 1)) == 0)" { rtx xop[3]; xop[0] = operands[0]; @@ -4125,7 +4807,7 @@ [(set_attr "type" "shift_reg")] ) -(define_insn_and_split "*aarch64_reg_<optab>_minus<mode>3" +(define_insn_and_split "*aarch64_<optab>_reg_minus<mode>3" [(set (match_operand:GPI 0 "register_operand" "=&r") (ASHIFT:GPI (match_operand:GPI 1 "register_operand" "r") @@ -4166,8 +4848,8 @@ lsl\t%<w>0, %<w>1, %<w>2 shl\t%<rtn>0<vas>, %<rtn>1<vas>, %2 ushl\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>" - [(set_attr 
"simd" "no,no,yes,yes") - (set_attr "type" "bfx,shift_reg,neon_shift_imm<q>, neon_shift_reg<q>")] + [(set_attr "type" "bfx,shift_reg,neon_shift_imm<q>, neon_shift_reg<q>") + (set_attr "arch" "*,*,simd,simd")] ) ;; Logical right shift using SISD or Integer instruction @@ -4175,7 +4857,8 @@ [(set (match_operand:GPI 0 "register_operand" "=r,r,w,&w,&w") (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r,r,w,w,w") - (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,r,Us<cmode>,w,0")))] + (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" + "Us<cmode>,r,Us<cmode_simd>,w,0")))] "" "@ lsr\t%<w>0, %<w>1, %2 @@ -4183,8 +4866,8 @@ ushr\t%<rtn>0<vas>, %<rtn>1<vas>, %2 # #" - [(set_attr "simd" "no,no,yes,yes,yes") - (set_attr "type" "bfx,shift_reg,neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>")] + [(set_attr "type" "bfx,shift_reg,neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>") + (set_attr "arch" "*,*,simd,simd,simd")] ) (define_split @@ -4220,9 +4903,10 @@ ;; Arithmetic right shift using SISD or Integer instruction (define_insn "*aarch64_ashr_sisd_or_int_<mode>3" [(set (match_operand:GPI 0 "register_operand" "=r,r,w,&w,&w") - (ashiftrt:GPI - (match_operand:GPI 1 "register_operand" "r,r,w,w,w") - (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us<cmode>,r,Us<cmode>,w,0")))] + (ashiftrt:GPI + (match_operand:GPI 1 "register_operand" "r,r,w,w,w") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" + "Us<cmode>,r,Us<cmode_simd>,w,0")))] "" "@ asr\t%<w>0, %<w>1, %2 @@ -4230,8 +4914,8 @@ sshr\t%<rtn>0<vas>, %<rtn>1<vas>, %2 # #" - [(set_attr "simd" "no,no,yes,yes,yes") - (set_attr "type" "bfx,shift_reg,neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>")] + [(set_attr "type" "bfx,shift_reg,neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>") + (set_attr "arch" "*,*,simd,simd,simd")] ) (define_split @@ -4271,8 +4955,7 @@ UNSPEC_SISD_USHL))] "TARGET_SIMD" "ushl\t%d0, %d1, %d2" - [(set_attr "simd" "yes") - (set_attr "type" 
"neon_shift_reg")] + [(set_attr "type" "neon_shift_reg")] ) (define_insn "*aarch64_ushl_2s" @@ -4282,8 +4965,7 @@ UNSPEC_USHL_2S))] "TARGET_SIMD" "ushl\t%0.2s, %1.2s, %2.2s" - [(set_attr "simd" "yes") - (set_attr "type" "neon_shift_reg")] + [(set_attr "type" "neon_shift_reg")] ) (define_insn "*aarch64_sisd_sshl" @@ -4293,8 +4975,7 @@ UNSPEC_SISD_SSHL))] "TARGET_SIMD" "sshl\t%d0, %d1, %d2" - [(set_attr "simd" "yes") - (set_attr "type" "neon_shift_reg")] + [(set_attr "type" "neon_shift_reg")] ) (define_insn "*aarch64_sshl_2s" @@ -4304,8 +4985,7 @@ UNSPEC_SSHL_2S))] "TARGET_SIMD" "sshl\t%0.2s, %1.2s, %2.2s" - [(set_attr "simd" "yes") - (set_attr "type" "neon_shift_reg")] + [(set_attr "type" "neon_shift_reg")] ) (define_insn "*aarch64_sisd_neg_qi" @@ -4314,8 +4994,7 @@ UNSPEC_SISD_NEG))] "TARGET_SIMD" "neg\t%d0, %d1" - [(set_attr "simd" "yes") - (set_attr "type" "neon_neg")] + [(set_attr "type" "neon_neg")] ) ;; Rotate right @@ -4621,6 +5300,20 @@ [(set_attr "type" "bfx")] ) +;; Match sbfiz pattern in a shift left + shift right operation. + +(define_insn "*ashift<mode>_extv_bfiz" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ashift:GPI (sign_extract:GPI (match_operand:GPI 1 "register_operand" "r") + (match_operand 2 "aarch64_simd_shift_imm_offset_<mode>" "n") + (const_int 0)) + (match_operand 3 "aarch64_simd_shift_imm_<mode>" "n")))] + "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]), + 1, GET_MODE_BITSIZE (<MODE>mode) - 1)" + "sbfiz\\t%<w>0, %<w>1, %3, %2" + [(set_attr "type" "bfx")] +) + ;; When the bit position and width of the equivalent extraction add up to 32 ;; we can use a W-reg LSL instruction taking advantage of the implicit ;; zero-extension of the X-reg. 
@@ -4655,6 +5348,58 @@ [(set_attr "type" "rev")] ) +(define_insn "*aarch64_bfxil<mode>" + [(set (match_operand:GPI 0 "register_operand" "=r,r") + (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r,0") + (match_operand:GPI 3 "const_int_operand" "n, Ulc")) + (and:GPI (match_operand:GPI 2 "register_operand" "0,r") + (match_operand:GPI 4 "const_int_operand" "Ulc, n"))))] + "(INTVAL (operands[3]) == ~INTVAL (operands[4])) + && (aarch64_high_bits_all_ones_p (INTVAL (operands[3])) + || aarch64_high_bits_all_ones_p (INTVAL (operands[4])))" + { + switch (which_alternative) + { + case 0: + operands[3] = GEN_INT (ctz_hwi (~INTVAL (operands[3]))); + return "bfxil\\t%<w>0, %<w>1, 0, %3"; + case 1: + operands[3] = GEN_INT (ctz_hwi (~INTVAL (operands[4]))); + return "bfxil\\t%<w>0, %<w>2, 0, %3"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "bfm")] +) + +; Zero-extended version of above (aarch64_bfxil) +(define_insn "*aarch64_bfxilsi_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (zero_extend:DI (ior:SI (and:SI (match_operand:SI 1 "register_operand" + "r,0") + (match_operand:SI 3 "const_int_operand" "n, Ulc")) + (and:SI (match_operand:SI 2 "register_operand" "0,r") + (match_operand:SI 4 "const_int_operand" "Ulc, n")))))] + "(INTVAL (operands[3]) == ~INTVAL (operands[4])) + && (aarch64_high_bits_all_ones_p (INTVAL (operands[3])) + || aarch64_high_bits_all_ones_p (INTVAL (operands[4])))" + { + switch (which_alternative) + { + case 0: + operands[3] = GEN_INT (ctz_hwi (~INTVAL (operands[3]))); + return "bfxil\\t%0, %1, 0, %3"; + case 1: + operands[3] = GEN_INT (ctz_hwi (~INTVAL (operands[4]))); + return "bfxil\\t%0, %2, 0, %3"; + default: + gcc_unreachable (); + } + } + [(set_attr "type" "bfm")] +) + ;; There are no canonicalisation rules for the position of the lshiftrt, ashift ;; operations within an IOR/AND RTX, therefore we have two patterns matching ;; each valid permutation. 
@@ -4743,57 +5488,94 @@ [(set_attr "type" "f_cvtf2i")] ) -;; fma - no throw - -(define_insn "fma<mode>4" +;; fma - expand fma into patterns with the accumulator operand first since +;; reusing the accumulator results in better register allocation. +;; The register allocator considers copy preferences in operand order, +;; so this prefers fmadd s0, s1, s2, s0 over fmadd s1, s1, s2, s0. + +(define_expand "fma<mode>4" + [(set (match_operand:GPF_F16 0 "register_operand") + (fma:GPF_F16 (match_operand:GPF_F16 1 "register_operand") + (match_operand:GPF_F16 2 "register_operand") + (match_operand:GPF_F16 3 "register_operand")))] + "TARGET_FLOAT" +) + +(define_insn "*aarch64_fma<mode>4" [(set (match_operand:GPF_F16 0 "register_operand" "=w") - (fma:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w") - (match_operand:GPF_F16 2 "register_operand" "w") - (match_operand:GPF_F16 3 "register_operand" "w")))] + (fma:GPF_F16 (match_operand:GPF_F16 2 "register_operand" "w") + (match_operand:GPF_F16 3 "register_operand" "w") + (match_operand:GPF_F16 1 "register_operand" "w")))] "TARGET_FLOAT" - "fmadd\\t%<s>0, %<s>1, %<s>2, %<s>3" + "fmadd\\t%<s>0, %<s>2, %<s>3, %<s>1" [(set_attr "type" "fmac<stype>")] ) -(define_insn "fnma<mode>4" +(define_expand "fnma<mode>4" + [(set (match_operand:GPF_F16 0 "register_operand") + (fma:GPF_F16 + (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand")) + (match_operand:GPF_F16 2 "register_operand") + (match_operand:GPF_F16 3 "register_operand")))] + "TARGET_FLOAT" +) + +(define_insn "*aarch64_fnma<mode>4" [(set (match_operand:GPF_F16 0 "register_operand" "=w") (fma:GPF_F16 - (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")) - (match_operand:GPF_F16 2 "register_operand" "w") - (match_operand:GPF_F16 3 "register_operand" "w")))] + (neg:GPF_F16 (match_operand:GPF_F16 2 "register_operand" "w")) + (match_operand:GPF_F16 3 "register_operand" "w") + (match_operand:GPF_F16 1 "register_operand" "w")))] "TARGET_FLOAT" - "fmsub\\t%<s>0, 
%<s>1, %<s>2, %<s>3" + "fmsub\\t%<s>0, %<s>2, %<s>3, %<s>1" [(set_attr "type" "fmac<stype>")] ) -(define_insn "fms<mode>4" + +(define_expand "fms<mode>4" + [(set (match_operand:GPF 0 "register_operand") + (fma:GPF (match_operand:GPF 1 "register_operand") + (match_operand:GPF 2 "register_operand") + (neg:GPF (match_operand:GPF 3 "register_operand"))))] + "TARGET_FLOAT" +) + +(define_insn "*aarch64_fms<mode>4" [(set (match_operand:GPF 0 "register_operand" "=w") - (fma:GPF (match_operand:GPF 1 "register_operand" "w") - (match_operand:GPF 2 "register_operand" "w") - (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))] + (fma:GPF (match_operand:GPF 2 "register_operand" "w") + (match_operand:GPF 3 "register_operand" "w") + (neg:GPF (match_operand:GPF 1 "register_operand" "w"))))] "TARGET_FLOAT" - "fnmsub\\t%<s>0, %<s>1, %<s>2, %<s>3" + "fnmsub\\t%<s>0, %<s>2, %<s>3, %<s>1" [(set_attr "type" "fmac<s>")] ) -(define_insn "fnms<mode>4" +(define_expand "fnms<mode>4" + [(set (match_operand:GPF 0 "register_operand") + (fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand")) + (match_operand:GPF 2 "register_operand") + (neg:GPF (match_operand:GPF 3 "register_operand"))))] + "TARGET_FLOAT" +) + +(define_insn "*aarch64_fnms<mode>4" [(set (match_operand:GPF 0 "register_operand" "=w") - (fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand" "w")) - (match_operand:GPF 2 "register_operand" "w") - (neg:GPF (match_operand:GPF 3 "register_operand" "w"))))] + (fma:GPF (neg:GPF (match_operand:GPF 2 "register_operand" "w")) + (match_operand:GPF 3 "register_operand" "w") + (neg:GPF (match_operand:GPF 1 "register_operand" "w"))))] "TARGET_FLOAT" - "fnmadd\\t%<s>0, %<s>1, %<s>2, %<s>3" + "fnmadd\\t%<s>0, %<s>2, %<s>3, %<s>1" [(set_attr "type" "fmac<s>")] ) ;; If signed zeros are ignored, -(a * b + c) = -a * b - c. 
-(define_insn "*fnmadd<mode>4" +(define_insn "*aarch64_fnmadd<mode>4" [(set (match_operand:GPF 0 "register_operand" "=w") - (neg:GPF (fma:GPF (match_operand:GPF 1 "register_operand" "w") - (match_operand:GPF 2 "register_operand" "w") - (match_operand:GPF 3 "register_operand" "w"))))] + (neg:GPF (fma:GPF (match_operand:GPF 2 "register_operand" "w") + (match_operand:GPF 3 "register_operand" "w") + (match_operand:GPF 1 "register_operand" "w"))))] "!HONOR_SIGNED_ZEROS (<MODE>mode) && TARGET_FLOAT" - "fnmadd\\t%<s>0, %<s>1, %<s>2, %<s>3" + "fnmadd\\t%<s>0, %<s>2, %<s>3, %<s>1" [(set_attr "type" "fmac<s>")] ) @@ -4849,24 +5631,59 @@ [(set_attr "type" "f_cvt")] ) -(define_insn "<optab>_trunc<GPF_F16:mode><GPI:mode>2" +;; Convert SF -> SI or DF -> DI while preferring w = w register constraints +;; and making r = w more expensive + +(define_insn "<optab>_trunc<fcvt_target><GPI:mode>2" + [(set (match_operand:GPI 0 "register_operand" "=w,?r") + (FIXUORS:GPI (match_operand:<FCVT_TARGET> 1 "register_operand" "w,w")))] + "TARGET_FLOAT" + "@ + fcvtz<su>\t%<s>0, %<s>1 + fcvtz<su>\t%<w>0, %<s>1" + [(set_attr "type" "neon_fp_to_int_s,f_cvtf2i")] +) + +;; Convert HF -> SI or DI + +(define_insn "<optab>_trunchf<GPI:mode>2" [(set (match_operand:GPI 0 "register_operand" "=r") - (FIXUORS:GPI (match_operand:GPF_F16 1 "register_operand" "w")))] + (FIXUORS:GPI (match_operand:HF 1 "register_operand" "w")))] + "TARGET_FP_F16INST" + "fcvtz<su>\t%<w>0, %h1" + [(set_attr "type" "f_cvtf2i")] +) + +;; Convert DF -> SI or SF -> DI which can only be accomplished with +;; input in a fp register and output in a integer register + +(define_insn "<optab>_trunc<fcvt_change_mode><GPI:mode>2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (FIXUORS:GPI (match_operand:<FCVT_CHANGE_MODE> 1 "register_operand" "w")))] "TARGET_FLOAT" - "fcvtz<su>\t%<GPI:w>0, %<GPF_F16:s>1" + "fcvtz<su>\t%<w>0, %<fpw>1" + [(set_attr "type" "f_cvtf2i")] +) + +(define_insn "*fix_to_zero_extend<mode>di2" + [(set 
(match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (unsigned_fix:SI + (match_operand:GPF 1 "register_operand" "w"))))] + "TARGET_FLOAT" + "fcvtzu\t%w0, %<s>1" [(set_attr "type" "f_cvtf2i")] ) (define_insn "<optab><fcvt_target><GPF:mode>2" [(set (match_operand:GPF 0 "register_operand" "=w,w") - (FLOATUORS:GPF (match_operand:<FCVT_TARGET> 1 "register_operand" "w,r")))] + (FLOATUORS:GPF (match_operand:<FCVT_TARGET> 1 "register_operand" "w,?r")))] "TARGET_FLOAT" "@ <su_optab>cvtf\t%<GPF:s>0, %<s>1 <su_optab>cvtf\t%<GPF:s>0, %<w1>1" - [(set_attr "simd" "yes,no") - (set_attr "fp" "no,yes") - (set_attr "type" "neon_int_to_fp_<Vetype>,f_cvti2f")] + [(set_attr "type" "neon_int_to_fp_<Vetype>,f_cvti2f") + (set_attr "arch" "simd,fp")] ) (define_insn "<optab><fcvt_iesize><GPF:mode>2" @@ -4951,8 +5768,7 @@ <FCVT_F2FIXED:fcvt_fixed_insn>\t%<GPF:w1>0, %<GPF:s>1, #%2 <FCVT_F2FIXED:fcvt_fixed_insn>\t%<GPF:s>0, %<GPF:s>1, #%2" [(set_attr "type" "f_cvtf2i, neon_fp_to_int_<GPF:Vetype>") - (set_attr "fp" "yes, *") - (set_attr "simd" "*, yes")] + (set_attr "arch" "fp,simd")] ) (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><GPI:mode>3" @@ -4965,8 +5781,7 @@ <FCVT_FIXED2F:fcvt_fixed_insn>\t%<GPI:v>0, %<GPI:w>1, #%2 <FCVT_FIXED2F:fcvt_fixed_insn>\t%<GPI:v>0, %<GPI:v>1, #%2" [(set_attr "type" "f_cvti2f, neon_int_to_fp_<GPI:Vetype>") - (set_attr "fp" "yes, *") - (set_attr "simd" "*, yes")] + (set_attr "arch" "fp,simd")] ) (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3" @@ -5067,7 +5882,7 @@ [(set (match_operand:GPF_F16 0 "register_operand") (div:GPF_F16 (match_operand:GPF_F16 1 "general_operand") (match_operand:GPF_F16 2 "register_operand")))] - "TARGET_SIMD" + "TARGET_FLOAT" { if (aarch64_emit_approx_div (operands[0], operands[1], operands[2])) DONE; @@ -5255,7 +6070,7 @@ ;; ------------------------------------------------------------------- ;; Reload Scalar Floating point modes from constant pool. ;; The AArch64 port doesn't have __int128 constant move support. 
-(define_expand "aarch64_reload_movcp<GPF_TF:mode><P:mode>" +(define_expand "@aarch64_reload_movcp<GPF_TF:mode><P:mode>" [(set (match_operand:GPF_TF 0 "register_operand" "=w") (mem:GPF_TF (match_operand 1 "aarch64_constant_pool_symref" "S"))) (clobber (match_operand:P 2 "register_operand" "=&r"))] @@ -5268,7 +6083,7 @@ ) ;; Reload Vector modes from constant pool. -(define_expand "aarch64_reload_movcp<VALL:mode><P:mode>" +(define_expand "@aarch64_reload_movcp<VALL:mode><P:mode>" [(set (match_operand:VALL 0 "register_operand" "=w") (mem:VALL (match_operand 1 "aarch64_constant_pool_symref" "S"))) (clobber (match_operand:P 2 "register_operand" "=&r"))] @@ -5280,7 +6095,7 @@ } ) -(define_expand "aarch64_reload_mov<mode>" +(define_expand "@aarch64_reload_mov<mode>" [(set (match_operand:TX 0 "register_operand" "=w") (match_operand:TX 1 "register_operand" "w")) (clobber (match_operand:DI 2 "register_operand" "=&r")) @@ -5300,7 +6115,7 @@ ;; after or during reload as we don't want these patterns to start ;; kicking in during the combiner. 
-(define_insn "aarch64_movdi_<mode>low" +(define_insn "@aarch64_movdi_<mode>low" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extract:DI (match_operand:TX 1 "register_operand" "w") (const_int 64) (const_int 0)))] @@ -5310,7 +6125,7 @@ (set_attr "length" "4") ]) -(define_insn "aarch64_movdi_<mode>high" +(define_insn "@aarch64_movdi_<mode>high" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extract:DI (match_operand:TX 1 "register_operand" "w") (const_int 64) (const_int 64)))] @@ -5320,7 +6135,7 @@ (set_attr "length" "4") ]) -(define_insn "aarch64_mov<mode>high_di" +(define_insn "@aarch64_mov<mode>high_di" [(set (zero_extract:TX (match_operand:TX 0 "register_operand" "+w") (const_int 64) (const_int 64)) (zero_extend:TX (match_operand:DI 1 "register_operand" "r")))] @@ -5330,7 +6145,7 @@ (set_attr "length" "4") ]) -(define_insn "aarch64_mov<mode>low_di" +(define_insn "@aarch64_mov<mode>low_di" [(set (match_operand:TX 0 "register_operand" "=w") (zero_extend:TX (match_operand:DI 1 "register_operand" "r")))] "TARGET_FLOAT && (reload_completed || reload_in_progress)" @@ -5375,7 +6190,7 @@ (lo_sum:P (match_operand:P 1 "register_operand" "r") (match_operand 2 "aarch64_valid_symref" "S")))] "" - "add\\t%<w>0, %<w>1, :lo12:%a2" + "add\\t%<w>0, %<w>1, :lo12:%c2" [(set_attr "type" "alu_imm")] ) @@ -5386,7 +6201,7 @@ (match_operand:PTR 2 "aarch64_valid_symref" "S")))] UNSPEC_GOTSMALLPIC))] "" - "ldr\\t%<w>0, [%1, #:got_lo12:%a2]" + "ldr\\t%<w>0, [%1, #:got_lo12:%c2]" [(set_attr "type" "load_<ldst_sz>")] ) @@ -5398,7 +6213,7 @@ (match_operand:DI 2 "aarch64_valid_symref" "S")))] UNSPEC_GOTSMALLPIC)))] "TARGET_ILP32" - "ldr\\t%w0, [%1, #:got_lo12:%a2]" + "ldr\\t%w0, [%1, #:got_lo12:%c2]" [(set_attr "type" "load_4")] ) @@ -5409,7 +6224,7 @@ (match_operand:PTR 2 "aarch64_valid_symref" "S")))] UNSPEC_GOTSMALLPIC28K))] "" - "ldr\\t%<w>0, [%1, #:<got_modifier>:%a2]" + "ldr\\t%<w>0, [%1, #:<got_modifier>:%c2]" [(set_attr "type" "load_<ldst_sz>")] ) @@ -5421,7 
+6236,7 @@ (match_operand:DI 2 "aarch64_valid_symref" "S")))] UNSPEC_GOTSMALLPIC28K)))] "TARGET_ILP32" - "ldr\\t%w0, [%1, #:gotpage_lo14:%a2]" + "ldr\\t%w0, [%1, #:gotpage_lo14:%c2]" [(set_attr "type" "load_4")] ) @@ -5553,14 +6368,91 @@ (set_attr "length" "12")] ) -(define_insn "tlsdesc_small_<mode>" +(define_expand "tlsdesc_small_<mode>" + [(unspec:PTR [(match_operand 0 "aarch64_valid_symref")] UNSPEC_TLSDESC)] + "TARGET_TLS_DESC" + { + if (TARGET_SVE) + emit_insn (gen_tlsdesc_small_sve_<mode> (operands[0])); + else + emit_insn (gen_tlsdesc_small_advsimd_<mode> (operands[0])); + DONE; + } +) + +;; tlsdesc calls preserve all core and Advanced SIMD registers except +;; R0 and LR. +(define_insn "tlsdesc_small_advsimd_<mode>" [(set (reg:PTR R0_REGNUM) (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")] - UNSPEC_TLSDESC)) + UNSPEC_TLSDESC)) (clobber (reg:DI LR_REGNUM)) (clobber (reg:CC CC_REGNUM)) (clobber (match_scratch:DI 1 "=r"))] - "TARGET_TLS_DESC" + "TARGET_TLS_DESC && !TARGET_SVE" + "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" + [(set_attr "type" "call") + (set_attr "length" "16")]) + +;; For SVE, model tlsdesc calls as clobbering the lower 128 bits of +;; all vector registers, and clobber all predicate registers, on +;; top of the usual R0 and LR. 
+(define_insn "tlsdesc_small_sve_<mode>" + [(set (reg:PTR R0_REGNUM) + (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")] + UNSPEC_TLSDESC)) + (clobber (reg:DI LR_REGNUM)) + (clobber (reg:CC CC_REGNUM)) + (clobber_high (reg:TI V0_REGNUM)) + (clobber_high (reg:TI V1_REGNUM)) + (clobber_high (reg:TI V2_REGNUM)) + (clobber_high (reg:TI V3_REGNUM)) + (clobber_high (reg:TI V4_REGNUM)) + (clobber_high (reg:TI V5_REGNUM)) + (clobber_high (reg:TI V6_REGNUM)) + (clobber_high (reg:TI V7_REGNUM)) + (clobber_high (reg:TI V8_REGNUM)) + (clobber_high (reg:TI V9_REGNUM)) + (clobber_high (reg:TI V10_REGNUM)) + (clobber_high (reg:TI V11_REGNUM)) + (clobber_high (reg:TI V12_REGNUM)) + (clobber_high (reg:TI V13_REGNUM)) + (clobber_high (reg:TI V14_REGNUM)) + (clobber_high (reg:TI V15_REGNUM)) + (clobber_high (reg:TI V16_REGNUM)) + (clobber_high (reg:TI V17_REGNUM)) + (clobber_high (reg:TI V18_REGNUM)) + (clobber_high (reg:TI V19_REGNUM)) + (clobber_high (reg:TI V20_REGNUM)) + (clobber_high (reg:TI V21_REGNUM)) + (clobber_high (reg:TI V22_REGNUM)) + (clobber_high (reg:TI V23_REGNUM)) + (clobber_high (reg:TI V24_REGNUM)) + (clobber_high (reg:TI V25_REGNUM)) + (clobber_high (reg:TI V26_REGNUM)) + (clobber_high (reg:TI V27_REGNUM)) + (clobber_high (reg:TI V28_REGNUM)) + (clobber_high (reg:TI V29_REGNUM)) + (clobber_high (reg:TI V30_REGNUM)) + (clobber_high (reg:TI V31_REGNUM)) + (clobber (reg:VNx2BI P0_REGNUM)) + (clobber (reg:VNx2BI P1_REGNUM)) + (clobber (reg:VNx2BI P2_REGNUM)) + (clobber (reg:VNx2BI P3_REGNUM)) + (clobber (reg:VNx2BI P4_REGNUM)) + (clobber (reg:VNx2BI P5_REGNUM)) + (clobber (reg:VNx2BI P6_REGNUM)) + (clobber (reg:VNx2BI P7_REGNUM)) + (clobber (reg:VNx2BI P8_REGNUM)) + (clobber (reg:VNx2BI P9_REGNUM)) + (clobber (reg:VNx2BI P10_REGNUM)) + (clobber (reg:VNx2BI P11_REGNUM)) + (clobber (reg:VNx2BI P12_REGNUM)) + (clobber (reg:VNx2BI P13_REGNUM)) + (clobber (reg:VNx2BI P14_REGNUM)) + (clobber (reg:VNx2BI P15_REGNUM)) + (clobber (match_scratch:DI 1 "=r"))] + 
"TARGET_TLS_DESC && TARGET_SVE" "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" [(set_attr "type" "call") (set_attr "length" "16")]) @@ -5620,7 +6512,7 @@ ) (define_insn "probe_stack_range" - [(set (match_operand:DI 0 "register_operand" "=r") + [(set (match_operand:DI 0 "register_operand" "=rk") (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0") (match_operand:DI 2 "register_operand" "r")] UNSPECV_PROBE_STACK_RANGE))] @@ -5631,6 +6523,25 @@ [(set_attr "length" "32")] ) +;; This instruction is used to generate the stack clash stack adjustment and +;; probing loop. We can't change the control flow during prologue and epilogue +;; code generation. So we must emit a volatile unspec and expand it later on. + +(define_insn "@probe_sve_stack_clash_<mode>" + [(set (match_operand:P 0 "register_operand" "=rk") + (unspec_volatile:P [(match_operand:P 1 "register_operand" "0") + (match_operand:P 2 "register_operand" "r") + (match_operand:P 3 "const_int_operand" "n") + (match_operand:P 4 "aarch64_plus_immediate" "L")] + UNSPECV_PROBE_STACK_RANGE))] + "TARGET_SVE" +{ + return aarch64_output_probe_sve_stack_clash (operands[0], operands[2], + operands[3], operands[4]); +} + [(set_attr "length" "28")] +) + ;; Named pattern for expanding thread pointer reference. (define_expand "get_thread_pointerdi" [(match_operand:DI 0 "register_operand" "=r")] @@ -5771,6 +6682,134 @@ DONE; }) +;; Track speculation through conditional branches. We assume that +;; SPECULATION_TRACKER_REGNUM is reserved for this purpose when necessary. +(define_insn "speculation_tracker" + [(set (reg:DI SPECULATION_TRACKER_REGNUM) + (unspec [(reg:DI SPECULATION_TRACKER_REGNUM) (match_operand 0)] + UNSPEC_SPECULATION_TRACKER))] + "" + { + operands[1] = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM); + output_asm_insn ("csel\\t%1, %1, xzr, %m0", operands); + return ""; + } + [(set_attr "type" "csel")] +) + +;; Helper for aarch64.c code. 
+(define_expand "set_clobber_cc" + [(parallel [(set (match_operand 0) + (match_operand 1)) + (clobber (reg:CC CC_REGNUM))])]) + +;; Hard speculation barrier. +(define_insn "speculation_barrier" + [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)] + "" + "isb\;dsb\\tsy" + [(set_attr "length" "8") + (set_attr "type" "block") + (set_attr "speculation_barrier" "true")] +) + +;; Support for __builtin_speculation_safe_value when we have speculation +;; tracking enabled. Use the speculation tracker to decide whether to +;; copy operand 1 to the target, or to copy the fail value (operand 2). +(define_expand "@despeculate_copy<ALLI_TI:mode>" + [(set (match_operand:ALLI_TI 0 "register_operand" "=r") + (unspec_volatile:ALLI_TI + [(match_operand:ALLI_TI 1 "register_operand" "r") + (match_operand:ALLI_TI 2 "aarch64_reg_or_zero" "rZ") + (use (reg:DI SPECULATION_TRACKER_REGNUM)) + (clobber (reg:CC CC_REGNUM))] UNSPECV_SPECULATION_BARRIER))] + "" + " + { + if (operands[2] == const0_rtx) + { + rtx tracker; + if (<MODE>mode == TImode) + tracker = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM); + else + tracker = gen_rtx_REG (<MODE>mode, SPECULATION_TRACKER_REGNUM); + + emit_insn (gen_despeculate_simple<mode> (operands[0], operands[1], + tracker)); + DONE; + } + } + " +) + +;; Patterns to match despeculate_copy<mode>. Note that "hint 0x14" is the +;; encoding for CSDB, but will work in older versions of the assembler. 
+(define_insn "*despeculate_copy<ALLI:mode>_insn" + [(set (match_operand:ALLI 0 "register_operand" "=r") + (unspec_volatile:ALLI + [(match_operand:ALLI 1 "register_operand" "r") + (match_operand:ALLI 2 "aarch64_reg_or_zero" "rZ") + (use (reg:DI SPECULATION_TRACKER_REGNUM)) + (clobber (reg:CC CC_REGNUM))] UNSPECV_SPECULATION_BARRIER))] + "" + { + operands[3] = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM); + output_asm_insn ("cmp\\t%3, #0\;csel\\t%<w>0, %<w>1, %<w>2, ne\;hint\t0x14 // csdb", + operands); + return ""; + } + [(set_attr "length" "12") + (set_attr "type" "block") + (set_attr "speculation_barrier" "true")] +) + +;; Pattern to match despeculate_copyti +(define_insn "*despeculate_copyti_insn" + [(set (match_operand:TI 0 "register_operand" "=r") + (unspec_volatile:TI + [(match_operand:TI 1 "register_operand" "r") + (match_operand:TI 2 "aarch64_reg_or_zero" "rZ") + (use (reg:DI SPECULATION_TRACKER_REGNUM)) + (clobber (reg:CC CC_REGNUM))] UNSPECV_SPECULATION_BARRIER))] + "" + { + operands[3] = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM); + output_asm_insn + ("cmp\\t%3, #0\;csel\\t%0, %1, %2, ne\;csel\\t%H0, %H1, %H2, ne\;hint\t0x14 // csdb", + operands); + return ""; + } + [(set_attr "length" "16") + (set_attr "type" "block") + (set_attr "speculation_barrier" "true")] +) + +(define_insn "despeculate_simple<ALLI:mode>" + [(set (match_operand:ALLI 0 "register_operand" "=r") + (unspec_volatile:ALLI + [(match_operand:ALLI 1 "register_operand" "r") + (use (match_operand:ALLI 2 "register_operand" ""))] + UNSPECV_SPECULATION_BARRIER))] + "" + "and\\t%<w>0, %<w>1, %<w>2\;hint\t0x14 // csdb" + [(set_attr "type" "block") + (set_attr "length" "8") + (set_attr "speculation_barrier" "true")] +) + +(define_insn "despeculate_simpleti" + [(set (match_operand:TI 0 "register_operand" "=r") + (unspec_volatile:TI + [(match_operand:TI 1 "register_operand" "r") + (use (match_operand:DI 2 "register_operand" ""))] + UNSPECV_SPECULATION_BARRIER))] + "" + "and\\t%0, %1, 
%2\;and\\t%H0, %H1, %2\;hint\t0x14 // csdb" + [(set_attr "type" "block") + (set_attr "length" "12") + (set_attr "speculation_barrier" "true")] +) + ;; AdvSIMD Stuff (include "aarch64-simd.md") @@ -5779,3 +6818,6 @@ ;; ldp/stp peephole patterns (include "aarch64-ldpstp.md") + +;; SVE. +(include "aarch64-sve.md")