Mercurial > hg > CbC > CbC_gcc
diff gcc/config/nds32/nds32.md @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
line wrap: on
line diff
--- a/gcc/config/nds32/nds32.md Fri Oct 27 22:46:09 2017 +0900 +++ b/gcc/config/nds32/nds32.md Thu Oct 25 07:37:49 2018 +0900 @@ -1,5 +1,5 @@ ;; Machine description of Andes NDS32 cpu for GNU compiler -;; Copyright (C) 2012-2017 Free Software Foundation, Inc. +;; Copyright (C) 2012-2018 Free Software Foundation, Inc. ;; Contributed by Andes Technology Corporation. ;; ;; This file is part of GCC. @@ -46,58 +46,143 @@ ;; Include DImode/DFmode operations. (include "nds32-doubleword.md") +;; Include floating-point patterns. +(include "nds32-fpu.md") + ;; Include peephole patterns. (include "nds32-peephole2.md") +;; ------------------------------------------------------------------------ + +;; CPU pipeline model. +(define_attr "pipeline_model" "n7,n8,e8,n9,n10,graywolf,n13,simple" + (const + (cond [(match_test "nds32_cpu_option == CPU_N7") (const_string "n7") + (match_test "nds32_cpu_option == CPU_E8") (const_string "e8") + (match_test "nds32_cpu_option == CPU_N6 || nds32_cpu_option == CPU_N8") (const_string "n8") + (match_test "nds32_cpu_option == CPU_N9") (const_string "n9") + (match_test "nds32_cpu_option == CPU_N10") (const_string "n10") + (match_test "nds32_cpu_option == CPU_GRAYWOLF") (const_string "graywolf") + (match_test "nds32_cpu_option == CPU_N12") (const_string "n13") + (match_test "nds32_cpu_option == CPU_N13") (const_string "n13") + (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")] + (const_string "n9")))) + ;; Insn type, it is used to default other attribute values. 
(define_attr "type" - "unknown,move,load,store,alu,compare,branch,call,misc" + "unknown,load,store,load_multiple,store_multiple,alu,alu_shift,pbsad,pbsada,mul,mac,div,branch,mmu,misc,\ + falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore,\ + dalu,dalu64,daluround,dcmp,dclip,dmul,dmac,dinsb,dpack,dbpick,dwext" (const_string "unknown")) +;; Insn sub-type +(define_attr "subtype" + "simple,shift,saturation" + (const_string "simple")) ;; Length, in bytes, default is 4-bytes. (define_attr "length" "" (const_int 4)) +;; Indicate the amount of micro instructions. +(define_attr "combo" + "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25" + (const_string "1")) + +;; Insn in which feature set, it is used to enable/disable insn alternatives. +;; v1 : Baseline Instructions +;; v2 : Baseline Version 2 Instructions +;; v3m : Baseline Version 3m Instructions +;; v3 : Baseline Version 3 Instructions +;; pe1 : Performance Extension Instructions +;; pe2 : Performance Extension Version 2 Instructions +;; se : String Extension instructions +(define_attr "feature" + "v1,v2,v3m,v3,pe1,pe2,se,fpu" + (const_string "v1")) ;; Enabled, which is used to enable/disable insn alternatives. ;; Note that we use length and TARGET_16_BIT here as criteria. -;; If the instruction pattern already check TARGET_16_BIT to -;; determine the length by itself, its enabled attribute should be -;; always 1 to avoid the conflict with the settings here. -(define_attr "enabled" "" - (cond [(and (eq_attr "length" "2") - (match_test "!TARGET_16_BIT")) - (const_int 0)] - (const_int 1))) +;; If the instruction pattern already check TARGET_16_BIT to determine +;; the length by itself, its enabled attribute should be customized to +;; avoid the conflict between length attribute and this default setting. 
+(define_attr "enabled" "no,yes" + (if_then_else + (and (eq_attr "length" "2") + (match_test "!TARGET_16_BIT")) + (const_string "no") + (cond [(eq_attr "feature" "v1") (const_string "yes") + (eq_attr "feature" "v2") (if_then_else (match_test "TARGET_ISA_V2 || TARGET_ISA_V3 || TARGET_ISA_V3M") + (const_string "yes") + (const_string "no")) + (eq_attr "feature" "v3") (if_then_else (match_test "TARGET_ISA_V3") + (const_string "yes") + (const_string "no")) + (eq_attr "feature" "v3m") (if_then_else (match_test "TARGET_ISA_V3 || TARGET_ISA_V3M") + (const_string "yes") + (const_string "no")) + (eq_attr "feature" "pe1") (if_then_else (match_test "TARGET_EXT_PERF") + (const_string "yes") + (const_string "no")) + (eq_attr "feature" "pe2") (if_then_else (match_test "TARGET_EXT_PERF2") + (const_string "yes") + (const_string "no")) + (eq_attr "feature" "se") (if_then_else (match_test "TARGET_EXT_STRING") + (const_string "yes") + (const_string "no")) + (eq_attr "feature" "fpu") (if_then_else (match_test "TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE") + (const_string "yes") + (const_string "no"))] + (const_string "yes")))) ;; ---------------------------------------------------------------------------- +(include "nds32-dspext.md") ;; Move instructions. ;; For QImode and HImode, the immediate value can be fit in imm20s. ;; So there is no need to split rtx for QI and HI patterns. -(define_expand "movqi" - [(set (match_operand:QI 0 "general_operand" "") - (match_operand:QI 1 "general_operand" ""))] +(define_expand "mov<mode>" + [(set (match_operand:QIHI 0 "general_operand" "") + (match_operand:QIHI 1 "general_operand" ""))] "" { /* Need to force register if mem <- !reg. 
*/ if (MEM_P (operands[0]) && !REG_P (operands[1])) - operands[1] = force_reg (QImode, operands[1]); + operands[1] = force_reg (<MODE>mode, operands[1]); + + if (MEM_P (operands[1]) && optimize > 0) + { + rtx reg = gen_reg_rtx (SImode); + + emit_insn (gen_zero_extend<mode>si2 (reg, operands[1])); + operands[1] = gen_lowpart (<MODE>mode, reg); + } }) -(define_expand "movhi" - [(set (match_operand:HI 0 "general_operand" "") - (match_operand:HI 1 "general_operand" ""))] +(define_expand "movmisalign<mode>" + [(set (match_operand:SIDI 0 "general_operand" "") + (match_operand:SIDI 1 "general_operand" ""))] "" { - /* Need to force register if mem <- !reg. */ + rtx addr; if (MEM_P (operands[0]) && !REG_P (operands[1])) - operands[1] = force_reg (HImode, operands[1]); + operands[1] = force_reg (<MODE>mode, operands[1]); + + if (MEM_P (operands[0])) + { + addr = force_reg (Pmode, XEXP (operands[0], 0)); + emit_insn (gen_unaligned_store<mode> (addr, operands[1])); + } + else + { + addr = force_reg (Pmode, XEXP (operands[1], 0)); + emit_insn (gen_unaligned_load<mode> (operands[0], addr)); + } + DONE; }) (define_expand "movsi" @@ -130,12 +215,34 @@ low12_int)); DONE; } + + if ((REG_P (operands[0]) || GET_CODE (operands[0]) == SUBREG) + && SYMBOLIC_CONST_P (operands[1])) + { + if (TARGET_ICT_MODEL_LARGE + && nds32_indirect_call_referenced_p (operands[1])) + { + nds32_expand_ict_move (operands); + DONE; + } + else if (nds32_tls_referenced_p (operands [1])) + { + nds32_expand_tls_move (operands); + DONE; + } + else if (flag_pic) + { + nds32_expand_pic_move (operands); + DONE; + } + } }) (define_insn "*mov<mode>" - [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r, U45, U33, U37, U45, m, l, l, l, d, r, d, r, r, r") - (match_operand:QIHISI 1 "nds32_move_operand" " r, r, l, l, l, d, r, U45, U33, U37, U45, m, Ip05, Is05, Is20, Ihig"))] - "" + [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r,U45,U33,U37,U45, m, l, l, l, d, d, r, d, r, r, r, *f, *f, r, *f, Q") + 
(match_operand:QIHISI 1 "nds32_move_operand" " r, r, l, l, l, d, r,U45,U33,U37,U45,Ufe, m,Ip05, Is05, Is20, Ihig, *f, r, *f, Q, *f"))] + "register_operand(operands[0], <MODE>mode) + || register_operand(operands[1], <MODE>mode)" { switch (which_alternative) { @@ -154,37 +261,52 @@ case 8: case 9: case 10: + case 11: return nds32_output_16bit_load (operands, <byte>); - case 11: + case 12: return nds32_output_32bit_load (operands, <byte>); - case 12: - return "movpi45\t%0, %1"; case 13: + return "movpi45\t%0, %1"; + case 14: return "movi55\t%0, %1"; - case 14: + case 15: return "movi\t%0, %1"; - case 15: + case 16: return "sethi\t%0, hi20(%1)"; + case 17: + if (TARGET_FPU_SINGLE) + return "fcpyss\t%0, %1, %1"; + else + return "#"; + case 18: + return "fmtsr\t%1, %0"; + case 19: + return "fmfsr\t%0, %1"; + case 20: + return nds32_output_float_load (operands); + case 21: + return nds32_output_float_store (operands); default: gcc_unreachable (); } } - [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,alu,alu,alu,alu") - (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 4, 2, 2, 4, 4")]) + [(set_attr "type" "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,fcpy,fmtsr,fmfsr,fload,fstore") + (set_attr "length" " 2, 4, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 4, 2, 2, 4, 4, 4, 4, 4, 4, 4") + (set_attr "feature" " v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v1, v3m, v1, v1, v1, v1, v1, fpu, fpu, fpu, fpu, fpu")]) ;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF ;; are able to match such instruction template. 
-(define_insn "*move_addr" - [(set (match_operand:SI 0 "register_operand" "=l, r") - (match_operand:SI 1 "nds32_symbolic_operand" " i, i"))] +(define_insn "move_addr" + [(set (match_operand:SI 0 "nds32_general_register_operand" "=l, r") + (match_operand:SI 1 "nds32_nonunspec_symbolic_operand" " i, i"))] "" "la\t%0, %1" - [(set_attr "type" "move") + [(set_attr "type" "alu") (set_attr "length" "8")]) -(define_insn "*sethi" +(define_insn "sethi" [(set (match_operand:SI 0 "register_operand" "=r") (high:SI (match_operand:SI 1 "nds32_symbolic_operand" " i")))] "" @@ -193,7 +315,7 @@ (set_attr "length" "4")]) -(define_insn "*lo_sum" +(define_insn "lo_sum" [(set (match_operand:SI 0 "register_operand" "=r") (lo_sum:SI (match_operand:SI 1 "register_operand" " r") (match_operand:SI 2 "nds32_symbolic_operand" " i")))] @@ -256,13 +378,58 @@ ;; ---------------------------------------------------------------------------- +(define_expand "extv" + [(set (match_operand 0 "register_operand" "") + (sign_extract (match_operand 1 "nonimmediate_operand" "") + (match_operand 2 "const_int_operand" "") + (match_operand 3 "const_int_operand" "")))] + "" +{ + enum nds32_expand_result_type result = nds32_expand_extv (operands); + switch (result) + { + case EXPAND_DONE: + DONE; + break; + case EXPAND_FAIL: + FAIL; + break; + case EXPAND_CREATE_TEMPLATE: + break; + default: + gcc_unreachable (); + } +}) + +(define_expand "insv" + [(set (zero_extract (match_operand 0 "nonimmediate_operand" "") + (match_operand 1 "const_int_operand" "") + (match_operand 2 "const_int_operand" "")) + (match_operand 3 "register_operand" ""))] + "" +{ + enum nds32_expand_result_type result = nds32_expand_insv (operands); + switch (result) + { + case EXPAND_DONE: + DONE; + break; + case EXPAND_FAIL: + FAIL; + break; + case EXPAND_CREATE_TEMPLATE: + break; + default: + gcc_unreachable (); + } +}) ;; Arithmetic instructions. 
-(define_insn "add<mode>3" - [(set (match_operand:QIHISI 0 "register_operand" "= d, l, d, l, d, l, k, l, r, r") - (plus:QIHISI (match_operand:QIHISI 1 "register_operand" "% 0, l, 0, l, 0, l, 0, k, r, r") - (match_operand:QIHISI 2 "nds32_rimm15s_operand" " In05, In03, Iu05, Iu03, r, l, Is10, Iu06, Is15, r")))] +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "= d, l, d, l, d, l, k, l, r, r") + (plus:SI (match_operand:SI 1 "register_operand" "% 0, l, 0, l, 0, l, 0, k, r, r") + (match_operand:SI 2 "nds32_rimm15s_operand" " In05,In03,Iu05,Iu03, r, l,Is10,IU06, Is15, r")))] "" { switch (which_alternative) @@ -298,19 +465,20 @@ gcc_unreachable (); } } - [(set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu") - (set_attr "length" " 2, 2, 2, 2, 2, 2, 2, 2, 4, 4")]) + [(set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu") + (set_attr "length" " 2, 2, 2, 2, 2, 2, 2, 2, 4, 4") + (set_attr "feature" " v1, v1, v1, v1, v1, v1, v2, v1, v1, v1")]) -(define_insn "sub<mode>3" - [(set (match_operand:QIHISI 0 "register_operand" "=d, l, r, r") - (minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" " 0, l, Is15, r") - (match_operand:QIHISI 2 "register_operand" " r, l, r, r")))] +(define_insn "subsi3" + [(set (match_operand:SI 0 "register_operand" "=d, l, r, r") + (minus:SI (match_operand:SI 1 "nds32_rimm15s_operand" " 0, l, Is15, r") + (match_operand:SI 2 "register_operand" " r, l, r, r")))] "" "@ - sub45\t%0, %2 - sub333\t%0, %1, %2 - subri\t%0, %2, %1 - sub\t%0, %1, %2" + sub45\t%0, %2 + sub333\t%0, %1, %2 + subri\t%0, %2, %1 + sub\t%0, %1, %2" [(set_attr "type" "alu,alu,alu,alu") (set_attr "length" " 2, 2, 4, 4")]) @@ -323,7 +491,7 @@ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" " r") (match_operand:SI 2 "immediate_operand" " i")) (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3 + "TARGET_ISA_V3 && optimize_size && (exact_log2 (INTVAL (operands[2])) != -1) && (exact_log2 (INTVAL (operands[2])) <= 31)" { @@ -333,18 
+501,20 @@ return "add_slli\t%0, %3, %1, %2"; } - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "combo" "2") + (set_attr "length" "4")]) (define_insn "*add_srli" - [(set (match_operand:SI 0 "register_operand" "= r") - (plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "immediate_operand" " Iu05")) - (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3" + [(set (match_operand:SI 0 "register_operand" "= r") + (plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) + (match_operand:SI 3 "register_operand" " r")))] + "TARGET_ISA_V3 && optimize_size" "add_srli\t%0, %3, %1, %2" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "combo" "2") + (set_attr "length" "4")]) ;; GCC intends to simplify (minus (reg) (ashift ...)) @@ -355,7 +525,7 @@ (minus:SI (match_operand:SI 1 "register_operand" " r") (mult:SI (match_operand:SI 2 "register_operand" " r") (match_operand:SI 3 "immediate_operand" " i"))))] - "TARGET_ISA_V3 + "TARGET_ISA_V3 && optimize_size && (exact_log2 (INTVAL (operands[3])) != -1) && (exact_log2 (INTVAL (operands[3])) <= 31)" { @@ -365,18 +535,20 @@ return "sub_slli\t%0, %1, %2, %3"; } - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "combo" "2") + (set_attr "length" "4")]) (define_insn "*sub_srli" - [(set (match_operand:SI 0 "register_operand" "= r") - (minus:SI (match_operand:SI 1 "register_operand" " r") - (lshiftrt:SI (match_operand:SI 2 "register_operand" " r") - (match_operand:SI 3 "immediate_operand" " Iu05"))))] - "TARGET_ISA_V3" + [(set (match_operand:SI 0 "register_operand" "= r") + (minus:SI (match_operand:SI 1 "register_operand" " r") + (lshiftrt:SI (match_operand:SI 2 "register_operand" " r") + (match_operand:SI 3 "nds32_imm5u_operand" " Iu05"))))] + "TARGET_ISA_V3 && optimize_size" 
"sub_srli\t%0, %1, %2, %3" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "combo" "2") + (set_attr "length" "4")]) ;; Multiplication instructions. @@ -387,10 +559,11 @@ (match_operand:SI 2 "register_operand" " w, r")))] "" "@ - mul33\t%0, %2 - mul\t%0, %1, %2" - [(set_attr "type" "alu,alu") - (set_attr "length" " 2, 4")]) + mul33\t%0, %2 + mul\t%0, %1, %2" + [(set_attr "type" "mul,mul") + (set_attr "length" " 2, 4") + (set_attr "feature" "v3m, v1")]) (define_insn "mulsidi3" [(set (match_operand:DI 0 "register_operand" "=r") @@ -398,7 +571,7 @@ (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))] "TARGET_ISA_V2 || TARGET_ISA_V3" "mulsr64\t%0, %1, %2" - [(set_attr "type" "alu") + [(set_attr "type" "mul") (set_attr "length" "4")]) (define_insn "umulsidi3" @@ -407,7 +580,7 @@ (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))] "TARGET_ISA_V2 || TARGET_ISA_V3" "mulr64\t%0, %1, %2" - [(set_attr "type" "alu") + [(set_attr "type" "mul") (set_attr "length" "4")]) @@ -420,7 +593,7 @@ (match_operand:SI 2 "register_operand" " r"))))] "" "maddr32\t%0, %1, %2" - [(set_attr "type" "alu") + [(set_attr "type" "mac") (set_attr "length" "4")]) (define_insn "*maddr32_1" @@ -430,7 +603,7 @@ (match_operand:SI 3 "register_operand" " 0")))] "" "maddr32\t%0, %1, %2" - [(set_attr "type" "alu") + [(set_attr "type" "mac") (set_attr "length" "4")]) (define_insn "*msubr32" @@ -440,7 +613,7 @@ (match_operand:SI 2 "register_operand" " r"))))] "" "msubr32\t%0, %1, %2" - [(set_attr "type" "alu") + [(set_attr "type" "mac") (set_attr "length" "4")]) @@ -454,7 +627,7 @@ (mod:SI (match_dup 1) (match_dup 2)))] "" "divsr\t%0, %3, %1, %2" - [(set_attr "type" "alu") + [(set_attr "type" "div") (set_attr "length" "4")]) (define_insn "udivmodsi4" @@ -465,9 +638,29 @@ (umod:SI (match_dup 1) (match_dup 2)))] "" "divr\t%0, %3, %1, %2" - [(set_attr "type" "alu") + [(set_attr "type" "div") (set_attr "length" "4")]) +;; divsr/divr will 
keep quotient only when quotient and remainder are the same +;; register in our ISA spec, so it can reduce register pressure by 1 if we don't +;; want remainder. +(define_insn "divsi4" + [(set (match_operand:SI 0 "register_operand" "=r") + (div:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")))] + "" + "divsr\t%0, %0, %1, %2" + [(set_attr "type" "div") + (set_attr "length" "4")]) + +(define_insn "udivsi4" + [(set (match_operand:SI 0 "register_operand" "=r") + (udiv:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "register_operand" " r")))] + "" + "divr\t%0, %0, %1, %2" + [(set_attr "type" "div") + (set_attr "length" "4")]) ;; ---------------------------------------------------------------------------- @@ -488,14 +681,28 @@ (set_attr "length" "4")] ) -(define_insn "andsi3" - [(set (match_operand:SI 0 "register_operand" "=w, r, l, l, l, l, l, l, r, r, r, r, r") - (and:SI (match_operand:SI 1 "register_operand" "%0, r, l, l, l, l, 0, 0, r, r, r, r, r") - (match_operand:SI 2 "general_operand" " w, r, Izeb, Izeh, Ixls, Ix11, Ibms, Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))] +(define_expand "andsi3" + [(set (match_operand:SI 0 "register_operand" "") + (and:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nds32_reg_constant_operand" "")))] + "" +{ + if (CONST_INT_P (operands[2]) + && !nds32_and_operand (operands[2], SImode)) + { + nds32_expand_constant (SImode, INTVAL (operands[2]), + operands[0], operands[1]); + DONE; + } +}) + +(define_insn "*andsi3" + [(set (match_operand:SI 0 "register_operand" "=l, r, l, l, l, l, l, l, r, r, r, r, r") + (and:SI (match_operand:SI 1 "register_operand" "%0, r, l, l, l, l, 0, 0, r, r, r, r, r") + (match_operand:SI 2 "nds32_and_operand" " l, r,Izeb,Izeh,Ixls,Ix11,Ibms,Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))] "" { HOST_WIDE_INT mask = INTVAL (operands[2]); - int zero_position; /* 16-bit andi instructions: andi Rt3,Ra3,0xff -> zeb33 Rt3,Ra3 @@ -520,8 +727,7 @@ case 
5: return "x11b33\t%0, %1"; case 6: - operands[2] = GEN_INT (floor_log2 (mask)); - return "bmski33\t%0, %2"; + return "bmski33\t%0, %B2"; case 7: operands[2] = GEN_INT (floor_log2 (mask + 1) - 1); return "fexti33\t%0, %2"; @@ -535,47 +741,35 @@ operands[2] = GEN_INT (~mask); return "bitci\t%0, %1, %2"; case 12: - /* If we reach this alternative, - it must pass the nds32_can_use_bclr_p() test, - so that we can guarantee there is only one 0-bit - within the immediate value. */ - for (zero_position = 31; zero_position >= 0; zero_position--) - { - if ((INTVAL (operands[2]) & (1 << zero_position)) == 0) - { - /* Found the 0-bit position. */ - operands[2] = GEN_INT (zero_position); - break; - } - } - return "bclr\t%0, %1, %2"; + return "bclr\t%0, %1, %b2"; default: gcc_unreachable (); } } - [(set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu") - (set_attr "length" " 2, 4, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4")]) + [(set_attr "type" "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu") + (set_attr "length" " 2, 4, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4") + (set_attr "feature" "v3m, v1, v1, v1, v1, v1,v3m,v3m, v1, v1, v1, v3,pe1")]) (define_insn "*and_slli" - [(set (match_operand:SI 0 "register_operand" "= r") - (and:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "immediate_operand" " Iu05")) - (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3" + [(set (match_operand:SI 0 "register_operand" "= r") + (and:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) + (match_operand:SI 3 "register_operand" " r")))] + "TARGET_ISA_V3 && optimize_size" "and_slli\t%0, %3, %1, %2" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "length" "4")]) (define_insn "*and_srli" - [(set (match_operand:SI 0 "register_operand" "= r") - (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 
"immediate_operand" " Iu05")) - (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3" + [(set (match_operand:SI 0 "register_operand" "= r") + (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) + (match_operand:SI 3 "register_operand" " r")))] + "TARGET_ISA_V3 && optimize_size" "and_srli\t%0, %3, %1, %2" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "length" "4")]) ;; ---------------------------------------------------------------------------- @@ -584,58 +778,50 @@ ;; For V3/V3M ISA, we have 'or33' instruction. ;; So we can identify 'or Rt3,Rt3,Ra3' case and set its length to be 2. -(define_insn "iorsi3" - [(set (match_operand:SI 0 "register_operand" "=w, r, r, r") - (ior:SI (match_operand:SI 1 "register_operand" "%0, r, r, r") - (match_operand:SI 2 "general_operand" " w, r, Iu15, Ie15")))] + +(define_expand "iorsi3" + [(set (match_operand:SI 0 "register_operand" "") + (ior:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")))] "" { - int one_position; + if (!nds32_ior_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); +}) - switch (which_alternative) - { - case 0: - return "or33\t%0, %2"; - case 1: - return "or\t%0, %1, %2"; - case 2: - return "ori\t%0, %1, %2"; - case 3: - /* If we reach this alternative, - it must pass the nds32_can_use_bset_p() test, - so that we can guarantee there is only one 1-bit - within the immediate value. */ - /* Use exact_log2() to search the 1-bit position. 
*/ - one_position = exact_log2 (INTVAL (operands[2])); - operands[2] = GEN_INT (one_position); - return "bset\t%0, %1, %2"; - - default: - gcc_unreachable (); - } -} - [(set_attr "type" "alu,alu,alu,alu") - (set_attr "length" " 2, 4, 4, 4")]) +(define_insn "*iorsi3" + [(set (match_operand:SI 0 "register_operand" "=l, r, r, r") + (ior:SI (match_operand:SI 1 "register_operand" "%0, r, r, r") + (match_operand:SI 2 "nds32_ior_operand" " l, r, Iu15, Ie15")))] + "" + "@ + or33\t%0, %2 + or\t%0, %1, %2 + ori\t%0, %1, %2 + bset\t%0, %1, %B2" + [(set_attr "type" "alu,alu,alu,alu") + (set_attr "length" " 2, 4, 4, 4") + (set_attr "feature" "v3m, v1, v1,pe1")]) (define_insn "*or_slli" - [(set (match_operand:SI 0 "register_operand" "= r") - (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "immediate_operand" " Iu05")) - (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3" + [(set (match_operand:SI 0 "register_operand" "= r") + (ior:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) + (match_operand:SI 3 "register_operand" " r")))] + "TARGET_ISA_V3 && optimize_size" "or_slli\t%0, %3, %1, %2" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "length" "4")]) (define_insn "*or_srli" - [(set (match_operand:SI 0 "register_operand" "= r") - (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "immediate_operand" " Iu05")) - (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3" + [(set (match_operand:SI 0 "register_operand" "= r") + (ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) + (match_operand:SI 3 "register_operand" " r")))] + "TARGET_ISA_V3 && optimize_size" "or_srli\t%0, %3, %1, %2" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "length" "4")]) ;; 
---------------------------------------------------------------------------- @@ -644,71 +830,64 @@ ;; For V3/V3M ISA, we have 'xor33' instruction. ;; So we can identify 'xor Rt3,Rt3,Ra3' case and set its length to be 2. -(define_insn "xorsi3" - [(set (match_operand:SI 0 "register_operand" "=w, r, r, r") - (xor:SI (match_operand:SI 1 "register_operand" "%0, r, r, r") - (match_operand:SI 2 "general_operand" " w, r, Iu15, It15")))] + +(define_expand "xorsi3" + [(set (match_operand:SI 0 "register_operand" "") + (xor:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "general_operand" "")))] "" { - int one_position; + if (!nds32_xor_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); +}) - switch (which_alternative) - { - case 0: - return "xor33\t%0, %2"; - case 1: - return "xor\t%0, %1, %2"; - case 2: - return "xori\t%0, %1, %2"; - case 3: - /* If we reach this alternative, - it must pass the nds32_can_use_btgl_p() test, - so that we can guarantee there is only one 1-bit - within the immediate value. */ - /* Use exact_log2() to search the 1-bit position. 
*/ - one_position = exact_log2 (INTVAL (operands[2])); - operands[2] = GEN_INT (one_position); - return "btgl\t%0, %1, %2"; - - default: - gcc_unreachable (); - } -} - [(set_attr "type" "alu,alu,alu,alu") - (set_attr "length" " 2, 4, 4, 4")]) +(define_insn "*xorsi3" + [(set (match_operand:SI 0 "register_operand" "=l, r, r, r") + (xor:SI (match_operand:SI 1 "register_operand" "%0, r, r, r") + (match_operand:SI 2 "nds32_xor_operand" " l, r, Iu15, It15")))] + "" + "@ + xor33\t%0, %2 + xor\t%0, %1, %2 + xori\t%0, %1, %2 + btgl\t%0, %1, %B2" + [(set_attr "type" "alu,alu,alu,alu") + (set_attr "length" " 2, 4, 4, 4") + (set_attr "feature" "v3m, v1, v1,pe1")]) (define_insn "*xor_slli" [(set (match_operand:SI 0 "register_operand" "= r") (xor:SI (ashift:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "immediate_operand" " Iu05")) + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3" + "TARGET_ISA_V3 && optimize_size" "xor_slli\t%0, %3, %1, %2" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "length" "4")]) (define_insn "*xor_srli" - [(set (match_operand:SI 0 "register_operand" "= r") - (xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") - (match_operand:SI 2 "immediate_operand" " Iu05")) - (match_operand:SI 3 "register_operand" " r")))] - "TARGET_ISA_V3" + [(set (match_operand:SI 0 "register_operand" "= r") + (xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" " r") + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")) + (match_operand:SI 3 "register_operand" " r")))] + "TARGET_ISA_V3 && optimize_size" "xor_srli\t%0, %3, %1, %2" - [(set_attr "type" "alu") - (set_attr "length" "4")]) + [(set_attr "type" "alu_shift") + (set_attr "length" "4")]) ;; Rotate Right Instructions. 
-(define_insn "rotrsi3" - [(set (match_operand:SI 0 "register_operand" "= r, r") - (rotatert:SI (match_operand:SI 1 "register_operand" " r, r") - (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))] +(define_insn "*rotrsi3" + [(set (match_operand:SI 0 "register_operand" "= r, r") + (rotatert:SI (match_operand:SI 1 "register_operand" " r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r")))] "" "@ - rotri\t%0, %1, %2 - rotr\t%0, %1, %2" - [(set_attr "type" "alu,alu") - (set_attr "length" " 4, 4")]) + rotri\t%0, %1, %2 + rotr\t%0, %1, %2" + [(set_attr "type" " alu, alu") + (set_attr "subtype" "shift,shift") + (set_attr "length" " 4, 4")]) ;; ---------------------------------------------------------------------------- @@ -720,14 +899,95 @@ ;; And for V2 ISA, there is NO 'neg33' instruction. ;; The only option is to use 'subri A,B,0' (its semantic is 'A = 0 - B'). (define_insn "negsi2" - [(set (match_operand:SI 0 "register_operand" "=w, r") - (neg:SI (match_operand:SI 1 "register_operand" " w, r")))] + [(set (match_operand:SI 0 "register_operand" "=l, r") + (neg:SI (match_operand:SI 1 "register_operand" " l, r")))] "" "@ neg33\t%0, %1 subri\t%0, %1, 0" - [(set_attr "type" "alu,alu") - (set_attr "length" " 2, 4")]) + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4") + (set_attr "feature" "v3m, v1")]) + +(define_expand "negsf2" + [(set (match_operand:SF 0 "register_operand" "") + (neg:SF (match_operand:SF 1 "register_operand" "")))] + "" +{ + if (!TARGET_FPU_SINGLE && !TARGET_EXT_PERF) + { + rtx new_dst = simplify_gen_subreg (SImode, operands[0], SFmode, 0); + rtx new_src = simplify_gen_subreg (SImode, operands[1], SFmode, 0); + + emit_insn (gen_xorsi3 (new_dst, + new_src, + gen_int_mode (0x80000000, SImode))); + + DONE; + } +}) + +(define_expand "negdf2" + [(set (match_operand:DF 0 "register_operand" "") + (neg:DF (match_operand:DF 1 "register_operand" "")))] + "" +{ +}) + +(define_insn_and_split "soft_negdf2" + [(set (match_operand:DF 0 
"register_operand" "") + (neg:DF (match_operand:DF 1 "register_operand" "")))] + "!TARGET_FPU_DOUBLE" + "#" + "!TARGET_FPU_DOUBLE" + [(const_int 1)] +{ + rtx src = operands[1]; + rtx dst = operands[0]; + rtx ori_dst = operands[0]; + + bool need_extra_move_for_dst_p; + /* FPU register can't change mode to SI directly, so we need create a + tmp register to handle it, and FPU register can't do `xor` or btgl. */ + if (HARD_REGISTER_P (src) + && TEST_HARD_REG_BIT (reg_class_contents[FP_REGS], REGNO (src))) + { + rtx tmp = gen_reg_rtx (DFmode); + emit_move_insn (tmp, src); + src = tmp; + } + + if (HARD_REGISTER_P (dst) + && TEST_HARD_REG_BIT (reg_class_contents[FP_REGS], REGNO (dst))) + { + need_extra_move_for_dst_p = true; + rtx tmp = gen_reg_rtx (DFmode); + dst = tmp; + } + + rtx dst_high_part = simplify_gen_subreg ( + SImode, dst, + DFmode, subreg_highpart_offset (SImode, DFmode)); + rtx dst_low_part = simplify_gen_subreg ( + SImode, dst, + DFmode, subreg_lowpart_offset (SImode, DFmode)); + rtx src_high_part = simplify_gen_subreg ( + SImode, src, + DFmode, subreg_highpart_offset (SImode, DFmode)); + rtx src_low_part = simplify_gen_subreg ( + SImode, src, + DFmode, subreg_lowpart_offset (SImode, DFmode)); + + emit_insn (gen_xorsi3 (dst_high_part, + src_high_part, + gen_int_mode (0x80000000, SImode))); + emit_move_insn (dst_low_part, src_low_part); + + if (need_extra_move_for_dst_p) + emit_move_insn (ori_dst, dst); + + DONE; +}) ;; ---------------------------------------------------------------------------- @@ -743,49 +1003,66 @@ "@ not33\t%0, %1 nor\t%0, %1, %1" - [(set_attr "type" "alu,alu") - (set_attr "length" " 2, 4")]) + [(set_attr "type" "alu,alu") + (set_attr "length" " 2, 4") + (set_attr "feature" "v3m, v1")]) ;; ---------------------------------------------------------------------------- ;; Shift instructions. 
-(define_insn "ashlsi3" - [(set (match_operand:SI 0 "register_operand" "= l, r, r") - (ashift:SI (match_operand:SI 1 "register_operand" " l, r, r") - (match_operand:SI 2 "nonmemory_operand" " Iu03, Iu05, r")))] +(define_expand "<shift>si3" + [(set (match_operand:SI 0 "register_operand" "") + (shift_rotate:SI (match_operand:SI 1 "register_operand" "") + (match_operand:SI 2 "nds32_rimm5u_operand" "")))] "" - "@ - slli333\t%0, %1, %2 - slli\t%0, %1, %2 - sll\t%0, %1, %2" - [(set_attr "type" "alu,alu,alu") - (set_attr "length" " 2, 4, 4")]) +{ + if (operands[2] == const0_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } +}) -(define_insn "ashrsi3" - [(set (match_operand:SI 0 "register_operand" "= d, r, r") - (ashiftrt:SI (match_operand:SI 1 "register_operand" " 0, r, r") - (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))] +(define_insn "*ashlsi3" + [(set (match_operand:SI 0 "register_operand" "= l, r, r") + (ashift:SI (match_operand:SI 1 "register_operand" " l, r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " Iu03, Iu05, r")))] "" "@ - srai45\t%0, %2 - srai\t%0, %1, %2 - sra\t%0, %1, %2" - [(set_attr "type" "alu,alu,alu") - (set_attr "length" " 2, 4, 4")]) + slli333\t%0, %1, %2 + slli\t%0, %1, %2 + sll\t%0, %1, %2" + [(set_attr "type" " alu, alu, alu") + (set_attr "subtype" "shift,shift,shift") + (set_attr "length" " 2, 4, 4")]) -(define_insn "lshrsi3" - [(set (match_operand:SI 0 "register_operand" "= d, r, r") - (lshiftrt:SI (match_operand:SI 1 "register_operand" " 0, r, r") - (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))] +(define_insn "*ashrsi3" + [(set (match_operand:SI 0 "register_operand" "= d, r, r") + (ashiftrt:SI (match_operand:SI 1 "register_operand" " 0, r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, Iu05, r")))] "" "@ - srli45\t%0, %2 - srli\t%0, %1, %2 - srl\t%0, %1, %2" - [(set_attr "type" "alu,alu,alu") - (set_attr "length" " 2, 4, 4")]) + srai45\t%0, %2 + srai\t%0, %1, %2 + sra\t%0, %1, %2" + 
[(set_attr "type" " alu, alu, alu") + (set_attr "subtype" "shift,shift,shift") + (set_attr "length" " 2, 4, 4")]) + +(define_insn "*lshrsi3" + [(set (match_operand:SI 0 "register_operand" "= d, r, r") + (lshiftrt:SI (match_operand:SI 1 "register_operand" " 0, r, r") + (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, Iu05, r")))] + "" + "@ + srli45\t%0, %2 + srli\t%0, %1, %2 + srl\t%0, %1, %2" + [(set_attr "type" " alu, alu, alu") + (set_attr "subtype" "shift,shift,shift") + (set_attr "length" " 2, 4, 4")]) ;; ---------------------------------------------------------------------------- @@ -794,148 +1071,65 @@ ;; Conditional Move patterns ;; ---------------------------------------------------------------------------- -(define_expand "movsicc" - [(set (match_operand:SI 0 "register_operand" "") - (if_then_else:SI (match_operand 1 "comparison_operator" "") - (match_operand:SI 2 "register_operand" "") - (match_operand:SI 3 "register_operand" "")))] - "TARGET_CMOV" +(define_expand "mov<mode>cc" + [(set (match_operand:QIHISI 0 "register_operand" "") + (if_then_else:QIHISI (match_operand 1 "nds32_movecc_comparison_operator" "") + (match_operand:QIHISI 2 "register_operand" "") + (match_operand:QIHISI 3 "register_operand" "")))] + "TARGET_CMOV && !optimize_size" { - if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) - && GET_MODE (XEXP (operands[1], 0)) == SImode - && XEXP (operands[1], 1) == const0_rtx) - { - /* If the operands[1] rtx is already (eq X 0) or (ne X 0), - we have gcc generate original template rtx. */ - goto create_template; - } - else + enum nds32_expand_result_type result = nds32_expand_movcc (operands); + switch (result) { - /* Since there is only 'slt'(Set when Less Than) instruction for - comparison in Andes ISA, the major strategy we use here is to - convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination. 
- We design constraints properly so that the reload phase will assist - to make one source operand to use same register as result operand. - Then we can use cmovz/cmovn to catch the other source operand - which has different register. */ - enum rtx_code code = GET_CODE (operands[1]); - enum rtx_code new_code = code; - rtx cmp_op0 = XEXP (operands[1], 0); - rtx cmp_op1 = XEXP (operands[1], 1); - rtx tmp; - int reverse = 0; - - /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part - Strategy : Reverse condition and swap comparison operands - - For example: - - a <= b ? P : Q (LE or LEU) - --> a > b ? Q : P (reverse condition) - --> b < a ? Q : P (swap comparison operands to achieve 'LT/LTU') - - a >= b ? P : Q (GE or GEU) - --> a < b ? Q : P (reverse condition to achieve 'LT/LTU') - - a < b ? P : Q (LT or LTU) - --> (NO NEED TO CHANGE, it is already 'LT/LTU') - - a > b ? P : Q (GT or GTU) - --> b < a ? P : Q (swap comparison operands to achieve 'LT/LTU') */ - switch (code) - { - case NE: - /* (a != b ? P : Q) - can be expressed as - (a == b ? Q : P) - so, fall through to reverse condition */ - case GE: case GEU: case LE: case LEU: - new_code = reverse_condition (code); - reverse = 1; - break; - case EQ: case GT: case GTU: case LT: case LTU: - /* no need to reverse condition */ - break; - default: - FAIL; - } - - /* For '>' comparison operator, we swap operands - so that we can have 'LT/LTU' operator. */ - if (new_code == GT || new_code == GTU) - { - tmp = cmp_op0; - cmp_op0 = cmp_op1; - cmp_op1 = tmp; - - new_code = swap_condition (new_code); - } - - /* Use a temporary register to store slt/slts result. */ - tmp = gen_reg_rtx (SImode); - - /* Split EQ and NE because we don't have direct comparison of EQ and NE. - If we don't split it, the conditional move transformation will fail - when producing (SET A (EQ B C)) or (SET A (NE B C)). 
*/ - if (new_code == EQ) - { - emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1)); - emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1))); - } - else if (new_code == NE) - { - emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1)); - emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp)); - } - else - /* This emit_insn will create corresponding 'slt/slts' insturction. */ - emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_ee (new_code, SImode, - cmp_op0, cmp_op1))); - - /* Change comparison semantic into (eq X 0) or (ne X 0) behavior - so that cmovz or cmovn will be matched later. - - For reverse condition cases, we want to create a semantic that: - (eq X 0) --> pick up "else" part - For normal cases, we want to create a semantic that: - (ne X 0) --> pick up "then" part - - Later we will have cmovz/cmovn instruction pattern to - match corresponding behavior and output instruction. */ - operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE, - VOIDmode, tmp, const0_rtx); + case EXPAND_DONE: + DONE; + break; + case EXPAND_FAIL: + FAIL; + break; + case EXPAND_CREATE_TEMPLATE: + break; + default: + gcc_unreachable (); } - -create_template: - do {} while(0); /* dummy line */ }) -(define_insn "cmovz" - [(set (match_operand:SI 0 "register_operand" "=r, r") - (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r") +(define_insn "cmovz<mode>" + [(set (match_operand:QIHISI 0 "register_operand" "=r, r") + (if_then_else:QIHISI (eq (match_operand:SI 1 "register_operand" " r, r") (const_int 0)) - (match_operand:SI 2 "register_operand" " r, 0") - (match_operand:SI 3 "register_operand" " 0, r")))] + (match_operand:QIHISI 2 "register_operand" " r, 0") + (match_operand:QIHISI 3 "register_operand" " 0, r")))] "TARGET_CMOV" "@ cmovz\t%0, %2, %1 cmovn\t%0, %3, %1" - [(set_attr "type" "move") + [(set_attr "type" "alu") (set_attr "length" "4")]) -(define_insn "cmovn" - [(set (match_operand:SI 0 "register_operand" "=r, r") - (if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r") +(define_insn 
"cmovn<mode>" + [(set (match_operand:QIHISI 0 "register_operand" "=r, r") + (if_then_else:QIHISI (ne (match_operand:SI 1 "register_operand" " r, r") (const_int 0)) - (match_operand:SI 2 "register_operand" " r, 0") - (match_operand:SI 3 "register_operand" " 0, r")))] + (match_operand:QIHISI 2 "register_operand" " r, 0") + (match_operand:QIHISI 3 "register_operand" " 0, r")))] "TARGET_CMOV" "@ cmovn\t%0, %2, %1 cmovz\t%0, %3, %1" - [(set_attr "type" "move") + [(set_attr "type" "alu") (set_attr "length" "4")]) +;; A hotfix to help RTL combiner to merge a cmovn insn and a zero_extend insn. +;; It should be removed once after we change the expansion form of the cmovn. +(define_insn "*cmovn_simplified_<mode>" + [(set (match_operand:QIHISI 0 "register_operand" "=r") + (if_then_else:QIHISI (match_operand:SI 1 "register_operand" "r") + (match_operand:QIHISI 2 "register_operand" "r") + (match_operand:QIHISI 3 "register_operand" "0")))] + "" + "cmovn\t%0, %2, %1" + [(set_attr "type" "alu")]) ;; ---------------------------------------------------------------------------- ;; Conditional Branch patterns @@ -950,573 +1144,188 @@ (pc)))] "" { - rtx tmp_reg; - enum rtx_code code; - - code = GET_CODE (operands[0]); - - /* If operands[2] is (const_int 0), - we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions. - So we have gcc generate original template rtx. */ - if (GET_CODE (operands[2]) == CONST_INT) - if (INTVAL (operands[2]) == 0) - if ((code != GTU) - && (code != GEU) - && (code != LTU) - && (code != LEU)) - goto create_template; - - /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than) - behavior for the comparison, we might need to generate other - rtx patterns to achieve same semantic. 
*/ - switch (code) + enum nds32_expand_result_type result = nds32_expand_cbranch (operands); + switch (result) { - case GT: - case GTU: - if (GET_CODE (operands[2]) == CONST_INT) - { - /* GT reg_A, const_int => !(LT reg_A, const_int + 1) */ - tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); - - /* We want to plus 1 into the integer value - of operands[2] to create 'slt' instruction. - This caculation is performed on the host machine, - which may be 64-bit integer. - So the meaning of caculation result may be - different from the 32-bit nds32 target. - - For example: - 0x7fffffff + 0x1 -> 0x80000000, - this value is POSITIVE on 64-bit machine, - but the expected value on 32-bit nds32 target - should be NEGATIVE value. - - Hence, instead of using GEN_INT(), we use gen_int_mode() to - explicitly create SImode constant rtx. */ - operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode); - - if (code == GT) - { - /* GT, use slts instruction */ - emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); - } - else - { - /* GTU, use slt instruction */ - emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); - } - - PUT_CODE (operands[0], EQ); - operands[1] = tmp_reg; - operands[2] = const0_rtx; - emit_insn (gen_cbranchsi4 (operands[0], operands[1], - operands[2], operands[3])); - - DONE; - } - else - { - /* GT reg_A, reg_B => LT reg_B, reg_A */ - tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); - - if (code == GT) - { - /* GT, use slts instruction */ - emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1])); - } - else - { - /* GTU, use slt instruction */ - emit_insn (gen_slt_compare (tmp_reg, operands[2], operands[1])); - } - - PUT_CODE (operands[0], NE); - operands[1] = tmp_reg; - operands[2] = const0_rtx; - emit_insn (gen_cbranchsi4 (operands[0], operands[1], - operands[2], operands[3])); - - DONE; - } - - case GE: - case GEU: - /* GE reg_A, reg_B => !(LT reg_A, reg_B) */ - /* GE reg_A, const_int => !(LT reg_A, const_int) */ - tmp_reg = gen_rtx_REG 
(SImode, TA_REGNUM); - - if (code == GE) - { - /* GE, use slts instruction */ - emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); - } - else - { - /* GEU, use slt instruction */ - emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); - } - - PUT_CODE (operands[0], EQ); - operands[1] = tmp_reg; - operands[2] = const0_rtx; - emit_insn (gen_cbranchsi4 (operands[0], operands[1], - operands[2], operands[3])); - + case EXPAND_DONE: DONE; - - case LT: - case LTU: - /* LT reg_A, reg_B => LT reg_A, reg_B */ - /* LT reg_A, const_int => LT reg_A, const_int */ - tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); - - if (code == LT) - { - /* LT, use slts instruction */ - emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); - } - else - { - /* LTU, use slt instruction */ - emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); - } - - PUT_CODE (operands[0], NE); - operands[1] = tmp_reg; - operands[2] = const0_rtx; - emit_insn (gen_cbranchsi4 (operands[0], operands[1], - operands[2], operands[3])); - - DONE; - - case LE: - case LEU: - if (GET_CODE (operands[2]) == CONST_INT) - { - /* LE reg_A, const_int => LT reg_A, const_int + 1 */ - tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); - - /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN). - We better have an assert here in case GCC does not properly - optimize it away. The INT_MAX here is 0x7fffffff for target. 
*/ - gcc_assert (code != LE || INTVAL (operands[2]) != 0x7fffffff); - operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode); - - if (code == LE) - { - /* LE, use slts instruction */ - emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); - } - else - { - /* LEU, use slt instruction */ - emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); - } - - PUT_CODE (operands[0], NE); - operands[1] = tmp_reg; - operands[2] = const0_rtx; - emit_insn (gen_cbranchsi4 (operands[0], operands[1], - operands[2], operands[3])); - - DONE; - } - else - { - /* LE reg_A, reg_B => !(LT reg_B, reg_A) */ - tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); - - if (code == LE) - { - /* LE, use slts instruction */ - emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1])); - } - else - { - /* LEU, use slt instruction */ - emit_insn (gen_slt_compare (tmp_reg, operands[2], operands[1])); - } - - PUT_CODE (operands[0], EQ); - operands[1] = tmp_reg; - operands[2] = const0_rtx; - emit_insn (gen_cbranchsi4 (operands[0], operands[1], - operands[2], operands[3])); - - DONE; - } - - case EQ: - case NE: - /* NDS32 ISA has various form for eq/ne behavior no matter - what kind of the operand is. - So just generate original template rtx. 
*/ - goto create_template; - + break; + case EXPAND_FAIL: + FAIL; + break; + case EXPAND_CREATE_TEMPLATE: + break; default: - FAIL; + gcc_unreachable (); } - -create_template: - do {} while(0); /* dummy line */ }) -(define_insn "*cbranchsi4_equality_zero" +(define_insn "cbranchsi4_equality_zero" [(set (pc) (if_then_else (match_operator 0 "nds32_equality_comparison_operator" - [(match_operand:SI 1 "register_operand" "t, l, r") + [(match_operand:SI 1 "register_operand" "t,l, r") (const_int 0)]) (label_ref (match_operand 2 "" "")) (pc)))] "" { - enum rtx_code code; - - code = GET_CODE (operands[0]); - - /* This zero-comparison conditional branch has two forms: - 32-bit instruction => beqz/bnez imm16s << 1 - 16-bit instruction => beqzs8/bnezs8/beqz38/bnez38 imm8s << 1 - - For 32-bit case, - we assume it is always reachable. (but check range -65500 ~ 65500) - - For 16-bit case, - it must satisfy { 255 >= (label - pc) >= -256 } condition. - However, since the $pc for nds32 is at the beginning of the instruction, - we should leave some length space for current insn. - So we use range -250 ~ 250. */ - - switch (get_attr_length (insn)) - { - case 2: - if (which_alternative == 0) - { - /* constraint: t */ - return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2"; - } - else if (which_alternative == 1) - { - /* constraint: l */ - return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2"; - } - else - { - /* constraint: r */ - /* For which_alternative==2, it should not be here. */ - gcc_unreachable (); - } - case 4: - /* including constraints: t, l, and r */ - return (code == EQ) ? 
"beqz\t%1, %2" : "bnez\t%1, %2"; - case 6: - if (which_alternative == 0) - { - /* constraint: t */ - if (code == EQ) - { - /* beqzs8 .L0 - => - bnezs8 .LCB0 - j .L0 - .LCB0: - */ - return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:"; - } - else - { - /* bnezs8 .L0 - => - beqzs8 .LCB0 - j .L0 - .LCB0: - */ - return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:"; - } - } - else if (which_alternative == 1) - { - /* constraint: l */ - if (code == EQ) - { - /* beqz38 $r0, .L0 - => - bnez38 $r0, .LCB0 - j .L0 - .LCB0: - */ - return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - } - else - { - /* bnez38 $r0, .L0 - => - beqz38 $r0, .LCB0 - j .L0 - .LCB0: - */ - return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - } - } - else - { - /* constraint: r */ - /* For which_alternative==2, it should not be here. */ - gcc_unreachable (); - } - case 8: - /* constraint: t, l, r. */ - if (code == EQ) - { - /* beqz $r8, .L0 - => - bnez $r8, .LCB0 - j .L0 - .LCB0: - */ - return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - } - else - { - /* bnez $r8, .L0 - => - beqz $r8, .LCB0 - j .L0 - .LCB0: - */ - return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - } - default: - gcc_unreachable (); - } + return nds32_output_cbranchsi4_equality_zero (insn, operands); } [(set_attr "type" "branch") - (set_attr "enabled" "1") + (set_attr_alternative "enabled" + [ + ;; Alternative 0 + (if_then_else (match_test "TARGET_16_BIT") + (const_string "yes") + (const_string "no")) + ;; Alternative 1 + (if_then_else (match_test "TARGET_16_BIT") + (const_string "yes") + (const_string "no")) + ;; Alternative 2 + (const_string "yes") + ]) (set_attr_alternative "length" [ ;; Alternative 0 - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250)) - (le (minus (match_dup 2) (pc)) (const_int 250))) - (if_then_else (match_test "TARGET_16_BIT") - (const_int 2) - (const_int 4)) + (if_then_else (match_test "!CROSSING_JUMP_P (insn)") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250)) + (le (minus (match_dup 2) (pc)) (const_int 250))) + 
(if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4)) + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) + (le (minus (match_dup 2) (pc)) (const_int 65500))) + (const_int 4) + (if_then_else (match_test "TARGET_16_BIT") + (const_int 8) + (const_int 10)))) + (const_int 10)) + ;; Alternative 1 + (if_then_else (match_test "!CROSSING_JUMP_P (insn)") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250)) + (le (minus (match_dup 2) (pc)) (const_int 250))) + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4)) + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) + (le (minus (match_dup 2) (pc)) (const_int 65500))) + (const_int 4) + (if_then_else (match_test "TARGET_16_BIT") + (const_int 8) + (const_int 10)))) + (const_int 10)) + ;; Alternative 2 + (if_then_else (match_test "!CROSSING_JUMP_P (insn)") (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) (le (minus (match_dup 2) (pc)) (const_int 65500))) (const_int 4) - (if_then_else (match_test "TARGET_16_BIT") - (const_int 6) - (const_int 8)))) - ;; Alternative 1 - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250)) - (le (minus (match_dup 2) (pc)) (const_int 250))) - (if_then_else (match_test "TARGET_16_BIT") - (const_int 2) - (const_int 4)) - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) - (le (minus (match_dup 2) (pc)) (const_int 65500))) - (const_int 4) - (if_then_else (match_test "TARGET_16_BIT") - (const_int 6) - (const_int 8)))) - ;; Alternative 2 - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) - (le (minus (match_dup 2) (pc)) (const_int 65500))) - (const_int 4) - (const_int 8)) + (const_int 10)) + (const_int 10)) ])]) ;; This pattern is dedicated to V2 ISA, ;; because V2 DOES NOT HAVE beqc/bnec instruction. 
-(define_insn "*cbranchsi4_equality_reg" +(define_insn "cbranchsi4_equality_reg" [(set (pc) (if_then_else (match_operator 0 "nds32_equality_comparison_operator" - [(match_operand:SI 1 "register_operand" "r") - (match_operand:SI 2 "nds32_reg_constant_operand" "r")]) + [(match_operand:SI 1 "register_operand" "v, r") + (match_operand:SI 2 "register_operand" "l, r")]) (label_ref (match_operand 3 "" "")) (pc)))] "TARGET_ISA_V2" { - enum rtx_code code; - - code = GET_CODE (operands[0]); - - /* This register-comparison conditional branch has one form: - 32-bit instruction => beq/bne imm14s << 1 - - For 32-bit case, - we assume it is always reachable. (but check range -16350 ~ 16350). */ - - switch (code) - { - case EQ: - /* r, r */ - switch (get_attr_length (insn)) - { - case 4: - return "beq\t%1, %2, %3"; - case 8: - /* beq $r0, $r1, .L0 - => - bne $r0, $r1, .LCB0 - j .L0 - .LCB0: - */ - return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; - default: - gcc_unreachable (); - } - - case NE: - /* r, r */ - switch (get_attr_length (insn)) - { - case 4: - return "bne\t%1, %2, %3"; - case 8: - /* bne $r0, $r1, .L0 - => - beq $r0, $r1, .LCB0 - j .L0 - .LCB0: - */ - return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; - default: - gcc_unreachable (); - } - - default: - gcc_unreachable (); - } + return nds32_output_cbranchsi4_equality_reg (insn, operands); } [(set_attr "type" "branch") - (set (attr "length") - (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350)) - (le (minus (match_dup 3) (pc)) (const_int 16350))) - (const_int 4) - (const_int 8)))]) + (set_attr_alternative "enabled" + [ + ;; Alternative 0 + (if_then_else (match_test "TARGET_16_BIT") + (const_string "yes") + (const_string "no")) + ;; Alternative 1 + (const_string "yes") + ]) + (set_attr_alternative "length" + [ + ;; Alternative 0 + (if_then_else (match_test "!CROSSING_JUMP_P (insn)") + (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 250))) + 
(const_int 2) + (if_then_else (and (ge (minus (match_dup 3) (pc)) + (const_int -16350)) + (le (minus (match_dup 3) (pc)) + (const_int 16350))) + (const_int 4) + (const_int 8))) + (const_int 8)) + ;; Alternative 1 + (if_then_else (match_test "!CROSSING_JUMP_P (insn)") + (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350)) + (le (minus (match_dup 3) (pc)) (const_int 16350))) + (const_int 4) + (const_int 10)) + (const_int 10)) + ])]) ;; This pattern is dedicated to V3/V3M, ;; because V3/V3M DO HAVE beqc/bnec instruction. -(define_insn "*cbranchsi4_equality_reg_or_const_int" +(define_insn "cbranchsi4_equality_reg_or_const_int" [(set (pc) (if_then_else (match_operator 0 "nds32_equality_comparison_operator" - [(match_operand:SI 1 "register_operand" "r, r") - (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")]) + [(match_operand:SI 1 "register_operand" "v, r, r") + (match_operand:SI 2 "nds32_rimm11s_operand" "l, r, Is11")]) (label_ref (match_operand 3 "" "")) (pc)))] "TARGET_ISA_V3 || TARGET_ISA_V3M" { - enum rtx_code code; - - code = GET_CODE (operands[0]); - - /* This register-comparison conditional branch has one form: - 32-bit instruction => beq/bne imm14s << 1 - 32-bit instruction => beqc/bnec imm8s << 1 - - For 32-bit case, we assume it is always reachable. - (but check range -16350 ~ 16350 and -250 ~ 250). 
*/ - - switch (code) - { - case EQ: - if (which_alternative == 0) - { - /* r, r */ - switch (get_attr_length (insn)) - { - case 4: - return "beq\t%1, %2, %3"; - case 8: - /* beq $r0, $r1, .L0 - => - bne $r0, $r1, .LCB0 - j .L0 - .LCB0: - */ - return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; - default: - gcc_unreachable (); - } - } - else - { - /* r, Is11 */ - switch (get_attr_length (insn)) - { - case 4: - return "beqc\t%1, %2, %3"; - case 8: - /* beqc $r0, constant, .L0 - => - bnec $r0, constant, .LCB0 - j .L0 - .LCB0: - */ - return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; - default: - gcc_unreachable (); - } - } - case NE: - if (which_alternative == 0) - { - /* r, r */ - switch (get_attr_length (insn)) - { - case 4: - return "bne\t%1, %2, %3"; - case 8: - /* bne $r0, $r1, .L0 - => - beq $r0, $r1, .LCB0 - j .L0 - .LCB0: - */ - return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; - default: - gcc_unreachable (); - } - } - else - { - /* r, Is11 */ - switch (get_attr_length (insn)) - { - case 4: - return "bnec\t%1, %2, %3"; - case 8: - /* bnec $r0, constant, .L0 - => - beqc $r0, constant, .LCB0 - j .L0 - .LCB0: - */ - return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:"; - default: - gcc_unreachable (); - } - } - default: - gcc_unreachable (); - } + return nds32_output_cbranchsi4_equality_reg_or_const_int (insn, operands); } [(set_attr "type" "branch") + (set_attr_alternative "enabled" + [ + ;; Alternative 0 + (if_then_else (match_test "TARGET_16_BIT") + (const_string "yes") + (const_string "no")) + ;; Alternative 1 + (const_string "yes") + ;; Alternative 2 + (const_string "yes") + ]) (set_attr_alternative "length" [ ;; Alternative 0 - (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350)) - (le (minus (match_dup 3) (pc)) (const_int 16350))) - (const_int 4) - (const_int 8)) + (if_then_else (match_test "!CROSSING_JUMP_P (insn)") + (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 250))) + (const_int 2) + 
(if_then_else (and (ge (minus (match_dup 3) (pc)) + (const_int -16350)) + (le (minus (match_dup 3) (pc)) + (const_int 16350))) + (const_int 4) + (const_int 8))) + (const_int 8)) ;; Alternative 1 - (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250)) - (le (minus (match_dup 3) (pc)) (const_int 250))) - (const_int 4) - (const_int 8)) + (if_then_else (match_test "!CROSSING_JUMP_P (insn)") + (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350)) + (le (minus (match_dup 3) (pc)) (const_int 16350))) + (const_int 4) + (const_int 10)) + (const_int 10)) + ;; Alternative 2 + (if_then_else (match_test "!CROSSING_JUMP_P (insn)") + (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250)) + (le (minus (match_dup 3) (pc)) (const_int 250))) + (const_int 4) + (const_int 10)) + (const_int 10)) ])]) @@ -1529,80 +1338,16 @@ (pc)))] "" { - enum rtx_code code; - - code = GET_CODE (operands[0]); - - /* This zero-greater-less-comparison conditional branch has one form: - 32-bit instruction => bgtz/bgez/bltz/blez imm16s << 1 - - For 32-bit case, we assume it is always reachable. - (but check range -65500 ~ 65500). */ - - if (get_attr_length (insn) == 8) - { - /* The branch target is too far to simply use one - bgtz/bgez/bltz/blez instruction. - We need to reverse condition and use 'j' to jump to the target. 
*/ - switch (code) - { - case GT: - /* bgtz $r8, .L0 - => - blez $r8, .LCB0 - j .L0 - .LCB0: - */ - return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - case GE: - /* bgez $r8, .L0 - => - bltz $r8, .LCB0 - j .L0 - .LCB0: - */ - return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - case LT: - /* bltz $r8, .L0 - => - bgez $r8, .LCB0 - j .L0 - .LCB0: - */ - return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - case LE: - /* blez $r8, .L0 - => - bgtz $r8, .LCB0 - j .L0 - .LCB0: - */ - return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:"; - default: - gcc_unreachable (); - } - } - - switch (code) - { - case GT: - return "bgtz\t%1, %2"; - case GE: - return "bgez\t%1, %2"; - case LT: - return "bltz\t%1, %2"; - case LE: - return "blez\t%1, %2"; - default: - gcc_unreachable (); - } + return nds32_output_cbranchsi4_greater_less_zero (insn, operands); } [(set_attr "type" "branch") (set (attr "length") - (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) - (le (minus (match_dup 2) (pc)) (const_int 65500))) - (const_int 4) - (const_int 8)))]) + (if_then_else (match_test "!CROSSING_JUMP_P (insn)") + (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500)) + (le (minus (match_dup 2) (pc)) (const_int 65500))) + (const_int 4) + (const_int 10)) + (const_int 10)))]) (define_expand "cstoresi4" @@ -1612,236 +1357,85 @@ (match_operand:SI 3 "nonmemory_operand" "")]))] "" { - rtx tmp_reg; - enum rtx_code code; - - code = GET_CODE (operands[1]); - - switch (code) + enum nds32_expand_result_type result = nds32_expand_cstore (operands); + switch (result) { - case EQ: - if (GET_CODE (operands[3]) == CONST_INT) - { - /* reg_R = (reg_A == const_int_B) - --> addi reg_C, reg_A, -const_int_B - slti reg_R, reg_C, const_int_1 */ - tmp_reg = gen_reg_rtx (SImode); - operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode); - /* If the integer value is not in the range of imm15s, - we need to force register first because our addsi3 pattern - only accept nds32_rimm15s_operand predicate. 
*/ - if (!satisfies_constraint_Is15 (operands[3])) - operands[3] = force_reg (SImode, operands[3]); - emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3])); - emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx)); - - DONE; - } - else - { - /* reg_R = (reg_A == reg_B) - --> xor reg_C, reg_A, reg_B - slti reg_R, reg_C, const_int_1 */ - tmp_reg = gen_reg_rtx (SImode); - emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3])); - emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx)); - - DONE; - } - - case NE: - if (GET_CODE (operands[3]) == CONST_INT) - { - /* reg_R = (reg_A != const_int_B) - --> addi reg_C, reg_A, -const_int_B - slti reg_R, const_int_0, reg_C */ - tmp_reg = gen_reg_rtx (SImode); - operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode); - /* If the integer value is not in the range of imm15s, - we need to force register first because our addsi3 pattern - only accept nds32_rimm15s_operand predicate. */ - if (!satisfies_constraint_Is15 (operands[3])) - operands[3] = force_reg (SImode, operands[3]); - emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3])); - emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg)); - - DONE; - } - else - { - /* reg_R = (reg_A != reg_B) - --> xor reg_C, reg_A, reg_B - slti reg_R, const_int_0, reg_C */ - tmp_reg = gen_reg_rtx (SImode); - emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3])); - emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg)); - - DONE; - } - - case GT: - case GTU: - /* reg_R = (reg_A > reg_B) --> slt reg_R, reg_B, reg_A */ - /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */ - if (code == GT) - { - /* GT, use slts instruction */ - emit_insn (gen_slts_compare (operands[0], operands[3], operands[2])); - } - else - { - /* GTU, use slt instruction */ - emit_insn (gen_slt_compare (operands[0], operands[3], operands[2])); - } - + case EXPAND_DONE: DONE; - - case GE: - case GEU: - if (GET_CODE (operands[3]) == CONST_INT) - { - /* reg_R 
= (reg_A >= const_int_B) - --> movi reg_C, const_int_B - 1 - slt reg_R, reg_C, reg_A */ - tmp_reg = gen_reg_rtx (SImode); - - emit_insn (gen_movsi (tmp_reg, - gen_int_mode (INTVAL (operands[3]) - 1, - SImode))); - if (code == GE) - { - /* GE, use slts instruction */ - emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2])); - } - else - { - /* GEU, use slt instruction */ - emit_insn (gen_slt_compare (operands[0], tmp_reg, operands[2])); - } - - DONE; - } - else - { - /* reg_R = (reg_A >= reg_B) - --> slt reg_R, reg_A, reg_B - xori reg_R, reg_R, const_int_1 */ - if (code == GE) - { - /* GE, use slts instruction */ - emit_insn (gen_slts_compare (operands[0], - operands[2], operands[3])); - } - else - { - /* GEU, use slt instruction */ - emit_insn (gen_slt_compare (operands[0], - operands[2], operands[3])); - } - - /* perform 'not' behavior */ - emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); - - DONE; - } - - case LT: - case LTU: - /* reg_R = (reg_A < reg_B) --> slt reg_R, reg_A, reg_B */ - /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */ - if (code == LT) - { - /* LT, use slts instruction */ - emit_insn (gen_slts_compare (operands[0], operands[2], operands[3])); - } - else - { - /* LTU, use slt instruction */ - emit_insn (gen_slt_compare (operands[0], operands[2], operands[3])); - } - - DONE; - - case LE: - case LEU: - if (GET_CODE (operands[3]) == CONST_INT) - { - /* reg_R = (reg_A <= const_int_B) - --> movi reg_C, const_int_B + 1 - slt reg_R, reg_A, reg_C */ - tmp_reg = gen_reg_rtx (SImode); - - emit_insn (gen_movsi (tmp_reg, - gen_int_mode (INTVAL (operands[3]) + 1, - SImode))); - if (code == LE) - { - /* LE, use slts instruction */ - emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg)); - } - else - { - /* LEU, use slt instruction */ - emit_insn (gen_slt_compare (operands[0], operands[2], tmp_reg)); - } - - DONE; - } - else - { - /* reg_R = (reg_A <= reg_B) --> slt reg_R, reg_B, reg_A - xori reg_R, reg_R, 
const_int_1 */ - if (code == LE) - { - /* LE, use slts instruction */ - emit_insn (gen_slts_compare (operands[0], - operands[3], operands[2])); - } - else - { - /* LEU, use slt instruction */ - emit_insn (gen_slt_compare (operands[0], - operands[3], operands[2])); - } - - /* perform 'not' behavior */ - emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); - - DONE; - } - - + break; + case EXPAND_FAIL: + FAIL; + break; + case EXPAND_CREATE_TEMPLATE: + break; default: gcc_unreachable (); } }) -(define_insn "slts_compare" - [(set (match_operand:SI 0 "register_operand" "=t, t, r, r") - (lt:SI (match_operand:SI 1 "nonmemory_operand" " d, d, r, r") - (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))] +(define_expand "slts_compare" + [(set (match_operand:SI 0 "register_operand" "") + (lt:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" +{ + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + + if (!REG_P (operands[2]) && !satisfies_constraint_Is15 (operands[2])) + operands[2] = force_reg (SImode, operands[2]); +}) + +(define_insn "slts_compare_impl" + [(set (match_operand:SI 0 "register_operand" "=t, t, r, r") + (lt:SI (match_operand:SI 1 "register_operand" " d, d, r, r") + (match_operand:SI 2 "nds32_rimm15s_operand" " r,Iu05, r, Is15")))] "" "@ slts45\t%1, %2 sltsi45\t%1, %2 slts\t%0, %1, %2 sltsi\t%0, %1, %2" - [(set_attr "type" "compare,compare,compare,compare") - (set_attr "length" " 2, 2, 4, 4")]) + [(set_attr "type" "alu, alu, alu, alu") + (set_attr "length" " 2, 2, 4, 4")]) + +(define_insn "slt_eq0" + [(set (match_operand:SI 0 "register_operand" "=t, r") + (eq:SI (match_operand:SI 1 "register_operand" " d, r") + (const_int 0)))] + "" + "@ + slti45\t%1, 1 + slti\t%0, %1, 1" + [(set_attr "type" "alu, alu") + (set_attr "length" " 2, 4")]) -(define_insn "slt_compare" - [(set (match_operand:SI 0 "register_operand" "=t, t, r, r") - (ltu:SI (match_operand:SI 1 
"nonmemory_operand" " d, d, r, r") - (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))] +(define_expand "slt_compare" + [(set (match_operand:SI 0 "register_operand" "") + (ltu:SI (match_operand:SI 1 "general_operand" "") + (match_operand:SI 2 "general_operand" "")))] + "" +{ + if (!REG_P (operands[1])) + operands[1] = force_reg (SImode, operands[1]); + + if (!REG_P (operands[2]) && !satisfies_constraint_Is15 (operands[2])) + operands[2] = force_reg (SImode, operands[2]); +}) + +(define_insn "slt_compare_impl" + [(set (match_operand:SI 0 "register_operand" "=t, t, r, r") + (ltu:SI (match_operand:SI 1 "register_operand" " d, d, r, r") + (match_operand:SI 2 "nds32_rimm15s_operand" " r, Iu05, r, Is15")))] "" "@ slt45\t%1, %2 slti45\t%1, %2 slt\t%0, %1, %2 slti\t%0, %1, %2" - [(set_attr "type" "compare,compare,compare,compare") - (set_attr "length" " 2, 2, 4, 4")]) + [(set_attr "type" "alu, alu, alu, alu") + (set_attr "length" " 2, 2, 4, 4")]) ;; ---------------------------------------------------------------------------- @@ -1874,12 +1468,14 @@ } } [(set_attr "type" "branch") - (set_attr "enabled" "1") + (set_attr "enabled" "yes") (set (attr "length") - (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) - (le (minus (match_dup 0) (pc)) (const_int 250))) - (if_then_else (match_test "TARGET_16_BIT") - (const_int 2) + (if_then_else (match_test "!CROSSING_JUMP_P (insn)") + (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250)) + (le (minus (match_dup 0) (pc)) (const_int 250))) + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4)) (const_int 4)) (const_int 4)))]) @@ -1887,8 +1483,8 @@ [(set (pc) (match_operand:SI 0 "register_operand" "r, r"))] "" "@ - jr5\t%0 - jr\t%0" + jr5\t%0 + jr\t%0" [(set_attr "type" "branch,branch") (set_attr "length" " 2, 4")]) @@ -1904,39 +1500,78 @@ (clobber (reg:SI LP_REGNUM)) (clobber (reg:SI TA_REGNUM))])] "" - "" + { + rtx insn; + rtx sym = XEXP (operands[0], 0); + + if 
(TARGET_ICT_MODEL_LARGE + && nds32_indirect_call_referenced_p (sym)) + { + rtx reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, sym); + operands[0] = gen_const_mem (Pmode, reg); + } + + if (flag_pic) + { + insn = emit_call_insn (gen_call_internal + (XEXP (operands[0], 0), GEN_INT (0))); + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); + DONE; + } + } ) -(define_insn "*call_register" - [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r")) - (match_operand 1)) - (clobber (reg:SI LP_REGNUM)) - (clobber (reg:SI TA_REGNUM))])] - "" - "@ - jral5\t%0 - jral\t%0" - [(set_attr "type" "branch,branch") - (set_attr "length" " 2, 4")]) - -(define_insn "*call_immediate" - [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i")) +(define_insn "call_internal" + [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, S")) (match_operand 1)) (clobber (reg:SI LP_REGNUM)) (clobber (reg:SI TA_REGNUM))])] "" { - if (TARGET_CMODEL_LARGE) - return "bal\t%0"; - else - return "jal\t%0"; + rtx_insn *next_insn = next_active_insn (insn); + bool align_p = (!(next_insn && get_attr_length (next_insn) == 2)) + && NDS32_ALIGN_P (); + switch (which_alternative) + { + case 0: + if (TARGET_16_BIT) + { + if (align_p) + return "jral5\t%0\;.align 2"; + else + return "jral5\t%0"; + } + else + { + if (align_p) + return "jral\t%0\;.align 2"; + else + return "jral\t%0"; + } + case 1: + return nds32_output_call (insn, operands, operands[0], + "bal\t%0", "jal\t%0", align_p); + default: + gcc_unreachable (); + } } - [(set_attr "type" "branch") - (set (attr "length") - (if_then_else (match_test "TARGET_CMODEL_LARGE") - (const_int 12) - (const_int 4)))]) - + [(set_attr "enabled" "yes") + (set_attr "type" "branch") + (set_attr_alternative "length" + [ + ;; Alternative 0 + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4)) + ;; Alternative 1 + (if_then_else (match_test "flag_pic") + (const_int 16) + (if_then_else 
(match_test "nds32_long_call_p (operands[0])") + (const_int 12) + (const_int 4))) + ])] +) ;; Subroutine call instruction returning a value. ;; operands[0]: It is the hard regiser in which the value is returned. @@ -1951,49 +1586,114 @@ (clobber (reg:SI LP_REGNUM)) (clobber (reg:SI TA_REGNUM))])] "" - "" + { + rtx insn; + rtx sym = XEXP (operands[1], 0); + + if (TARGET_ICT_MODEL_LARGE + && nds32_indirect_call_referenced_p (sym)) + { + rtx reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, sym); + operands[1] = gen_const_mem (Pmode, reg); + } + + if (flag_pic) + { + insn = + emit_call_insn (gen_call_value_internal + (operands[0], XEXP (operands[1], 0), GEN_INT (0))); + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); + DONE; + } + } ) -(define_insn "*call_value_register" +(define_insn "call_value_internal" [(parallel [(set (match_operand 0) - (call (mem (match_operand:SI 1 "register_operand" "r, r")) + (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, S")) (match_operand 2))) (clobber (reg:SI LP_REGNUM)) (clobber (reg:SI TA_REGNUM))])] "" - "@ - jral5\t%1 - jral\t%1" - [(set_attr "type" "branch,branch") - (set_attr "length" " 2, 4")]) +{ + rtx_insn *next_insn = next_active_insn (insn); + bool align_p = (!(next_insn && get_attr_length (next_insn) == 2)) + && NDS32_ALIGN_P (); + switch (which_alternative) + { + case 0: + if (TARGET_16_BIT) + { + if (align_p) + return "jral5\t%1\;.align 2"; + else + return "jral5\t%1"; + } + else + { + if (align_p) + return "jral\t%1\;.align 2"; + else + return "jral\t%1"; + } + case 1: + return nds32_output_call (insn, operands, operands[1], + "bal\t%1", "jal\t%1", align_p); + default: + gcc_unreachable (); + } +} + [(set_attr "enabled" "yes") + (set_attr "type" "branch") + (set_attr_alternative "length" + [ + ;; Alternative 0 + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4)) + ;; Alternative 1 + (if_then_else (match_test "flag_pic") + (const_int 16) + (if_then_else (match_test 
"nds32_long_call_p (operands[1])") + (const_int 12) + (const_int 4))) + ])] +) -(define_insn "*call_value_immediate" - [(parallel [(set (match_operand 0) - (call (mem (match_operand:SI 1 "immediate_operand" "i")) - (match_operand 2))) - (clobber (reg:SI LP_REGNUM)) - (clobber (reg:SI TA_REGNUM))])] +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0 "" "") + (const_int 0)) + (match_operand 1 "" "") + (match_operand 2 "" "")])] "" { - if (TARGET_CMODEL_LARGE) - return "bal\t%1"; - else - return "jal\t%1"; -} - [(set_attr "type" "branch") - (set (attr "length") - (if_then_else (match_test "TARGET_CMODEL_LARGE") - (const_int 12) - (const_int 4)))]) + int i; + + emit_call_insn (gen_call (operands[0], const0_rtx)); + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + DONE; +}) ;; ---------------------------------------------------------------------------- ;; The sibcall patterns. 
;; sibcall -;; sibcall_register -;; sibcall_immediate +;; sibcall_internal (define_expand "sibcall" [(parallel [(call (match_operand 0 "memory_operand" "") @@ -2001,41 +1701,60 @@ (clobber (reg:SI TA_REGNUM)) (return)])] "" - "" -) +{ + rtx sym = XEXP (operands[0], 0); -(define_insn "*sibcall_register" - [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r")) - (match_operand 1)) - (clobber (reg:SI TA_REGNUM)) - (return)])] - "" - "@ - jr5\t%0 - jr\t%0" - [(set_attr "type" "branch,branch") - (set_attr "length" " 2, 4")]) + if (TARGET_ICT_MODEL_LARGE + && nds32_indirect_call_referenced_p (sym)) + { + rtx reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, sym); + operands[0] = gen_const_mem (Pmode, reg); + } +}) -(define_insn "*sibcall_immediate" - [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i")) +(define_insn "sibcall_internal" + [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, S")) (match_operand 1)) (clobber (reg:SI TA_REGNUM)) (return)])] "" { - if (TARGET_CMODEL_LARGE) - return "b\t%0"; - else - return "j\t%0"; + switch (which_alternative) + { + case 0: + if (TARGET_16_BIT) + return "jr5\t%0"; + else + return "jr\t%0"; + case 1: + if (nds32_long_call_p (operands[0])) + return "b\t%0"; + else + return "j\t%0"; + default: + gcc_unreachable (); + } } - [(set_attr "type" "branch") - (set (attr "length") - (if_then_else (match_test "TARGET_CMODEL_LARGE") - (const_int 12) - (const_int 4)))]) + [(set_attr "enabled" "yes") + (set_attr "type" "branch") + (set_attr_alternative "length" + [ + ;; Alternative 0 + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4)) + ;; Alternative 1 + (if_then_else (match_test "flag_pic") + (const_int 16) + (if_then_else (match_test "nds32_long_call_p (operands[0])") + (const_int 12) + (const_int 4))) + ])] +) ;; sibcall_value -;; sibcall_value_register +;; sibcall_value_internal ;; sibcall_value_immediate (define_expand "sibcall_value" @@ -2045,41 +1764,58 
@@ (clobber (reg:SI TA_REGNUM)) (return)])] "" - "" -) +{ + rtx sym = XEXP (operands[1], 0); -(define_insn "*sibcall_value_register" + if (TARGET_ICT_MODEL_LARGE + && nds32_indirect_call_referenced_p (sym)) + { + rtx reg = gen_reg_rtx (Pmode); + emit_move_insn (reg, sym); + operands[1] = gen_const_mem (Pmode, reg); + } +}) + +(define_insn "sibcall_value_internal" [(parallel [(set (match_operand 0) - (call (mem (match_operand:SI 1 "register_operand" "r, r")) - (match_operand 2))) - (clobber (reg:SI TA_REGNUM)) - (return)])] - "" - "@ - jr5\t%1 - jr\t%1" - [(set_attr "type" "branch,branch") - (set_attr "length" " 2, 4")]) - -(define_insn "*sibcall_value_immediate" - [(parallel [(set (match_operand 0) - (call (mem (match_operand:SI 1 "immediate_operand" "i")) + (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, S")) (match_operand 2))) (clobber (reg:SI TA_REGNUM)) (return)])] "" { - if (TARGET_CMODEL_LARGE) - return "b\t%1"; - else - return "j\t%1"; + switch (which_alternative) + { + case 0: + if (TARGET_16_BIT) + return "jr5\t%1"; + else + return "jr\t%1"; + case 1: + if (nds32_long_call_p (operands[1])) + return "b\t%1"; + else + return "j\t%1"; + default: + gcc_unreachable (); + } } - [(set_attr "type" "branch") - (set (attr "length") - (if_then_else (match_test "TARGET_CMODEL_LARGE") - (const_int 12) - (const_int 4)))]) - + [(set_attr "enabled" "yes") + (set_attr "type" "branch") + (set_attr_alternative "length" + [ + ;; Alternative 0 + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4)) + ;; Alternative 1 + (if_then_else (match_test "flag_pic") + (const_int 16) + (if_then_else (match_test "nds32_long_call_p (operands[1])") + (const_int 12) + (const_int 4))) + ])] +) ;; ---------------------------------------------------------------------------- @@ -2089,26 +1825,41 @@ "" { /* Note that only under V3/V3M ISA, we could use v3push prologue. - In addition, we do not want to use v3push for isr function - and variadic function. 
*/ - if (TARGET_V3PUSH - && !nds32_isr_function_p (current_function_decl) - && (cfun->machine->va_args_size == 0)) + In addition, we need to check if v3push is indeed available. */ + if (NDS32_V3PUSH_AVAILABLE_P) nds32_expand_prologue_v3push (); else nds32_expand_prologue (); + + /* If cfun->machine->fp_as_gp_p is true, we can generate special + directive to guide linker doing fp-as-gp optimization. + However, for a naked function, which means + it should not have prologue/epilogue, + using fp-as-gp still requires saving $fp by push/pop behavior and + there is no benefit to use fp-as-gp on such small function. + So we need to make sure this function is NOT naked as well. */ + if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p) + emit_insn (gen_omit_fp_begin (gen_rtx_REG (SImode, FP_REGNUM))); + DONE; }) (define_expand "epilogue" [(const_int 0)] "" { + /* If cfun->machine->fp_as_gp_p is true, we can generate special + directive to guide linker doing fp-as-gp optimization. + However, for a naked function, which means + it should not have prologue/epilogue, + using fp-as-gp still requires saving $fp by push/pop behavior and + there is no benefit to use fp-as-gp on such small function. + So we need to make sure this function is NOT naked as well. */ + if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p) + emit_insn (gen_omit_fp_end (gen_rtx_REG (SImode, FP_REGNUM))); + /* Note that only under V3/V3M ISA, we could use v3pop epilogue. - In addition, we do not want to use v3pop for isr function - and variadic function. */ - if (TARGET_V3PUSH - && !nds32_isr_function_p (current_function_decl) - && (cfun->machine->va_args_size == 0)) + In addition, we need to check if v3push is indeed available. */ + if (NDS32_V3PUSH_AVAILABLE_P) nds32_expand_epilogue_v3pop (false); else nds32_expand_epilogue (false); @@ -2121,10 +1872,7 @@ /* Pass true to indicate that this is sibcall epilogue and exit from a function without the final branch back to the calling function. 
*/ - if (TARGET_V3PUSH && !nds32_isr_function_p (current_function_decl)) - nds32_expand_epilogue_v3pop (true); - else - nds32_expand_epilogue (true); + nds32_expand_epilogue (true); DONE; }) @@ -2142,7 +1890,7 @@ return "nop"; } [(set_attr "type" "misc") - (set_attr "enabled" "1") + (set_attr "enabled" "yes") (set (attr "length") (if_then_else (match_test "TARGET_16_BIT") (const_int 2) @@ -2166,12 +1914,11 @@ { return nds32_output_stack_push (operands[0]); } - [(set_attr "type" "misc") - (set_attr "enabled" "1") + [(set_attr "type" "store_multiple") + (set_attr "combo" "12") + (set_attr "enabled" "yes") (set (attr "length") - (if_then_else (match_test "TARGET_V3PUSH - && !nds32_isr_function_p (cfun->decl) - && (cfun->machine->va_args_size == 0)") + (if_then_else (match_test "NDS32_V3PUSH_AVAILABLE_P") (const_int 2) (const_int 4)))]) @@ -2188,12 +1935,11 @@ { return nds32_output_stack_pop (operands[0]); } - [(set_attr "type" "misc") - (set_attr "enabled" "1") + [(set_attr "type" "load_multiple") + (set_attr "combo" "12") + (set_attr "enabled" "yes") (set (attr "length") - (if_then_else (match_test "TARGET_V3PUSH - && !nds32_isr_function_p (cfun->decl) - && (cfun->machine->va_args_size == 0)") + (if_then_else (match_test "NDS32_V3PUSH_AVAILABLE_P") (const_int 2) (const_int 4)))]) @@ -2205,34 +1951,64 @@ ;; Use this pattern to expand a return instruction ;; with simple_return rtx if no epilogue is required. (define_expand "return" + [(parallel [(return) + (clobber (reg:SI FP_REGNUM))])] + "nds32_can_use_return_insn ()" +{ + /* Emit as the simple return. */ + if (!cfun->machine->fp_as_gp_p + && cfun->machine->naked_p + && (cfun->machine->va_args_size == 0)) + { + emit_jump_insn (gen_return_internal ()); + DONE; + } +}) + +;; This pattern is expanded only by the shrink-wrapping optimization +;; on paths where the function prologue has not been executed. 
+;; However, such optimization may reorder the prologue/epilogue blocks +;; together with basic blocks within function body. +;; So we must disable this pattern if we have already decided +;; to perform fp_as_gp optimization, which requires prologue to be +;; first block and epilogue to be last block. +(define_expand "simple_return" [(simple_return)] - "nds32_can_use_return_insn ()" + "!cfun->machine->fp_as_gp_p" "" ) -;; This pattern is expanded only by the shrink-wrapping optimization -;; on paths where the function prologue has not been executed. -(define_expand "simple_return" - [(simple_return)] +(define_insn "*nds32_return" + [(parallel [(return) + (clobber (reg:SI FP_REGNUM))])] "" - "" -) +{ + return nds32_output_return (); +} + [(set_attr "type" "branch") + (set_attr "enabled" "yes") + (set_attr "length" "4")]) (define_insn "return_internal" [(simple_return)] "" { + if (nds32_isr_function_critical_p (current_function_decl)) + return "iret"; + if (TARGET_16_BIT) return "ret5"; else return "ret"; } [(set_attr "type" "branch") - (set_attr "enabled" "1") + (set_attr "enabled" "yes") (set (attr "length") - (if_then_else (match_test "TARGET_16_BIT") - (const_int 2) - (const_int 4)))]) + (if_then_else (match_test "nds32_isr_function_critical_p (current_function_decl)") + (const_int 4) + (if_then_else (match_test "TARGET_16_BIT") + (const_int 2) + (const_int 4))))]) ;; ---------------------------------------------------------------------------- @@ -2267,6 +2043,7 @@ { rtx add_tmp; rtx reg, test; + rtx tmp_reg; /* Step A: "k <-- (plus (operands[0]) (-operands[1]))". */ if (operands[1] != const0_rtx) @@ -2288,9 +2065,14 @@ emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2], operands[4])); - /* Step C, D, E, and F, using another temporary register. */ - rtx tmp = gen_reg_rtx (SImode); - emit_jump_insn (gen_casesi_internal (operands[0], operands[3], tmp)); + tmp_reg = gen_reg_rtx (SImode); + /* Step C, D, E, and F, using another temporary register tmp_reg. 
*/ + if (flag_pic) + emit_use (pic_offset_table_rtx); + + emit_jump_insn (gen_casesi_internal (operands[0], + operands[3], + tmp_reg)); DONE; }) @@ -2326,17 +2108,34 @@ else return nds32_output_casesi (operands); } - [(set_attr "length" "20") - (set_attr "type" "alu")]) + [(set_attr "type" "branch") + (set (attr "length") + (if_then_else (match_test "flag_pic") + (const_int 28) + (const_int 20)))]) ;; ---------------------------------------------------------------------------- ;; Performance Extension +; If -fwrapv option is issued, GCC expects there will be +; signed overflow situation. So the ABS(INT_MIN) is still INT_MIN +; (e.g. ABS(0x80000000)=0x80000000). +; However, the hardware ABS instruction of nds32 target +; always performs saturation: abs 0x80000000 -> 0x7fffffff. +; So that we can only enable abssi2 pattern if flag_wrapv is NOT presented. +(define_insn "abssi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (abs:SI (match_operand:SI 1 "register_operand" " r")))] + "TARGET_EXT_PERF && TARGET_HW_ABS && !flag_wrapv" + "abs\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + (define_insn "clzsi2" [(set (match_operand:SI 0 "register_operand" "=r") (clz:SI (match_operand:SI 1 "register_operand" " r")))] - "TARGET_PERF_EXT" + "TARGET_EXT_PERF" "clz\t%0, %1" [(set_attr "type" "alu") (set_attr "length" "4")]) @@ -2345,7 +2144,7 @@ [(set (match_operand:SI 0 "register_operand" "=r") (smax:SI (match_operand:SI 1 "register_operand" " r") (match_operand:SI 2 "register_operand" " r")))] - "TARGET_PERF_EXT" + "TARGET_EXT_PERF" "max\t%0, %1, %2" [(set_attr "type" "alu") (set_attr "length" "4")]) @@ -2354,25 +2153,66 @@ [(set (match_operand:SI 0 "register_operand" "=r") (smin:SI (match_operand:SI 1 "register_operand" " r") (match_operand:SI 2 "register_operand" " r")))] - "TARGET_PERF_EXT" + "TARGET_EXT_PERF" "min\t%0, %1, %2" [(set_attr "type" "alu") (set_attr "length" "4")]) -(define_insn "*btst" - [(set (match_operand:SI 0 "register_operand" "= 
r") - (zero_extract:SI (match_operand:SI 1 "register_operand" " r") +(define_insn "btst" + [(set (match_operand:SI 0 "register_operand" "= r") + (zero_extract:SI (match_operand:SI 1 "register_operand" " r") (const_int 1) - (match_operand:SI 2 "immediate_operand" " Iu05")))] - "TARGET_PERF_EXT" + (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")))] + "TARGET_EXT_PERF" "btst\t%0, %1, %2" [(set_attr "type" "alu") (set_attr "length" "4")]) +(define_insn "ave" + [(set (match_operand:SI 0 "register_operand" "=r") + (truncate:SI + (ashiftrt:DI + (plus:DI + (plus:DI + (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) + (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))) + (const_int 1)) + (const_int 1))))] + "TARGET_EXT_PERF" + "ave\t%0, %1, %2" + [(set_attr "type" "alu") + (set_attr "length" "4")]) + ;; ---------------------------------------------------------------------------- ;; Pseudo NOPs +(define_insn "relax_group" + [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP)] + "" + ".relax_hint %0" + [(set_attr "length" "0")] +) + +;; Output .omit_fp_begin for fp-as-gp optimization. +;; Also we have to set $fp register. +(define_insn "omit_fp_begin" + [(set (match_operand:SI 0 "register_operand" "=x") + (unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_OMIT_FP_BEGIN))] + "" + "! -----\;.omit_fp_begin\;la\t$fp,_FP_BASE_\;! -----" + [(set_attr "length" "8")] +) + +;; Output .omit_fp_end for fp-as-gp optimization. +;; Claim that we have to use $fp register. +(define_insn "omit_fp_end" + [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "x")] UNSPEC_VOLATILE_OMIT_FP_END)] + "" + "! -----\;.omit_fp_end\;! 
-----" + [(set_attr "length" "0")] +) + (define_insn "pop25return" [(return) (unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_POP25_RETURN)] @@ -2381,4 +2221,151 @@ [(set_attr "length" "0")] ) +;; Add pc +(define_insn "add_pc" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "0") + (pc)))] + "TARGET_LINUX_ABI || flag_pic" + "add5.pc\t%0" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (bswap:SI (match_operand:SI 1 "register_operand" "r")))] + "" +{ + emit_insn (gen_unspec_wsbh (operands[0], operands[1])); + emit_insn (gen_rotrsi3 (operands[0], operands[0], GEN_INT (16))); + DONE; +}) + +(define_insn "bswaphi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (bswap:HI (match_operand:HI 1 "register_operand" "r")))] + "" + "wsbh\t%0, %1" + [(set_attr "type" "alu") + (set_attr "length" "4")] +) + ;; ---------------------------------------------------------------------------- + +;; Patterns for exception handling + +(define_expand "eh_return" + [(use (match_operand 0 "general_operand"))] + "" +{ + emit_insn (gen_nds32_eh_return (operands[0])); + DONE; +}) + +(define_insn_and_split "nds32_eh_return" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_EH_RETURN)] + "" + "#" + "reload_completed" + [(const_int 0)] +{ + rtx place; + rtx addr; + + /* The operands[0] is the handler address. We need to assign it + to return address rtx so that we can jump to exception handler + when returning from current function. */ + + if (cfun->machine->lp_size == 0) + { + /* If $lp is not saved in the stack frame, we can take $lp directly. */ + place = gen_rtx_REG (SImode, LP_REGNUM); + } + else + { + /* Otherwise, we need to locate the stack slot of return address. + The return address is generally saved in [$fp-4] location. 
+ However, DSE (dead store elimination) does not detect an alias + between [$fp-x] and [$sp+y]. This can result in a store to save + $lp introduced by builtin_eh_return() being incorrectly deleted + if it is based on $fp. The solution we take here is to compute + the offset relative to stack pointer and then use $sp to access + location so that the alias can be detected. + FIXME: What if the immediate value "offset" is too large to be + fit in a single addi instruction? */ + HOST_WIDE_INT offset; + + offset = (cfun->machine->fp_size + + cfun->machine->gp_size + + cfun->machine->lp_size + + cfun->machine->callee_saved_gpr_regs_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size + + cfun->machine->eh_return_data_regs_size + + cfun->machine->local_size + + cfun->machine->out_args_size); + + addr = plus_constant (Pmode, stack_pointer_rtx, offset - 4); + place = gen_frame_mem (SImode, addr); + } + + emit_move_insn (place, operands[0]); + DONE; +}) + +;; ---------------------------------------------------------------------------- + +;; Patterns for TLS. +;; The following two tls patterns don't be expanded directly because the +;; intermediate value may be spilled into the stack. As a result, it is +;; hard to analyze the define-use chain in the relax_opt pass. + + +;; There is a unspec operand to record RELAX_GROUP number because each +;; emitted instruction need a relax_hint above it. 
+;; TLS descriptor access.
+;;   operand 0: the symbol (nds32_symbolic_operand), wrapped in an
+;;              unspec_volatile tagged UNSPEC_TLS_DESC and used as the
+;;              call target.
+;;   operand 1: an immediate carried in a (use (unspec ...)) tagged
+;;              UNSPEC_VOLATILE_RELAX_GROUP.
+;; The result is set in hard register 0.  GP_REGNUM is used, and
+;; LP_REGNUM and TA_REGNUM are clobbered, as for an ordinary call.
+;; The assembly text is produced by nds32_output_tls_desc, and the
+;; whole sequence is accounted as 20 bytes.
+(define_insn "tls_desc"
+  [(set (reg:SI 0)
+        (call (unspec_volatile:SI [(match_operand:SI 0 "nds32_symbolic_operand" "i")] UNSPEC_TLS_DESC)
+              (const_int 1)))
+   (use (unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP))
+   (use (reg:SI GP_REGNUM))
+   (clobber (reg:SI LP_REGNUM))
+   (clobber (reg:SI TA_REGNUM))]
+  ""
+  {
+    return nds32_output_tls_desc (operands);
+  }
+  [(set_attr "length" "20")
+   (set_attr "type" "branch")]
+)
+
+;; There is an unspec operand to record the RELAX_GROUP number because
+;; each emitted instruction needs a relax_hint above it.
+;;   operand 0: destination register.
+;;   operand 1: the symbol (nds32_symbolic_operand), wrapped in an
+;;              unspec tagged UNSPEC_TLS_IE.
+;;   operand 2: the immediate recorded for the relaxation group.
+;; GP_REGNUM is used.  The assembly text is produced by
+;; nds32_output_tls_ie; the sequence is accounted as 12 bytes when
+;; flag_pic is set and 8 bytes otherwise.
+(define_insn "tls_ie"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:SI 1 "nds32_symbolic_operand" "i")] UNSPEC_TLS_IE))
+   (use (unspec [(match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP))
+   (use (reg:SI GP_REGNUM))]
+  ""
+  {
+    return nds32_output_tls_ie (operands);
+  }
+  [(set (attr "length") (if_then_else (match_test "flag_pic")
+                                      (const_int 12)
+                                      (const_int 8)))
+   (set_attr "type" "misc")]
+)
+
+;; The pattern is for some relaxation groups that have to keep addsi3 in 32-bit mode.
+;; The addition is expressed as an unspec tagged UNSPEC_ADD32 rather than
+;; as (plus ...), and its only alternative is the 4-byte "add" form.
+;; NOTE(review): presumably the unspec keeps this insn from being matched
+;; by the generic add pattern and its shorter encodings -- confirm.
+;; The "%" modifier on operand 1 marks operands 1 and 2 as commutative.
+;; No ";" follows the output template: in a machine description it would
+;; only open an empty comment, not terminate a statement.
+(define_insn "addsi3_32bit"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand:SI 1 "register_operand" "%r")
+                    (match_operand:SI 2 "register_operand" " r")] UNSPEC_ADD32))]
+  ""
+  "add\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")
+   (set_attr "feature" "v1")])
+
+;; ----------------------------------------------------------------------------