diff gcc/config/nds32/nds32.md @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
line wrap: on
line diff
--- a/gcc/config/nds32/nds32.md	Fri Oct 27 22:46:09 2017 +0900
+++ b/gcc/config/nds32/nds32.md	Thu Oct 25 07:37:49 2018 +0900
@@ -1,5 +1,5 @@
 ;; Machine description of Andes NDS32 cpu for GNU compiler
-;; Copyright (C) 2012-2017 Free Software Foundation, Inc.
+;; Copyright (C) 2012-2018 Free Software Foundation, Inc.
 ;; Contributed by Andes Technology Corporation.
 ;;
 ;; This file is part of GCC.
@@ -46,58 +46,143 @@
 ;; Include DImode/DFmode operations.
 (include "nds32-doubleword.md")
 
+;; Include floating-point patterns.
+(include "nds32-fpu.md")
+
 ;; Include peephole patterns.
 (include "nds32-peephole2.md")
 
 
+;; ------------------------------------------------------------------------
+
+;; CPU pipeline model, derived from nds32_cpu_option.
+(define_attr "pipeline_model" "n7,n8,e8,n9,n10,graywolf,n13,simple"
+  (const
+    (cond [(match_test "nds32_cpu_option == CPU_N7")  (const_string "n7")
+	   (match_test "nds32_cpu_option == CPU_E8")  (const_string "e8")
+	   (match_test "nds32_cpu_option == CPU_N6 || nds32_cpu_option == CPU_N8")  (const_string "n8") ; N6 shares the n8 model
+	   (match_test "nds32_cpu_option == CPU_N9")  (const_string "n9")
+	   (match_test "nds32_cpu_option == CPU_N10") (const_string "n10")
+	   (match_test "nds32_cpu_option == CPU_GRAYWOLF") (const_string "graywolf")
+	   (match_test "nds32_cpu_option == CPU_N12") (const_string "n13") ; N12 shares the n13 model
+	   (match_test "nds32_cpu_option == CPU_N13") (const_string "n13")
+	   (match_test "nds32_cpu_option == CPU_SIMPLE") (const_string "simple")]
+	  (const_string "n9")))) ; any other CPU option falls back to n9
+
 ;; Insn type, it is used to default other attribute values.
 (define_attr "type"
-  "unknown,move,load,store,alu,compare,branch,call,misc"
+  "unknown,load,store,load_multiple,store_multiple,alu,alu_shift,pbsad,pbsada,mul,mac,div,branch,mmu,misc,\
+   falu,fmuls,fmuld,fmacs,fmacd,fdivs,fdivd,fsqrts,fsqrtd,fcmp,fabs,fcpy,fcmov,fmfsr,fmfdr,fmtsr,fmtdr,fload,fstore,\
+   dalu,dalu64,daluround,dcmp,dclip,dmul,dmac,dinsb,dpack,dbpick,dwext"
   (const_string "unknown"))
 
+;; Insn sub-type; refines the "type" attribute.
+(define_attr "subtype"
+  "simple,shift,saturation"
+  (const_string "simple")) ; default when a pattern sets no subtype
 
 ;; Length, in bytes, default is 4-bytes.
 (define_attr "length" "" (const_int 4))
 
+;; Indicates the number of micro-instructions an insn counts as.
+(define_attr "combo"
+  "0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25"
+  (const_string "1")) ; default: a single micro-instruction
+
+;; Feature set an insn alternative belongs to; used to enable/disable alternatives.
+;; v1  : Baseline Instructions
+;; v2  : Baseline Version 2 Instructions
+;; v3m : Baseline Version 3m Instructions
+;; v3  : Baseline Version 3 Instructions
+;; pe1 : Performance Extension Instructions
+;; pe2 : Performance Extension Version 2 Instructions
+;; se  : String Extension instructions
+(define_attr "feature"
+  "v1,v2,v3m,v3,pe1,pe2,se,fpu" ; fpu (not in the list above): gated on TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE
+  (const_string "v1")) ; default: baseline v1
 
 ;; Enabled, which is used to enable/disable insn alternatives.
 ;; Note that we use length and TARGET_16_BIT here as criteria.
-;; If the instruction pattern already check TARGET_16_BIT to
-;; determine the length by itself, its enabled attribute should be
-;; always 1 to avoid the conflict with the settings here.
-(define_attr "enabled" ""
-  (cond [(and (eq_attr "length" "2")
-	      (match_test "!TARGET_16_BIT"))
-	 (const_int 0)]
-	(const_int 1)))
+;; If the instruction pattern already checks TARGET_16_BIT to determine
+;; the length by itself, its enabled attribute should be customized to
+;; avoid the conflict between the length attribute and this default setting.
+(define_attr "enabled" "no,yes"
+  (if_then_else
+    (and (eq_attr "length" "2")
+	 (match_test "!TARGET_16_BIT"))
+    (const_string "no") ; 2-byte alternatives are disabled without TARGET_16_BIT
+    (cond [(eq_attr "feature" "v1")   (const_string "yes")
+	   (eq_attr "feature" "v2")   (if_then_else (match_test "TARGET_ISA_V2 || TARGET_ISA_V3 || TARGET_ISA_V3M")
+						    (const_string "yes")
+						    (const_string "no"))
+	   (eq_attr "feature" "v3")   (if_then_else (match_test "TARGET_ISA_V3")
+						    (const_string "yes")
+						    (const_string "no"))
+	   (eq_attr "feature" "v3m")  (if_then_else (match_test "TARGET_ISA_V3 || TARGET_ISA_V3M")
+						    (const_string "yes")
+						    (const_string "no"))
+	   (eq_attr "feature" "pe1")  (if_then_else (match_test "TARGET_EXT_PERF")
+						    (const_string "yes")
+						    (const_string "no"))
+	   (eq_attr "feature" "pe2")  (if_then_else (match_test "TARGET_EXT_PERF2")
+						    (const_string "yes")
+						    (const_string "no"))
+	   (eq_attr "feature" "se")   (if_then_else (match_test "TARGET_EXT_STRING")
+						    (const_string "yes")
+						    (const_string "no"))
+	   (eq_attr "feature" "fpu")  (if_then_else (match_test "TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE")
+						    (const_string "yes")
+						    (const_string "no"))]
+	   (const_string "yes")))) ; fallback for a feature value not tested above
 
 
 ;; ----------------------------------------------------------------------------
 
+(include "nds32-dspext.md")
 
 ;; Move instructions.
 
 ;; For QImode and HImode, the immediate value can be fit in imm20s.
 ;; So there is no need to split rtx for QI and HI patterns.
 
-(define_expand "movqi"
-  [(set (match_operand:QI 0 "general_operand" "")
-	(match_operand:QI 1 "general_operand" ""))]
+(define_expand "mov<mode>"
+  [(set (match_operand:QIHI 0 "general_operand" "")
+	(match_operand:QIHI 1 "general_operand" ""))]
   ""
 {
   /* Need to force register if mem <- !reg.  */
   if (MEM_P (operands[0]) && !REG_P (operands[1]))
-    operands[1] = force_reg (QImode, operands[1]);
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+  /* Load via a zero-extending SImode load; gen_reg_rtx needs a new pseudo.  */
+  if (MEM_P (operands[1]) && optimize > 0 && can_create_pseudo_p ())
+    {
+      rtx reg = gen_reg_rtx (SImode);
+
+      emit_insn (gen_zero_extend<mode>si2 (reg, operands[1]));
+      operands[1] = gen_lowpart (<MODE>mode, reg);
+    }
 })
 
-(define_expand "movhi"
-  [(set (match_operand:HI 0 "general_operand" "")
-	(match_operand:HI 1 "general_operand" ""))]
+(define_expand "movmisalign<mode>"
+  [(set (match_operand:SIDI 0 "general_operand" "")
+	(match_operand:SIDI 1 "general_operand" ""))]
   ""
 {
-  /* Need to force register if mem <- !reg.  */
+  rtx addr;  /* Address of the memory operand, forced into a register.  */
   if (MEM_P (operands[0]) && !REG_P (operands[1]))
-    operands[1] = force_reg (HImode, operands[1]);
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+  /* Emit an unaligned store if operand 0 is memory, else an unaligned load.  */
+  if (MEM_P (operands[0]))
+    {
+      addr = force_reg (Pmode, XEXP (operands[0], 0));
+      emit_insn (gen_unaligned_store<mode> (addr, operands[1]));
+    }
+  else  /* NOTE(review): assumes operands[1] is a MEM here -- confirm.  */
+    {
+      addr = force_reg (Pmode, XEXP (operands[1], 0));
+      emit_insn (gen_unaligned_load<mode> (operands[0], addr));
+    }
+  DONE;
 })
 
 (define_expand "movsi"
@@ -130,12 +215,34 @@
 						  low12_int));
       DONE;
     }
+
+  if ((REG_P (operands[0]) || GET_CODE (operands[0]) == SUBREG)
+       && SYMBOLIC_CONST_P (operands[1]))
+    {
+      if (TARGET_ICT_MODEL_LARGE
+	  && nds32_indirect_call_referenced_p (operands[1]))
+	{
+	  nds32_expand_ict_move (operands);
+	  DONE;
+	}
+      else if (nds32_tls_referenced_p (operands [1]))
+	{
+	  nds32_expand_tls_move (operands);
+	  DONE;
+	}
+      else if (flag_pic)
+	{
+	  nds32_expand_pic_move (operands);
+	  DONE;
+	}
+    }
 })
 
 (define_insn "*mov<mode>"
-  [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r, U45, U33, U37, U45, m,   l,   l,   l,   d, r,    d,    r,    r,    r")
-	(match_operand:QIHISI 1 "nds32_move_operand"   " r, r,   l,   l,   l,   d, r, U45, U33, U37, U45, m, Ip05, Is05, Is20, Ihig"))]
-  ""
+  [(set (match_operand:QIHISI 0 "nonimmediate_operand" "=r, r,U45,U33,U37,U45, m,  l,  l,  l,  d,  d, r,   d,    r,    r,    r, *f, *f,  r, *f,  Q")
+	(match_operand:QIHISI 1 "nds32_move_operand"   " r, r,  l,  l,  l,  d, r,U45,U33,U37,U45,Ufe, m,Ip05, Is05, Is20, Ihig, *f,  r, *f,  Q, *f"))]
+  "register_operand(operands[0], <MODE>mode)
+   || register_operand(operands[1], <MODE>mode)"
 {
   switch (which_alternative)
     {
@@ -154,37 +261,52 @@
     case 8:
     case 9:
     case 10:
+    case 11:
       return nds32_output_16bit_load (operands, <byte>);
-    case 11:
+    case 12:
       return nds32_output_32bit_load (operands, <byte>);
-    case 12:
-      return "movpi45\t%0, %1";
     case 13:
+      return "movpi45\t%0, %1";
+    case 14:
       return "movi55\t%0, %1";
-    case 14:
+    case 15:
       return "movi\t%0, %1";
-    case 15:
+    case 16:
       return "sethi\t%0, hi20(%1)";
+    case 17:
+      if (TARGET_FPU_SINGLE)
+	return "fcpyss\t%0, %1, %1";
+      else
+	return "#";
+    case 18:
+      return "fmtsr\t%1, %0";
+    case 19:
+      return "fmfsr\t%0, %1";
+    case 20:
+      return nds32_output_float_load (operands);
+    case 21:
+      return nds32_output_float_store (operands);
     default:
       gcc_unreachable ();
     }
 }
-  [(set_attr "type"   "alu,alu,store,store,store,store,store,load,load,load,load,load,alu,alu,alu,alu")
-   (set_attr "length" "  2,  4,    2,    2,    2,    2,    4,   2,   2,   2,   2,   4,  2,  2,  4,  4")])
+  [(set_attr "type"    "alu,alu,store,store,store,store,store,load,load,load,load,load,load,alu,alu,alu,alu,fcpy,fmtsr,fmfsr,fload,fstore")
+   (set_attr "length"  "  2,  4,    2,    2,    2,    2,    4,   2,   2,   2,   2,   2,   4,  2,  2,  4,  4,   4,    4,    4,    4,     4")
+   (set_attr "feature" " v1, v1,   v1,   v1,   v1,   v1,   v1,  v1,  v1,  v1,  v1, v3m,  v1, v1, v1, v1, v1, fpu,  fpu,  fpu,  fpu,   fpu")])
 
 
 ;; We use nds32_symbolic_operand to limit that only CONST/SYMBOL_REF/LABEL_REF
 ;; are able to match such instruction template.
-(define_insn "*move_addr"
-  [(set (match_operand:SI 0 "register_operand"       "=l, r")
-	(match_operand:SI 1 "nds32_symbolic_operand" " i, i"))]
+(define_insn "move_addr"
+  [(set (match_operand:SI 0 "nds32_general_register_operand"   "=l, r")
+	(match_operand:SI 1 "nds32_nonunspec_symbolic_operand" " i, i"))]
   ""
   "la\t%0, %1"
-  [(set_attr "type" "move")
+  [(set_attr "type"  "alu")
    (set_attr "length"  "8")])
 
 
-(define_insn "*sethi"
+(define_insn "sethi"
   [(set (match_operand:SI 0 "register_operand"                "=r")
 	(high:SI (match_operand:SI 1 "nds32_symbolic_operand" " i")))]
   ""
@@ -193,7 +315,7 @@
    (set_attr "length" "4")])
 
 
-(define_insn "*lo_sum"
+(define_insn "lo_sum"
   [(set (match_operand:SI 0 "register_operand"                  "=r")
 	(lo_sum:SI (match_operand:SI 1 "register_operand"       " r")
 		   (match_operand:SI 2 "nds32_symbolic_operand" " i")))]
@@ -256,13 +378,58 @@
 
 
 ;; ----------------------------------------------------------------------------
+(define_expand "extv" ; sign_extract expander; the decision is delegated to nds32_expand_extv
+  [(set (match_operand 0 "register_operand" "")
+        (sign_extract (match_operand 1 "nonimmediate_operand" "")
+                      (match_operand 2 "const_int_operand" "")
+                      (match_operand 3 "const_int_operand" "")))]
+  ""
+{
+  enum nds32_expand_result_type result = nds32_expand_extv (operands);
+  switch (result)  /* Map the helper's verdict onto DONE / FAIL / fall out.  */
+    {
+    case EXPAND_DONE:
+      DONE;
+      break;
+    case EXPAND_FAIL:
+      FAIL;
+      break;
+    case EXPAND_CREATE_TEMPLATE:  /* Neither DONE nor FAIL: leave the switch and use the template above.  */
+      break;
+    default:
+      gcc_unreachable ();
+    }
+})
+
+(define_expand "insv" ; zero_extract store expander; the decision is delegated to nds32_expand_insv
+  [(set (zero_extract (match_operand 0 "nonimmediate_operand" "")
+                      (match_operand 1 "const_int_operand" "")
+                      (match_operand 2 "const_int_operand" ""))
+        (match_operand 3 "register_operand" ""))]
+  ""
+{
+  enum nds32_expand_result_type result = nds32_expand_insv (operands);
+  switch (result)  /* Map the helper's verdict onto DONE / FAIL / fall out.  */
+    {
+    case EXPAND_DONE:
+      DONE;
+      break;
+    case EXPAND_FAIL:
+      FAIL;
+      break;
+    case EXPAND_CREATE_TEMPLATE:  /* Neither DONE nor FAIL: leave the switch and use the template above.  */
+      break;
+    default:
+      gcc_unreachable ();
+    }
+})
 
 ;; Arithmetic instructions.
 
-(define_insn "add<mode>3"
-  [(set (match_operand:QIHISI 0 "register_operand"                   "=   d,    l,    d,    l,  d, l,    k,    l,    r, r")
-	(plus:QIHISI (match_operand:QIHISI 1 "register_operand"      "%   0,    l,    0,    l,  0, l,    0,    k,    r, r")
-		     (match_operand:QIHISI 2 "nds32_rimm15s_operand" " In05, In03, Iu05, Iu03,  r, l, Is10, Iu06, Is15, r")))]
+(define_insn "addsi3"
+  [(set (match_operand:SI 0 "register_operand"               "=   d,   l,   d,   l, d, l,   k,   l,    r, r")
+	(plus:SI (match_operand:SI 1 "register_operand"      "%   0,   l,   0,   l, 0, l,   0,   k,    r, r")
+		 (match_operand:SI 2 "nds32_rimm15s_operand" " In05,In03,Iu05,Iu03, r, l,Is10,IU06, Is15, r")))]
   ""
 {
   switch (which_alternative)
@@ -298,19 +465,20 @@
       gcc_unreachable ();
     }
 }
-  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
-   (set_attr "length" "  2,  2,  2,  2,  2,  2,  2,  2,  4,  4")])
+  [(set_attr "type"    "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length"  "  2,  2,  2,  2,  2,  2,  2,  2,  4,  4")
+   (set_attr "feature" " v1, v1, v1, v1, v1, v1, v2, v1, v1, v1")])
 
-(define_insn "sub<mode>3"
-  [(set (match_operand:QIHISI 0 "register_operand"                    "=d, l,    r, r")
-	(minus:QIHISI (match_operand:QIHISI 1 "nds32_rimm15s_operand" " 0, l, Is15, r")
-		      (match_operand:QIHISI 2 "register_operand"      " r, l,    r, r")))]
+(define_insn "subsi3"
+  [(set (match_operand:SI 0 "register_operand"                "=d, l,    r, r")
+	(minus:SI (match_operand:SI 1 "nds32_rimm15s_operand" " 0, l, Is15, r")
+		  (match_operand:SI 2 "register_operand"      " r, l,    r, r")))]
   ""
   "@
-  sub45\t%0, %2
-  sub333\t%0, %1, %2
-  subri\t%0, %2, %1
-  sub\t%0, %1, %2"
+   sub45\t%0, %2
+   sub333\t%0, %1, %2
+   subri\t%0, %2, %1
+   sub\t%0, %1, %2"
   [(set_attr "type"   "alu,alu,alu,alu")
    (set_attr "length" "  2,  2,  4,  4")])
 
@@ -323,7 +491,7 @@
 	(plus:SI (mult:SI (match_operand:SI 1 "register_operand"  " r")
 			  (match_operand:SI 2 "immediate_operand" " i"))
 		 (match_operand:SI 3 "register_operand"           " r")))]
-  "TARGET_ISA_V3
+  "TARGET_ISA_V3 && optimize_size
    && (exact_log2 (INTVAL (operands[2])) != -1)
    && (exact_log2 (INTVAL (operands[2])) <= 31)"
 {
@@ -333,18 +501,20 @@
 
   return "add_slli\t%0, %3, %1, %2";
 }
-  [(set_attr "type" "alu")
-   (set_attr "length" "4")])
+  [(set_attr "type" "alu_shift")
+   (set_attr "combo"        "2")
+   (set_attr "length"       "4")])
 
 (define_insn "*add_srli"
-  [(set (match_operand:SI 0 "register_operand"                        "=   r")
-	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
-			      (match_operand:SI 2 "immediate_operand" " Iu05"))
-		 (match_operand:SI 3 "register_operand"               "    r")))]
-  "TARGET_ISA_V3"
+  [(set (match_operand:SI 0 "register_operand"                          "=   r")
+	(plus:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"    "    r")
+			      (match_operand:SI 2 "nds32_imm5u_operand" " Iu05"))
+		 (match_operand:SI 3 "register_operand"                 "    r")))]
+  "TARGET_ISA_V3 && optimize_size"
   "add_srli\t%0, %3, %1, %2"
-  [(set_attr "type" "alu")
-   (set_attr "length" "4")])
+  [(set_attr "type" "alu_shift")
+   (set_attr "combo"        "2")
+   (set_attr "length"       "4")])
 
 
 ;; GCC intends to simplify (minus (reg) (ashift ...))
@@ -355,7 +525,7 @@
 	(minus:SI (match_operand:SI 1 "register_operand"           " r")
 		  (mult:SI (match_operand:SI 2 "register_operand"  " r")
 			   (match_operand:SI 3 "immediate_operand" " i"))))]
-  "TARGET_ISA_V3
+  "TARGET_ISA_V3 && optimize_size
    && (exact_log2 (INTVAL (operands[3])) != -1)
    && (exact_log2 (INTVAL (operands[3])) <= 31)"
 {
@@ -365,18 +535,20 @@
 
   return "sub_slli\t%0, %1, %2, %3";
 }
-  [(set_attr "type" "alu")
-   (set_attr "length" "4")])
+  [(set_attr "type" "alu_shift")
+   (set_attr "combo"        "2")
+   (set_attr "length"       "4")])
 
 (define_insn "*sub_srli"
-  [(set (match_operand:SI 0 "register_operand"                         "=   r")
-	(minus:SI (match_operand:SI 1 "register_operand"               "    r")
-		  (lshiftrt:SI (match_operand:SI 2 "register_operand"  "    r")
-			       (match_operand:SI 3 "immediate_operand" " Iu05"))))]
-  "TARGET_ISA_V3"
+  [(set (match_operand:SI 0 "register_operand"                           "=   r")
+	(minus:SI (match_operand:SI 1 "register_operand"                 "    r")
+		  (lshiftrt:SI (match_operand:SI 2 "register_operand"    "    r")
+			       (match_operand:SI 3 "nds32_imm5u_operand" " Iu05"))))]
+  "TARGET_ISA_V3 && optimize_size"
   "sub_srli\t%0, %1, %2, %3"
-  [(set_attr "type" "alu")
-   (set_attr "length" "4")])
+  [(set_attr "type" "alu_shift")
+   (set_attr "combo"        "2")
+   (set_attr "length"       "4")])
 
 
 ;; Multiplication instructions.
@@ -387,10 +559,11 @@
 		 (match_operand:SI 2 "register_operand" " w, r")))]
   ""
   "@
-  mul33\t%0, %2
-  mul\t%0, %1, %2"
-  [(set_attr "type"   "alu,alu")
-   (set_attr "length" "  2,  4")])
+   mul33\t%0, %2
+   mul\t%0, %1, %2"
+  [(set_attr "type"    "mul,mul")
+   (set_attr "length"  "  2,  4")
+   (set_attr "feature" "v3m, v1")])
 
 (define_insn "mulsidi3"
   [(set (match_operand:DI 0 "register_operand"                          "=r")
@@ -398,7 +571,7 @@
 		 (sign_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
   "TARGET_ISA_V2 || TARGET_ISA_V3"
   "mulsr64\t%0, %1, %2"
-  [(set_attr "type"   "alu")
+  [(set_attr "type"   "mul")
    (set_attr "length"   "4")])
 
 (define_insn "umulsidi3"
@@ -407,7 +580,7 @@
 		 (zero_extend:DI (match_operand:SI 2 "register_operand" " r"))))]
   "TARGET_ISA_V2 || TARGET_ISA_V3"
   "mulr64\t%0, %1, %2"
-  [(set_attr "type"   "alu")
+  [(set_attr "type"   "mul")
    (set_attr "length"   "4")])
 
 
@@ -420,7 +593,7 @@
 			  (match_operand:SI 2 "register_operand" " r"))))]
   ""
   "maddr32\t%0, %1, %2"
-  [(set_attr "type"   "alu")
+  [(set_attr "type"   "mac")
    (set_attr "length"   "4")])
 
 (define_insn "*maddr32_1"
@@ -430,7 +603,7 @@
 		 (match_operand:SI 3 "register_operand"          " 0")))]
   ""
   "maddr32\t%0, %1, %2"
-  [(set_attr "type"   "alu")
+  [(set_attr "type"   "mac")
    (set_attr "length"   "4")])
 
 (define_insn "*msubr32"
@@ -440,7 +613,7 @@
 			   (match_operand:SI 2 "register_operand" " r"))))]
   ""
   "msubr32\t%0, %1, %2"
-  [(set_attr "type"   "alu")
+  [(set_attr "type"   "mac")
    (set_attr "length"   "4")])
 
 
@@ -454,7 +627,7 @@
 	(mod:SI (match_dup 1) (match_dup 2)))]
   ""
   "divsr\t%0, %3, %1, %2"
-  [(set_attr "type"   "alu")
+  [(set_attr "type"   "div")
    (set_attr "length"   "4")])
 
 (define_insn "udivmodsi4"
@@ -465,9 +638,29 @@
 	(umod:SI (match_dup 1) (match_dup 2)))]
   ""
   "divr\t%0, %3, %1, %2"
-  [(set_attr "type"   "alu")
+  [(set_attr "type"   "div")
    (set_attr "length"   "4")])
 
+;; Per our ISA spec, divsr/divr keep only the quotient when the quotient and
+;; remainder are the same register; this reduces register pressure by one
+;; register if we don't want the remainder.
+(define_insn "divsi4" ; signed quotient only; no remainder is set
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+	(div:SI (match_operand:SI 1 "register_operand" " r")
+		(match_operand:SI 2 "register_operand" " r")))]
+  ""
+  "divsr\t%0, %0, %1, %2" ; %0 is given for both result registers
+  [(set_attr "type"   "div")
+   (set_attr "length"   "4")])
+
+(define_insn "udivsi4" ; unsigned quotient only; no remainder is set
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(udiv:SI (match_operand:SI 1 "register_operand" " r")
+		 (match_operand:SI 2 "register_operand"  " r")))]
+  ""
+  "divr\t%0, %0, %1, %2" ; %0 is given for both result registers
+  [(set_attr "type"   "div")
+   (set_attr "length"   "4")])
 
 ;; ----------------------------------------------------------------------------
 
@@ -488,14 +681,28 @@
    (set_attr "length" "4")]
 )
 
-(define_insn "andsi3"
-  [(set (match_operand:SI 0 "register_operand"         "=w, r,    l,    l,    l,    l,    l,    l,    r,   r,     r,    r,    r")
-	(and:SI (match_operand:SI 1 "register_operand" "%0, r,    l,    l,    l,    l,    0,    0,    r,   r,     r,    r,    r")
-		(match_operand:SI 2 "general_operand"  " w, r, Izeb, Izeh, Ixls, Ix11, Ibms, Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))]
+(define_expand "andsi3" ; SImode AND; out-of-range constants take the helper path below
+  [(set (match_operand:SI 0 "register_operand" "")
+	(and:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "nds32_reg_constant_operand" "")))]
+  ""
+{
+  if (CONST_INT_P (operands[2])
+      && !nds32_and_operand (operands[2], SImode))  /* Constant that fails the nds32_and_operand predicate.  */
+    {
+      nds32_expand_constant (SImode, INTVAL (operands[2]),
+			     operands[0], operands[1]);
+      DONE;  /* The helper is expected to have emitted the whole operation.  */
+    }
+})
+
+(define_insn "*andsi3"
+  [(set (match_operand:SI 0 "register_operand"          "=l, r,   l,   l,   l,   l,   l,   l,    r,   r,     r,    r,    r")
+	(and:SI (match_operand:SI 1 "register_operand"  "%0, r,   l,   l,   l,   l,   0,   0,    r,   r,     r,    r,    r")
+		(match_operand:SI 2 "nds32_and_operand" " l, r,Izeb,Izeh,Ixls,Ix11,Ibms,Ifex, Izeb, Izeh, Iu15, Ii15, Ic15")))]
   ""
 {
   HOST_WIDE_INT mask = INTVAL (operands[2]);
-  int zero_position;
 
   /* 16-bit andi instructions:
      andi Rt3,Ra3,0xff   -> zeb33  Rt3,Ra3
@@ -520,8 +727,7 @@
     case 5:
       return "x11b33\t%0, %1";
     case 6:
-      operands[2] = GEN_INT (floor_log2 (mask));
-      return "bmski33\t%0, %2";
+      return "bmski33\t%0, %B2";
     case 7:
       operands[2] = GEN_INT (floor_log2 (mask + 1) - 1);
       return "fexti33\t%0, %2";
@@ -535,47 +741,35 @@
       operands[2] = GEN_INT (~mask);
       return "bitci\t%0, %1, %2";
     case 12:
-      /* If we reach this alternative,
-         it must pass the nds32_can_use_bclr_p() test,
-         so that we can guarantee there is only one 0-bit
-         within the immediate value.  */
-      for (zero_position = 31; zero_position >= 0; zero_position--)
-	{
-	  if ((INTVAL (operands[2]) & (1 << zero_position)) == 0)
-	    {
-	      /* Found the 0-bit position.  */
-	      operands[2] = GEN_INT (zero_position);
-	      break;
-	    }
-	}
-      return "bclr\t%0, %1, %2";
+      return "bclr\t%0, %1, %b2";
 
     default:
       gcc_unreachable ();
     }
 }
-  [(set_attr "type"   "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
-   (set_attr "length" "  2,  4,  2,  2,  2,  2,  2,  2,  4,  4,  4,  4,  4")])
+  [(set_attr "type"    "alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu,alu")
+   (set_attr "length"  "  2,  4,  2,  2,  2,  2,  2,  2,  4,  4,  4,  4,  4")
+   (set_attr "feature" "v3m, v1, v1, v1, v1, v1,v3m,v3m, v1, v1, v1, v3,pe1")])
 
 (define_insn "*and_slli"
-  [(set (match_operand:SI 0 "register_operand"                      "=   r")
-	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"   "    r")
-			    (match_operand:SI 2 "immediate_operand" " Iu05"))
-		(match_operand:SI 3 "register_operand"              "    r")))]
-  "TARGET_ISA_V3"
+  [(set (match_operand:SI 0 "register_operand"                        "=   r")
+	(and:SI (ashift:SI (match_operand:SI 1 "register_operand"     "    r")
+			    (match_operand:SI 2 "nds32_imm5u_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"                "    r")))]
+  "TARGET_ISA_V3 && optimize_size"
   "and_slli\t%0, %3, %1, %2"
-  [(set_attr "type" "alu")
-   (set_attr "length" "4")])
+  [(set_attr "type" "alu_shift")
+   (set_attr "length"       "4")])
 
 (define_insn "*and_srli"
-  [(set (match_operand:SI 0 "register_operand"                       "=   r")
-	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
-			     (match_operand:SI 2 "immediate_operand" " Iu05"))
-		(match_operand:SI 3 "register_operand"               "    r")))]
-  "TARGET_ISA_V3"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r")
+	(and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"    "    r")
+			     (match_operand:SI 2 "nds32_imm5u_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"                 "    r")))]
+  "TARGET_ISA_V3 && optimize_size"
   "and_srli\t%0, %3, %1, %2"
-  [(set_attr "type" "alu")
-   (set_attr "length" "4")])
+  [(set_attr "type" "alu_shift")
+   (set_attr "length"       "4")])
 
 
 ;; ----------------------------------------------------------------------------
@@ -584,58 +778,50 @@
 
 ;; For V3/V3M ISA, we have 'or33' instruction.
 ;; So we can identify 'or Rt3,Rt3,Ra3' case and set its length to be 2.
-(define_insn "iorsi3"
-  [(set (match_operand:SI 0 "register_operand"         "=w, r,    r,    r")
-	(ior:SI (match_operand:SI 1 "register_operand" "%0, r,    r,    r")
-		(match_operand:SI 2 "general_operand"  " w, r, Iu15, Ie15")))]
+
+(define_expand "iorsi3" ; SImode inclusive OR
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(ior:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
   ""
 {
-  int one_position;
+  if (!nds32_ior_operand (operands[2], SImode))  /* Operand 2 fails the nds32_ior_operand predicate.  */
+    operands[2] = force_reg (SImode, operands[2]);  /* So move it into a register first.  */
+})
 
-  switch (which_alternative)
-    {
-    case 0:
-      return "or33\t%0, %2";
-    case 1:
-      return "or\t%0, %1, %2";
-    case 2:
-      return "ori\t%0, %1, %2";
-    case 3:
-      /* If we reach this alternative,
-         it must pass the nds32_can_use_bset_p() test,
-         so that we can guarantee there is only one 1-bit
-         within the immediate value.  */
-      /* Use exact_log2() to search the 1-bit position.  */
-      one_position = exact_log2 (INTVAL (operands[2]));
-      operands[2] = GEN_INT (one_position);
-      return "bset\t%0, %1, %2";
-
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type"   "alu,alu,alu,alu")
-   (set_attr "length" "  2,  4,  4,  4")])
+(define_insn "*iorsi3" ; alternatives, in order: or33, or, ori, bset
+  [(set (match_operand:SI 0 "register_operand"          "=l, r,    r,    r")
+	(ior:SI (match_operand:SI 1 "register_operand"  "%0, r,    r,    r")
+		(match_operand:SI 2 "nds32_ior_operand" " l, r, Iu15, Ie15")))]
+  ""
+  "@
+   or33\t%0, %2
+   or\t%0, %1, %2
+   ori\t%0, %1, %2
+   bset\t%0, %1, %B2"
+  [(set_attr "type"    "alu,alu,alu,alu")
+   (set_attr "length"  "  2,  4,  4,  4")
+   (set_attr "feature" "v3m, v1, v1,pe1")]) ; or33 is tagged v3m, bset is tagged pe1
 
 (define_insn "*or_slli"
-  [(set (match_operand:SI 0 "register_operand"                     "=   r")
-	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
-			   (match_operand:SI 2 "immediate_operand" " Iu05"))
-		(match_operand:SI 3 "register_operand"             "    r")))]
-  "TARGET_ISA_V3"
+  [(set (match_operand:SI 0 "register_operand"                       "=   r")
+	(ior:SI (ashift:SI (match_operand:SI 1 "register_operand"    "    r")
+			   (match_operand:SI 2 "nds32_imm5u_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"               "    r")))]
+  "TARGET_ISA_V3 && optimize_size"
   "or_slli\t%0, %3, %1, %2"
-  [(set_attr "type" "alu")
-   (set_attr "length" "4")])
+  [(set_attr "type" "alu_shift")
+   (set_attr "length"       "4")])
 
 (define_insn "*or_srli"
-  [(set (match_operand:SI 0 "register_operand"                       "=   r")
-	(ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
-			     (match_operand:SI 2 "immediate_operand" " Iu05"))
-		(match_operand:SI 3 "register_operand"               "    r")))]
-  "TARGET_ISA_V3"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r")
+	(ior:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"    "    r")
+			     (match_operand:SI 2 "nds32_imm5u_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"                 "    r")))]
+  "TARGET_ISA_V3 && optimize_size"
   "or_srli\t%0, %3, %1, %2"
-  [(set_attr "type" "alu")
-   (set_attr "length" "4")])
+  [(set_attr "type" "alu_shift")
+   (set_attr "length"       "4")])
 
 
 ;; ----------------------------------------------------------------------------
@@ -644,71 +830,64 @@
 
 ;; For V3/V3M ISA, we have 'xor33' instruction.
 ;; So we can identify 'xor Rt3,Rt3,Ra3' case and set its length to be 2.
-(define_insn "xorsi3"
-  [(set (match_operand:SI 0 "register_operand"         "=w, r,    r,    r")
-	(xor:SI (match_operand:SI 1 "register_operand" "%0, r,    r,    r")
-		(match_operand:SI 2 "general_operand"  " w, r, Iu15, It15")))]
+
+(define_expand "xorsi3" ; SImode exclusive OR
+  [(set (match_operand:SI 0 "register_operand"         "")
+	(xor:SI (match_operand:SI 1 "register_operand" "")
+		(match_operand:SI 2 "general_operand"  "")))]
   ""
 {
-  int one_position;
+  if (!nds32_xor_operand (operands[2], SImode))  /* Operand 2 fails the nds32_xor_operand predicate.  */
+    operands[2] = force_reg (SImode, operands[2]);  /* So move it into a register first.  */
+})
 
-  switch (which_alternative)
-    {
-    case 0:
-      return "xor33\t%0, %2";
-    case 1:
-      return "xor\t%0, %1, %2";
-    case 2:
-      return "xori\t%0, %1, %2";
-    case 3:
-      /* If we reach this alternative,
-         it must pass the nds32_can_use_btgl_p() test,
-         so that we can guarantee there is only one 1-bit
-         within the immediate value.  */
-      /* Use exact_log2() to search the 1-bit position.  */
-      one_position = exact_log2 (INTVAL (operands[2]));
-      operands[2] = GEN_INT (one_position);
-      return "btgl\t%0, %1, %2";
-
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type"   "alu,alu,alu,alu")
-   (set_attr "length" "  2,  4,  4,  4")])
+(define_insn "*xorsi3" ; alternatives, in order: xor33, xor, xori, btgl
+  [(set (match_operand:SI 0 "register_operand"          "=l, r,    r,    r")
+	(xor:SI (match_operand:SI 1 "register_operand"  "%0, r,    r,    r")
+		(match_operand:SI 2 "nds32_xor_operand" " l, r, Iu15, It15")))]
+  ""
+  "@
+   xor33\t%0, %2
+   xor\t%0, %1, %2
+   xori\t%0, %1, %2
+   btgl\t%0, %1, %B2"
+  [(set_attr "type"    "alu,alu,alu,alu")
+   (set_attr "length"  "  2,  4,  4,  4")
+   (set_attr "feature" "v3m, v1, v1,pe1")]) ; xor33 is tagged v3m, btgl is tagged pe1
 
 (define_insn "*xor_slli"
   [(set (match_operand:SI 0 "register_operand"                     "=   r")
 	(xor:SI (ashift:SI (match_operand:SI 1 "register_operand"  "    r")
-			   (match_operand:SI 2 "immediate_operand" " Iu05"))
+			   (match_operand:SI 2 "nds32_imm5u_operand" " Iu05"))
 		(match_operand:SI 3 "register_operand"             "    r")))]
-  "TARGET_ISA_V3"
+  "TARGET_ISA_V3 && optimize_size"
   "xor_slli\t%0, %3, %1, %2"
-  [(set_attr "type" "alu")
-   (set_attr "length" "4")])
+  [(set_attr "type" "alu_shift")
+   (set_attr "length"       "4")])
 
 (define_insn "*xor_srli"
-  [(set (match_operand:SI 0 "register_operand"                       "=   r")
-	(xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"  "    r")
-			     (match_operand:SI 2 "immediate_operand" " Iu05"))
-		(match_operand:SI 3 "register_operand"               "    r")))]
-  "TARGET_ISA_V3"
+  [(set (match_operand:SI 0 "register_operand"                         "=   r")
+	(xor:SI (lshiftrt:SI (match_operand:SI 1 "register_operand"    "    r")
+			     (match_operand:SI 2 "nds32_imm5u_operand" " Iu05"))
+		(match_operand:SI 3 "register_operand"                 "    r")))]
+  "TARGET_ISA_V3 && optimize_size"
   "xor_srli\t%0, %3, %1, %2"
-  [(set_attr "type" "alu")
-   (set_attr "length" "4")])
+  [(set_attr "type" "alu_shift")
+   (set_attr "length"       "4")])
 
 ;; Rotate Right Instructions.
 
-(define_insn "rotrsi3"
-  [(set (match_operand:SI 0 "register_operand"                 "=   r, r")
-	  (rotatert:SI (match_operand:SI 1 "register_operand"  "    r, r")
-		       (match_operand:SI 2 "nonmemory_operand" " Iu05, r")))]
+(define_insn "*rotrsi3"
+  [(set (match_operand:SI 0 "register_operand"                    "=   r, r")
+	  (rotatert:SI (match_operand:SI 1 "register_operand"     "    r, r")
+		       (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, r")))]
   ""
   "@
-  rotri\t%0, %1, %2
-  rotr\t%0, %1, %2"
-  [(set_attr "type"   "alu,alu")
-   (set_attr "length" "  4,  4")])
+   rotri\t%0, %1, %2
+   rotr\t%0, %1, %2"
+  [(set_attr "type"    "  alu,  alu")
+   (set_attr "subtype" "shift,shift")
+   (set_attr "length"  "    4,    4")])
 
 
 ;; ----------------------------------------------------------------------------
@@ -720,14 +899,95 @@
 ;; And for V2 ISA, there is NO 'neg33' instruction.
 ;; The only option is to use 'subri A,B,0' (its semantic is 'A = 0 - B').
 (define_insn "negsi2"
-  [(set (match_operand:SI 0 "register_operand"         "=w, r")
-	(neg:SI (match_operand:SI 1 "register_operand" " w, r")))]
+  [(set (match_operand:SI 0 "register_operand"         "=l, r") ;; alt 0 (constraint 'l') emits neg33, alt 1 (constraint 'r') emits subri
+	(neg:SI (match_operand:SI 1 "register_operand" " l, r")))]
   ""
   "@
    neg33\t%0, %1
    subri\t%0, %1, 0"
-  [(set_attr "type"   "alu,alu")
-   (set_attr "length" "  2,  4")])
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4") ;; neg33 is 2 bytes, subri is 4 bytes
+   (set_attr "feature" "v3m, v1")]) ;; per-alternative "feature" value: v3m for neg33, v1 for subri
+
+(define_expand "negsf2"
+  [(set (match_operand:SF 0 "register_operand" "")
+	(neg:SF (match_operand:SF 1 "register_operand" "")))]
+  ""
+{
+  /* When neither TARGET_FPU_SINGLE nor TARGET_EXT_PERF is set, negate by
+     xor-ing the SImode view of the value with the 0x80000000 mask.  */
+  if (!(TARGET_FPU_SINGLE || TARGET_EXT_PERF))
+    {
+      rtx dst_si = simplify_gen_subreg (SImode, operands[0], SFmode, 0);
+      rtx src_si = simplify_gen_subreg (SImode, operands[1], SFmode, 0);
+      rtx sign_mask = gen_int_mode (0x80000000, SImode);
+
+      emit_insn (gen_xorsi3 (dst_si, src_si, sign_mask));
+      DONE;
+    }
+})
+
+(define_expand "negdf2" ;; DFmode negation expander; it has no preparation code, so the RTL template below is emitted as-is.
+  [(set (match_operand:DF 0 "register_operand" "")
+	(neg:DF (match_operand:DF 1 "register_operand" "")))]
+  ""
+{
+})
+
+(define_insn_and_split "soft_negdf2" ;; DFmode negation when !TARGET_FPU_DOUBLE: always split ("#") into SImode operations on the two halves.
+  [(set (match_operand:DF 0 "register_operand" "")
+	(neg:DF (match_operand:DF 1 "register_operand" "")))]
+  "!TARGET_FPU_DOUBLE"
+  "#"
+  "!TARGET_FPU_DOUBLE"
+  [(const_int 1)]
+{
+    rtx src = operands[1];
+    rtx dst = operands[0];
+    rtx ori_dst = operands[0];
+
+    bool need_extra_move_for_dst_p = false; /* Must start false: it is only set on the FP-destination path below.  */
+    /* An FP hard register cannot be accessed in SImode directly, nor used by
+       xor/btgl, so FP-register operands are staged through a DFmode pseudo.  */
+    if (HARD_REGISTER_P (src)
+	&& TEST_HARD_REG_BIT (reg_class_contents[FP_REGS], REGNO (src)))
+      {
+	rtx tmp = gen_reg_rtx (DFmode); /* NOTE(review): gen_reg_rtx needs can_create_pseudo_p (); confirm this split never runs after reload.  */
+	emit_move_insn (tmp, src);
+	src = tmp;
+      }
+
+    if (HARD_REGISTER_P (dst)
+	&& TEST_HARD_REG_BIT (reg_class_contents[FP_REGS], REGNO (dst)))
+      {
+	need_extra_move_for_dst_p = true;
+	rtx tmp = gen_reg_rtx (DFmode);
+	dst = tmp;
+      }
+
+    rtx dst_high_part = simplify_gen_subreg (
+			  SImode, dst,
+			  DFmode, subreg_highpart_offset (SImode, DFmode));
+    rtx dst_low_part = simplify_gen_subreg (
+			  SImode, dst,
+			  DFmode, subreg_lowpart_offset (SImode, DFmode));
+    rtx src_high_part = simplify_gen_subreg (
+			  SImode, src,
+			  DFmode, subreg_highpart_offset (SImode, DFmode));
+    rtx src_low_part = simplify_gen_subreg (
+			  SImode, src,
+			  DFmode, subreg_lowpart_offset (SImode, DFmode));
+
+    emit_insn (gen_xorsi3 (dst_high_part,
+			   src_high_part,
+			   gen_int_mode (0x80000000, SImode))); /* Toggle bit 31 of the high word.  */
+    emit_move_insn (dst_low_part, src_low_part); /* The low word is copied unchanged.  */
+
+    if (need_extra_move_for_dst_p)
+      emit_move_insn (ori_dst, dst); /* Copy the staged result back to the real destination.  */
+
+    DONE;
+})
 
 
 ;; ----------------------------------------------------------------------------
@@ -743,49 +1003,66 @@
   "@
    not33\t%0, %1
    nor\t%0, %1, %1"
-  [(set_attr "type"   "alu,alu")
-   (set_attr "length" "  2,  4")])
+  [(set_attr "type"    "alu,alu")
+   (set_attr "length"  "  2,  4")
+   (set_attr "feature" "v3m, v1")])
 
 
 ;; ----------------------------------------------------------------------------
 
 ;; Shift instructions.
 
-(define_insn "ashlsi3"
-  [(set (match_operand:SI 0 "register_operand"             "=   l,    r, r")
-	(ashift:SI (match_operand:SI 1 "register_operand"  "    l,    r, r")
-		   (match_operand:SI 2 "nonmemory_operand" " Iu03, Iu05, r")))]
+(define_expand "<shift>si3" ;; One SImode expander per code in the shift_rotate iterator (iterator defined outside this chunk).
+  [(set (match_operand:SI 0 "register_operand"                      "")
+	(shift_rotate:SI (match_operand:SI 1 "register_operand"     "")
+			 (match_operand:SI 2 "nds32_rimm5u_operand" "")))]
   ""
-  "@
-  slli333\t%0, %1, %2
-  slli\t%0, %1, %2
-  sll\t%0, %1, %2"
-  [(set_attr "type"   "alu,alu,alu")
-   (set_attr "length" "  2,  4,  4")])
+{
+  if (operands[2] == const0_rtx) /* A count of constant zero degenerates to a plain register move.  */
+    {
+      emit_move_insn (operands[0], operands[1]);
+      DONE;
+    }
+})
 
-(define_insn "ashrsi3"
-  [(set (match_operand:SI 0 "register_operand"               "=   d,    r, r")
-	(ashiftrt:SI (match_operand:SI 1 "register_operand"  "    0,    r, r")
-		     (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))]
+(define_insn "*ashlsi3" ;; SImode left shift: alt 0 = slli333 (2 bytes), alt 1 = slli (immediate count), alt 2 = sll (register count).
+  [(set (match_operand:SI 0 "register_operand"                "=   l,    r, r")
+	(ashift:SI (match_operand:SI 1 "register_operand"     "    l,    r, r")
+		   (match_operand:SI 2 "nds32_rimm5u_operand" " Iu03, Iu05, r")))]
   ""
   "@
-  srai45\t%0, %2
-  srai\t%0, %1, %2
-  sra\t%0, %1, %2"
-  [(set_attr "type"   "alu,alu,alu")
-   (set_attr "length" "  2,  4,  4")])
+   slli333\t%0, %1, %2
+   slli\t%0, %1, %2
+   sll\t%0, %1, %2"
+  [(set_attr "type"    "  alu,  alu,  alu")
+   (set_attr "subtype" "shift,shift,shift") ;; every alternative carries the "shift" subtype
+   (set_attr "length"  "    2,    4,    4")])
 
-(define_insn "lshrsi3"
-  [(set (match_operand:SI 0 "register_operand"               "=   d,    r, r")
-	(lshiftrt:SI (match_operand:SI 1 "register_operand"  "    0,    r, r")
-		     (match_operand:SI 2 "nonmemory_operand" " Iu05, Iu05, r")))]
+(define_insn "*ashrsi3" ;; SImode arithmetic right shift: alt 0 = srai45 (2 bytes), alt 1 = srai, alt 2 = sra (register count).
+  [(set (match_operand:SI 0 "register_operand"                  "=   d,    r, r")
+	(ashiftrt:SI (match_operand:SI 1 "register_operand"     "    0,    r, r") ;; alt 0 ties op1 to op0, so srai45 names only %0 and %2
+		     (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, Iu05, r")))]
   ""
   "@
-  srli45\t%0, %2
-  srli\t%0, %1, %2
-  srl\t%0, %1, %2"
-  [(set_attr "type"   "alu,alu,alu")
-   (set_attr "length" "  2,  4,  4")])
+   srai45\t%0, %2
+   srai\t%0, %1, %2
+   sra\t%0, %1, %2"
+  [(set_attr "type"    "  alu,  alu,  alu")
+   (set_attr "subtype" "shift,shift,shift") ;; every alternative carries the "shift" subtype
+   (set_attr "length"  "    2,    4,    4")])
+
+(define_insn "*lshrsi3" ;; SImode logical right shift: alt 0 = srli45 (2 bytes), alt 1 = srli, alt 2 = srl (register count).
+  [(set (match_operand:SI 0 "register_operand"                  "=   d,    r, r")
+	(lshiftrt:SI (match_operand:SI 1 "register_operand"     "    0,    r, r") ;; alt 0 ties op1 to op0, so srli45 names only %0 and %2
+		     (match_operand:SI 2 "nds32_rimm5u_operand" " Iu05, Iu05, r")))]
+  ""
+  "@
+   srli45\t%0, %2
+   srli\t%0, %1, %2
+   srl\t%0, %1, %2"
+  [(set_attr "type"    "  alu,  alu,  alu")
+   (set_attr "subtype" "shift,shift,shift") ;; every alternative carries the "shift" subtype
+   (set_attr "length"  "    2,    4,    4")])
 
 
 ;; ----------------------------------------------------------------------------
@@ -794,148 +1071,65 @@
 ;; Conditional Move patterns
 ;; ----------------------------------------------------------------------------
 
-(define_expand "movsicc"
-  [(set (match_operand:SI 0 "register_operand" "")
-	(if_then_else:SI (match_operand 1 "comparison_operator" "")
-			 (match_operand:SI 2 "register_operand" "")
-			 (match_operand:SI 3 "register_operand" "")))]
-  "TARGET_CMOV"
+(define_expand "mov<mode>cc" ;; Conditional-move expander, one instance per mode of the QIHISI iterator (presumably QI/HI/SI; defined outside this chunk).
+  [(set (match_operand:QIHISI 0 "register_operand" "")
+	(if_then_else:QIHISI (match_operand 1 "nds32_movecc_comparison_operator" "")
+			 (match_operand:QIHISI 2 "register_operand" "")
+			 (match_operand:QIHISI 3 "register_operand" "")))]
+  "TARGET_CMOV && !optimize_size" ;; not available when optimizing for size
 {
-  if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)
-      && GET_MODE (XEXP (operands[1], 0)) == SImode
-      && XEXP (operands[1], 1) == const0_rtx)
-    {
-      /* If the operands[1] rtx is already (eq X 0) or (ne X 0),
-         we have gcc generate original template rtx.  */
-      goto create_template;
-    }
-  else
+  enum nds32_expand_result_type result = nds32_expand_movcc (operands); /* The expansion logic lives in this C helper.  */
+  switch (result)
     {
-      /* Since there is only 'slt'(Set when Less Than) instruction for
-         comparison in Andes ISA, the major strategy we use here is to
-         convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination.
-         We design constraints properly so that the reload phase will assist
-         to make one source operand to use same register as result operand.
-         Then we can use cmovz/cmovn to catch the other source operand
-         which has different register.  */
-      enum rtx_code code = GET_CODE (operands[1]);
-      enum rtx_code new_code = code;
-      rtx cmp_op0 = XEXP (operands[1], 0);
-      rtx cmp_op1 = XEXP (operands[1], 1);
-      rtx tmp;
-      int reverse = 0;
-
-      /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part
-         Strategy : Reverse condition and swap comparison operands
-
-         For example:
-
-             a <= b ? P : Q   (LE or LEU)
-         --> a >  b ? Q : P   (reverse condition)
-         --> b <  a ? Q : P   (swap comparison operands to achieve 'LT/LTU')
-
-             a >= b ? P : Q   (GE or GEU)
-         --> a <  b ? Q : P   (reverse condition to achieve 'LT/LTU')
-
-             a <  b ? P : Q   (LT or LTU)
-         --> (NO NEED TO CHANGE, it is already 'LT/LTU')
-
-             a >  b ? P : Q   (GT or GTU)
-         --> b <  a ? P : Q   (swap comparison operands to achieve 'LT/LTU') */
-      switch (code)
-	{
-	case NE:
-	  /*   (a != b ? P : Q)
-	     can be expressed as
-	       (a == b ? Q : P)
-	     so, fall through to reverse condition */
-	case GE: case GEU: case LE: case LEU:
-	  new_code = reverse_condition (code);
-	  reverse = 1;
-	  break;
-	case EQ: case GT: case GTU: case LT: case LTU:
-	  /* no need to reverse condition */
-	  break;
-	default:
-	  FAIL;
-	}
-
-      /* For '>' comparison operator, we swap operands
-         so that we can have 'LT/LTU' operator.  */
-      if (new_code == GT || new_code == GTU)
-	{
-	  tmp     = cmp_op0;
-	  cmp_op0 = cmp_op1;
-	  cmp_op1 = tmp;
-
-	  new_code = swap_condition (new_code);
-	}
-
-      /* Use a temporary register to store slt/slts result.  */
-      tmp = gen_reg_rtx (SImode);
-
-      /* Split EQ and NE because we don't have direct comparison of EQ and NE.
-         If we don't split it, the conditional move transformation will fail
-         when producing (SET A (EQ B C)) or (SET A (NE B C)).  */
-      if (new_code == EQ)
-	{
-	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
-	  emit_insn (gen_slt_compare (tmp, tmp, GEN_INT (1)));
-	}
-      else if (new_code == NE)
-	{
-	  emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1));
-	  emit_insn (gen_slt_compare (tmp, GEN_INT (0), tmp));
-        }
-      else
-	/* This emit_insn will create corresponding 'slt/slts' insturction.  */
-	emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_ee (new_code, SImode,
-						     cmp_op0, cmp_op1)));
-
-      /* Change comparison semantic into (eq X 0) or (ne X 0) behavior
-         so that cmovz or cmovn will be matched later.
-
-         For reverse condition cases, we want to create a semantic that:
-           (eq X 0) --> pick up "else" part
-         For normal cases, we want to create a semantic that:
-           (ne X 0) --> pick up "then" part
-
-         Later we will have cmovz/cmovn instruction pattern to
-         match corresponding behavior and output instruction.  */
-      operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE,
-				    VOIDmode, tmp, const0_rtx);
+    case EXPAND_DONE: /* The helper already emitted all insns.  */
+      DONE;
+      break;
+    case EXPAND_FAIL: /* Give up on this expansion.  */
+      FAIL;
+      break;
+    case EXPAND_CREATE_TEMPLATE: /* Leave the switch so the RTL template above is emitted from operands[].  */
+      break;
+    default:
+      gcc_unreachable ();
     }
-
-create_template:
-  do {} while(0); /* dummy line */
 })
 
-(define_insn "cmovz"
-  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
-        (if_then_else:SI (eq (match_operand:SI 1 "register_operand" " r, r")
+(define_insn "cmovz<mode>" ;; op0 = (op1 == 0) ? op2 : op3; the condition register op1 is always SImode.
+  [(set (match_operand:QIHISI 0 "register_operand"                      "=r, r")
+	(if_then_else:QIHISI (eq (match_operand:SI 1 "register_operand" " r, r")
 			     (const_int 0))
-			 (match_operand:SI 2 "register_operand"     " r, 0")
-			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+			 (match_operand:QIHISI 2 "register_operand"     " r, 0") ;; alt 1 ties op2 to op0 and emits cmovn with op3
+			 (match_operand:QIHISI 3 "register_operand"     " 0, r")))] ;; alt 0 ties op3 to op0 and emits cmovz with op2
   "TARGET_CMOV"
   "@
    cmovz\t%0, %2, %1
    cmovn\t%0, %3, %1"
-  [(set_attr "type" "move")
+  [(set_attr "type"  "alu") ;; now classified as "alu" (the old "move" type is no longer in the type list)
    (set_attr "length"  "4")])
 
-(define_insn "cmovn"
-  [(set (match_operand:SI 0 "register_operand"                      "=r, r")
-	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" " r, r")
+(define_insn "cmovn<mode>" ;; op0 = (op1 != 0) ? op2 : op3; the condition register op1 is always SImode.
+  [(set (match_operand:QIHISI 0 "register_operand"                      "=r, r")
+	(if_then_else:QIHISI (ne (match_operand:SI 1 "register_operand" " r, r")
 			     (const_int 0))
-			 (match_operand:SI 2 "register_operand"     " r, 0")
-			 (match_operand:SI 3 "register_operand"     " 0, r")))]
+			 (match_operand:QIHISI 2 "register_operand"     " r, 0") ;; alt 1 ties op2 to op0 and emits cmovz with op3
+			 (match_operand:QIHISI 3 "register_operand"     " 0, r")))] ;; alt 0 ties op3 to op0 and emits cmovn with op2
   "TARGET_CMOV"
   "@
    cmovn\t%0, %2, %1
    cmovz\t%0, %3, %1"
-  [(set_attr "type" "move")
+  [(set_attr "type"  "alu") ;; now classified as "alu" (the old "move" type is no longer in the type list)
    (set_attr "length"  "4")])
 
+;; A hotfix to help the RTL combiner merge a cmovn insn with a zero_extend insn.
+;; It should be removed once the expansion form of cmovn has been changed.
+(define_insn "*cmovn_simplified_<mode>"
+  [(set (match_operand:QIHISI 0 "register_operand" "=r")
+	(if_then_else:QIHISI (match_operand:SI 1 "register_operand" "r")
+			 (match_operand:QIHISI 2 "register_operand" "r")
+			 (match_operand:QIHISI 3 "register_operand" "0")))]
+  "TARGET_CMOV" ;; Same gate as cmovz<mode>/cmovn<mode>, so no cmovn is matched when TARGET_CMOV is off.
+  "cmovn\t%0, %2, %1"
+  [(set_attr "type" "alu")])
 
 ;; ----------------------------------------------------------------------------
 ;; Conditional Branch patterns
@@ -950,573 +1144,188 @@
 		      (pc)))]
   ""
 {
-  rtx tmp_reg;
-  enum rtx_code code;
-
-  code = GET_CODE (operands[0]);
-
-  /* If operands[2] is (const_int 0),
-     we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions.
-     So we have gcc generate original template rtx.  */
-  if (GET_CODE (operands[2]) == CONST_INT)
-    if (INTVAL (operands[2]) == 0)
-      if ((code != GTU)
-	  && (code != GEU)
-	  && (code != LTU)
-	  && (code != LEU))
-	goto create_template;
-
-  /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than)
-     behavior for the comparison, we might need to generate other
-     rtx patterns to achieve same semantic.  */
-  switch (code)
+  enum nds32_expand_result_type result = nds32_expand_cbranch (operands);
+  switch (result)
     {
-    case GT:
-    case GTU:
-      if (GET_CODE (operands[2]) == CONST_INT)
-	{
-	  /* GT  reg_A, const_int  =>  !(LT  reg_A, const_int + 1) */
-	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
-
-	  /* We want to plus 1 into the integer value
-	     of operands[2] to create 'slt' instruction.
-	     This caculation is performed on the host machine,
-	     which may be 64-bit integer.
-	     So the meaning of caculation result may be
-	     different from the 32-bit nds32 target.
-
-	     For example:
-	       0x7fffffff + 0x1 -> 0x80000000,
-	       this value is POSITIVE on 64-bit machine,
-	       but the expected value on 32-bit nds32 target
-	       should be NEGATIVE value.
-
-	     Hence, instead of using GEN_INT(), we use gen_int_mode() to
-	     explicitly create SImode constant rtx.  */
-	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
-
-	  if (code == GT)
-	    {
-	      /* GT, use slts instruction */
-	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
-	    }
-	  else
-	    {
-	      /* GTU, use slt instruction */
-	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
-	    }
-
-	  PUT_CODE (operands[0], EQ);
-	  operands[1] = tmp_reg;
-	  operands[2] = const0_rtx;
-	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
-				     operands[2], operands[3]));
-
-	  DONE;
-	}
-      else
-	{
-	  /* GT  reg_A, reg_B  =>  LT  reg_B, reg_A */
-	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
-
-	  if (code == GT)
-	    {
-	      /* GT, use slts instruction */
-	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
-	    }
-	  else
-	    {
-	      /* GTU, use slt instruction */
-	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
-	    }
-
-	  PUT_CODE (operands[0], NE);
-	  operands[1] = tmp_reg;
-	  operands[2] = const0_rtx;
-	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
-				     operands[2], operands[3]));
-
-	  DONE;
-	}
-
-    case GE:
-    case GEU:
-      /* GE  reg_A, reg_B      =>  !(LT  reg_A, reg_B) */
-      /* GE  reg_A, const_int  =>  !(LT  reg_A, const_int) */
-      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
-
-      if (code == GE)
-	{
-	  /* GE, use slts instruction */
-	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
-	}
-      else
-	{
-	  /* GEU, use slt instruction */
-	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
-	}
-
-      PUT_CODE (operands[0], EQ);
-      operands[1] = tmp_reg;
-      operands[2] = const0_rtx;
-      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
-				 operands[2], operands[3]));
-
+    case EXPAND_DONE:
       DONE;
-
-    case LT:
-    case LTU:
-      /* LT  reg_A, reg_B      =>  LT  reg_A, reg_B */
-      /* LT  reg_A, const_int  =>  LT  reg_A, const_int */
-      tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
-
-      if (code == LT)
-	{
-	  /* LT, use slts instruction */
-	  emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
-	}
-      else
-	{
-	  /* LTU, use slt instruction */
-	  emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
-	}
-
-      PUT_CODE (operands[0], NE);
-      operands[1] = tmp_reg;
-      operands[2] = const0_rtx;
-      emit_insn (gen_cbranchsi4 (operands[0], operands[1],
-				 operands[2], operands[3]));
-
-      DONE;
-
-    case LE:
-    case LEU:
-      if (GET_CODE (operands[2]) == CONST_INT)
-	{
-	  /* LE  reg_A, const_int  =>  LT  reg_A, const_int + 1 */
-	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
-
-	  /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN).
-	     We better have an assert here in case GCC does not properly
-	     optimize it away.  The INT_MAX here is 0x7fffffff for target.  */
-	  gcc_assert (code != LE || INTVAL (operands[2]) != 0x7fffffff);
-	  operands[2] = gen_int_mode (INTVAL (operands[2]) + 1, SImode);
-
-	  if (code == LE)
-	    {
-	      /* LE, use slts instruction */
-	      emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2]));
-	    }
-	  else
-	    {
-	      /* LEU, use slt instruction */
-	      emit_insn (gen_slt_compare  (tmp_reg, operands[1], operands[2]));
-	    }
-
-	  PUT_CODE (operands[0], NE);
-	  operands[1] = tmp_reg;
-	  operands[2] = const0_rtx;
-	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
-				     operands[2], operands[3]));
-
-	  DONE;
-	}
-      else
-	{
-	  /* LE  reg_A, reg_B  =>  !(LT  reg_B, reg_A) */
-	  tmp_reg = gen_rtx_REG (SImode, TA_REGNUM);
-
-	  if (code == LE)
-	    {
-	      /* LE, use slts instruction */
-	      emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1]));
-	    }
-	  else
-	    {
-	      /* LEU, use slt instruction */
-	      emit_insn (gen_slt_compare  (tmp_reg, operands[2], operands[1]));
-	    }
-
-	  PUT_CODE (operands[0], EQ);
-	  operands[1] = tmp_reg;
-	  operands[2] = const0_rtx;
-	  emit_insn (gen_cbranchsi4 (operands[0], operands[1],
-				     operands[2], operands[3]));
-
-	  DONE;
-	}
-
-    case EQ:
-    case NE:
-      /* NDS32 ISA has various form for eq/ne behavior no matter
-         what kind of the operand is.
-         So just generate original template rtx.  */
-      goto create_template;
-
+      break;
+    case EXPAND_FAIL:
+      FAIL;
+      break;
+    case EXPAND_CREATE_TEMPLATE:
+      break;
     default:
-      FAIL;
+      gcc_unreachable ();
     }
-
-create_template:
-  do {} while(0); /* dummy line */
 })
 
 
-(define_insn "*cbranchsi4_equality_zero"
+(define_insn "cbranchsi4_equality_zero" ;; Conditional branch comparing op1 against zero with operator 0; now a named pattern (the '*' prefix was dropped).
   [(set (pc)
 	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
-			[(match_operand:SI 1 "register_operand"  "t, l, r")
+			[(match_operand:SI 1 "register_operand"  "t,l, r")
 			 (const_int 0)])
 		      (label_ref (match_operand 2 "" ""))
 		      (pc)))]
   ""
 {
-  enum rtx_code code;
-
-  code = GET_CODE (operands[0]);
-
-  /* This zero-comparison conditional branch has two forms:
-       32-bit instruction =>          beqz/bnez           imm16s << 1
-       16-bit instruction => beqzs8/bnezs8/beqz38/bnez38  imm8s << 1
-
-     For 32-bit case,
-     we assume it is always reachable. (but check range -65500 ~ 65500)
-
-     For 16-bit case,
-     it must satisfy { 255 >= (label - pc) >= -256 } condition.
-     However, since the $pc for nds32 is at the beginning of the instruction,
-     we should leave some length space for current insn.
-     So we use range -250 ~ 250.  */
-
-  switch (get_attr_length (insn))
-    {
-    case 2:
-      if (which_alternative == 0)
-	{
-	  /* constraint: t */
-	  return (code == EQ) ? "beqzs8\t%2" : "bnezs8\t%2";
-	}
-      else if (which_alternative == 1)
-	{
-	  /* constraint: l */
-	  return (code == EQ) ? "beqz38\t%1, %2" : "bnez38\t%1, %2";
-	}
-      else
-	{
-	  /* constraint: r */
-	  /* For which_alternative==2, it should not be here.  */
-	  gcc_unreachable ();
-	}
-    case 4:
-      /* including constraints: t, l, and r */
-      return (code == EQ) ? "beqz\t%1, %2" : "bnez\t%1, %2";
-    case 6:
-      if (which_alternative == 0)
-	{
-	  /* constraint: t */
-	  if (code == EQ)
-	    {
-	      /*    beqzs8  .L0
-	          =>
-	            bnezs8  .LCB0
-	            j  .L0
-	          .LCB0:
-	       */
-	      return "bnezs8\t.LCB%=\;j\t%2\n.LCB%=:";
-	    }
-	  else
-	    {
-	      /*    bnezs8  .L0
-	          =>
-	            beqzs8  .LCB0
-	            j  .L0
-	          .LCB0:
-	       */
-	      return "beqzs8\t.LCB%=\;j\t%2\n.LCB%=:";
-	    }
-	}
-      else if (which_alternative == 1)
-	{
-	  /* constraint: l */
-	  if (code == EQ)
-	    {
-	      /*    beqz38  $r0, .L0
-	          =>
-	            bnez38  $r0, .LCB0
-	            j  .L0
-	          .LCB0:
-	       */
-	      return "bnez38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
-	    }
-	  else
-	    {
-	      /*    bnez38  $r0, .L0
-	          =>
-	            beqz38  $r0, .LCB0
-	            j  .L0
-	          .LCB0:
-	       */
-	      return "beqz38\t%1, .LCB%=\;j\t%2\n.LCB%=:";
-	    }
-	}
-      else
-	{
-	  /* constraint: r */
-	  /* For which_alternative==2, it should not be here.  */
-	  gcc_unreachable ();
-	}
-    case 8:
-      /* constraint: t, l, r.  */
-      if (code == EQ)
-	{
-	  /*    beqz  $r8, .L0
-	      =>
-	        bnez  $r8, .LCB0
-	        j  .L0
-	      .LCB0:
-	   */
-	  return "bnez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
-	}
-      else
-	{
-	  /*    bnez  $r8, .L0
-	      =>
-	        beqz  $r8, .LCB0
-	        j  .L0
-	      .LCB0:
-	   */
-	  return "beqz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
-	}
-    default:
-      gcc_unreachable ();
-    }
+  return nds32_output_cbranchsi4_equality_zero (insn, operands); /* The assembly text is now produced by this C helper.  */
 }
   [(set_attr "type" "branch")
-   (set_attr "enabled" "1")
+   (set_attr_alternative "enabled" ;; Alternatives 0 and 1 are enabled only under TARGET_16_BIT; alternative 2 is always enabled.
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_string "yes")
+		     (const_string "no"))
+       ;; Alternative 1
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_string "yes")
+		     (const_string "no"))
+       ;; Alternative 2
+       (const_string "yes")
+     ])
    (set_attr_alternative "length"
      [
        ;; Alternative 0
-       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
-			  (le (minus (match_dup 2) (pc)) (const_int  250)))
-		     (if_then_else (match_test "TARGET_16_BIT")
-				   (const_int 2)
-				   (const_int 4))
+       (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ;; a jump for which CROSSING_JUMP_P holds always gets length 10
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+					(le (minus (match_dup 2) (pc)) (const_int  250)))
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 2)
+						 (const_int 4))
+				   (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+						      (le (minus (match_dup 2) (pc)) (const_int  65500)))
+						 (const_int 4)
+						 (if_then_else (match_test "TARGET_16_BIT")
+							       (const_int 8)
+							       (const_int 10))))
+		     (const_int 10))
+       ;; Alternative 1
+       (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ;; same length table as alternative 0
+		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
+					(le (minus (match_dup 2) (pc)) (const_int  250)))
+				   (if_then_else (match_test "TARGET_16_BIT")
+						 (const_int 2)
+						 (const_int 4))
+				   (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+						      (le (minus (match_dup 2) (pc)) (const_int  65500)))
+						 (const_int 4)
+						 (if_then_else (match_test "TARGET_16_BIT")
+							       (const_int 8)
+							       (const_int 10))))
+		     (const_int 10))
+       ;; Alternative 2
+       (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ;; length 4 within +/-65500 bytes, otherwise 10
 		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
 					(le (minus (match_dup 2) (pc)) (const_int  65500)))
 				   (const_int 4)
-				   (if_then_else (match_test "TARGET_16_BIT")
-						 (const_int 6)
-						 (const_int 8))))
-       ;; Alternative 1
-       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -250))
-			  (le (minus (match_dup 2) (pc)) (const_int  250)))
-		     (if_then_else (match_test "TARGET_16_BIT")
-				   (const_int 2)
-				   (const_int 4))
-		     (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
-					(le (minus (match_dup 2) (pc)) (const_int  65500)))
-				   (const_int 4)
-				   (if_then_else (match_test "TARGET_16_BIT")
-						 (const_int 6)
-						 (const_int 8))))
-       ;; Alternative 2
-       (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
-			  (le (minus (match_dup 2) (pc)) (const_int  65500)))
-		     (const_int 4)
-		     (const_int 8))
+				   (const_int 10))
+		     (const_int 10))
      ])])
 
 
 ;; This pattern is dedicated to V2 ISA,
 ;; because V2 DOES NOT HAVE beqc/bnec instruction.
-(define_insn "*cbranchsi4_equality_reg"
+(define_insn "cbranchsi4_equality_reg" ;; Register-vs-register conditional branch for TARGET_ISA_V2; now a named pattern (the '*' prefix was dropped).
   [(set (pc)
 	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
-			[(match_operand:SI 1 "register_operand"           "r")
-			 (match_operand:SI 2 "nds32_reg_constant_operand" "r")])
+			[(match_operand:SI 1 "register_operand" "v, r") ;; alt 0 pairs constraints v/l; alt 1 accepts any r/r pair
+			 (match_operand:SI 2 "register_operand" "l, r")])
 		      (label_ref (match_operand 3 "" ""))
 		      (pc)))]
   "TARGET_ISA_V2"
 {
-  enum rtx_code code;
-
-  code = GET_CODE (operands[0]);
-
-  /* This register-comparison conditional branch has one form:
-       32-bit instruction =>          beq/bne           imm14s << 1
-
-     For 32-bit case,
-     we assume it is always reachable. (but check range -16350 ~ 16350).  */
-
-  switch (code)
-    {
-    case EQ:
-      /* r, r */
-      switch (get_attr_length (insn))
-	{
-	case 4:
-	  return "beq\t%1, %2, %3";
-	case 8:
-	  /*    beq  $r0, $r1, .L0
-	      =>
-	        bne  $r0, $r1, .LCB0
-	        j  .L0
-	      .LCB0:
-	   */
-	  return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
-	default:
-	  gcc_unreachable ();
-	}
-
-    case NE:
-      /* r, r */
-      switch (get_attr_length (insn))
-	{
-	case 4:
-	  return "bne\t%1, %2, %3";
-	case 8:
-	  /*    bne  $r0, $r1, .L0
-	      =>
-	        beq  $r0, $r1, .LCB0
-	        j  .L0
-	      .LCB0:
-	   */
-	  return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
-	default:
-	  gcc_unreachable ();
-	}
-
-    default:
-      gcc_unreachable ();
-    }
+  return nds32_output_cbranchsi4_equality_reg (insn, operands); /* The assembly text is now produced by this C helper.  */
 }
   [(set_attr "type"   "branch")
-   (set (attr "length")
-	(if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
-			   (le (minus (match_dup 3) (pc)) (const_int  16350)))
-		      (const_int 4)
-		      (const_int 8)))])
+   (set_attr_alternative "enabled" ;; Alternative 0 is enabled only under TARGET_16_BIT; alternative 1 is always enabled.
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_string "yes")
+		     (const_string "no"))
+       ;; Alternative 1
+       (const_string "yes")
+     ])
+   (set_attr_alternative "length" ;; Length depends on branch distance; a jump for which CROSSING_JUMP_P holds gets the largest value.
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ;; 2 within +/-250, 4 within +/-16350, otherwise 8
+		     (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+					(le (minus (match_dup 3) (pc)) (const_int  250)))
+				   (const_int 2)
+				   (if_then_else (and (ge (minus (match_dup 3) (pc))
+							  (const_int -16350))
+						      (le (minus (match_dup 3) (pc))
+							  (const_int  16350)))
+						 (const_int 4)
+						 (const_int 8)))
+		     (const_int 8))
+       ;; Alternative 1
+       (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ;; 4 within +/-16350, otherwise 10
+		     (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+					(le (minus (match_dup 3) (pc)) (const_int  16350)))
+				   (const_int 4)
+				   (const_int 10))
+		     (const_int 10))
+     ])])
 
 
 ;; This pattern is dedicated to V3/V3M,
 ;; because V3/V3M DO HAVE beqc/bnec instruction.
-(define_insn "*cbranchsi4_equality_reg_or_const_int"
+(define_insn "cbranchsi4_equality_reg_or_const_int" ;; Conditional branch of op1 against a register or Is11 constant for V3/V3M; now a named pattern (the '*' prefix was dropped).
   [(set (pc)
 	(if_then_else (match_operator 0 "nds32_equality_comparison_operator"
-			[(match_operand:SI 1 "register_operand"           "r,    r")
-			 (match_operand:SI 2 "nds32_reg_constant_operand" "r, Is11")])
+			[(match_operand:SI 1 "register_operand"      "v, r,    r") ;; alt 0: v/l register pair; alt 1: r/r; alt 2: r against an Is11 constant
+			 (match_operand:SI 2 "nds32_rimm11s_operand" "l, r, Is11")])
 		      (label_ref (match_operand 3 "" ""))
 		      (pc)))]
   "TARGET_ISA_V3 || TARGET_ISA_V3M"
 {
-  enum rtx_code code;
-
-  code = GET_CODE (operands[0]);
-
-  /* This register-comparison conditional branch has one form:
-       32-bit instruction =>          beq/bne           imm14s << 1
-       32-bit instruction =>         beqc/bnec          imm8s << 1
-
-     For 32-bit case, we assume it is always reachable.
-     (but check range -16350 ~ 16350 and -250 ~ 250).  */
-
-  switch (code)
-    {
-    case EQ:
-      if (which_alternative == 0)
-	{
-	  /* r, r */
-	  switch (get_attr_length (insn))
-	    {
-	    case 4:
-	      return "beq\t%1, %2, %3";
-	    case 8:
-	      /*    beq  $r0, $r1, .L0
-	          =>
-	            bne  $r0, $r1, .LCB0
-	            j  .L0
-	          .LCB0:
-	       */
-	      return "bne\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
-	    default:
-	      gcc_unreachable ();
-	    }
-	}
-      else
-	{
-	  /* r, Is11 */
-	  switch (get_attr_length (insn))
-	    {
-	    case 4:
-	      return "beqc\t%1, %2, %3";
-	    case 8:
-	      /*    beqc  $r0, constant, .L0
-	          =>
-	            bnec  $r0, constant, .LCB0
-	            j  .L0
-	          .LCB0:
-	       */
-	      return "bnec\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
-	    default:
-	      gcc_unreachable ();
-	    }
-	}
-    case NE:
-      if (which_alternative == 0)
-	{
-	  /* r, r */
-	  switch (get_attr_length (insn))
-	    {
-	    case 4:
-	      return "bne\t%1, %2, %3";
-	    case 8:
-	      /*    bne  $r0, $r1, .L0
-	          =>
-	            beq  $r0, $r1, .LCB0
-	            j  .L0
-	          .LCB0:
-	       */
-	      return "beq\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
-	    default:
-	      gcc_unreachable ();
-	    }
-	}
-      else
-	{
-	  /* r, Is11 */
-	  switch (get_attr_length (insn))
-	    {
-	    case 4:
-	      return "bnec\t%1, %2, %3";
-	    case 8:
-	      /*    bnec  $r0, constant, .L0
-	          =>
-	            beqc  $r0, constant, .LCB0
-	            j  .L0
-	          .LCB0:
-	       */
-	      return "beqc\t%1, %2, .LCB%=\;j\t%3\n.LCB%=:";
-	    default:
-	      gcc_unreachable ();
-	    }
-	}
-    default:
-      gcc_unreachable ();
-    }
+  return nds32_output_cbranchsi4_equality_reg_or_const_int (insn, operands); /* The assembly text is now produced by this C helper.  */
 }
   [(set_attr "type"   "branch")
+   (set_attr_alternative "enabled" ;; Alternative 0 is enabled only under TARGET_16_BIT; alternatives 1 and 2 are always enabled.
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_string "yes")
+		     (const_string "no"))
+       ;; Alternative 1
+       (const_string "yes")
+       ;; Alternative 2
+       (const_string "yes")
+     ])
    (set_attr_alternative "length"
      [
        ;; Alternative 0
-       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
-			  (le (minus (match_dup 3) (pc)) (const_int  16350)))
-		     (const_int 4)
-		     (const_int 8))
+       (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ;; 2 within +/-250, 4 within +/-16350, otherwise 8
+		     (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+					(le (minus (match_dup 3) (pc)) (const_int  250)))
+				   (const_int 2)
+				   (if_then_else (and (ge (minus (match_dup 3) (pc))
+							  (const_int -16350))
+						      (le (minus (match_dup 3) (pc))
+							  (const_int  16350)))
+						 (const_int 4)
+						 (const_int 8)))
+		    (const_int 8))
        ;; Alternative 1
-       (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
-			  (le (minus (match_dup 3) (pc)) (const_int  250)))
-		     (const_int 4)
-		     (const_int 8))
+       (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ;; 4 within +/-16350, otherwise 10
+		     (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -16350))
+					(le (minus (match_dup 3) (pc)) (const_int  16350)))
+				   (const_int 4)
+				   (const_int 10))
+		    (const_int 10))
+       ;; Alternative 2
+       (if_then_else (match_test "!CROSSING_JUMP_P (insn)") ;; 4 within +/-250, otherwise 10
+		     (if_then_else (and (ge (minus (match_dup 3) (pc)) (const_int -250))
+					(le (minus (match_dup 3) (pc)) (const_int  250)))
+				   (const_int 4)
+				   (const_int 10))
+		    (const_int 10))
      ])])
 
 
@@ -1529,80 +1338,16 @@
 		      (pc)))]
   ""
 {
-  enum rtx_code code;
-
-  code = GET_CODE (operands[0]);
-
-  /* This zero-greater-less-comparison conditional branch has one form:
-       32-bit instruction =>      bgtz/bgez/bltz/blez     imm16s << 1
-
-     For 32-bit case, we assume it is always reachable.
-     (but check range -65500 ~ 65500).  */
-
-  if (get_attr_length (insn) == 8)
-    {
-      /* The branch target is too far to simply use one
-         bgtz/bgez/bltz/blez instruction.
-         We need to reverse condition and use 'j' to jump to the target.  */
-      switch (code)
-	{
-	case GT:
-	  /*   bgtz  $r8, .L0
-	     =>
-	       blez  $r8, .LCB0
-	       j  .L0
-	     .LCB0:
-	   */
-	  return "blez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
-	case GE:
-	  /*   bgez  $r8, .L0
-	     =>
-	       bltz  $r8, .LCB0
-	       j  .L0
-	     .LCB0:
-	   */
-	  return "bltz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
-	case LT:
-	  /*   bltz  $r8, .L0
-	     =>
-	       bgez  $r8, .LCB0
-	       j  .L0
-	     .LCB0:
-	   */
-	  return "bgez\t%1, .LCB%=\;j\t%2\n.LCB%=:";
-	case LE:
-	  /*   blez  $r8, .L0
-	     =>
-	       bgtz  $r8, .LCB0
-	       j  .L0
-	     .LCB0:
-	   */
-	  return "bgtz\t%1, .LCB%=\;j\t%2\n.LCB%=:";
-	default:
-	  gcc_unreachable ();
-	}
-    }
-
-  switch (code)
-    {
-    case GT:
-      return "bgtz\t%1, %2";
-    case GE:
-      return "bgez\t%1, %2";
-    case LT:
-      return "bltz\t%1, %2";
-    case LE:
-      return "blez\t%1, %2";
-    default:
-      gcc_unreachable ();
-    }
+  return nds32_output_cbranchsi4_greater_less_zero (insn, operands);
 }
   [(set_attr "type"   "branch")
    (set (attr "length")
-        (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
-			   (le (minus (match_dup 2) (pc)) (const_int  65500)))
-		      (const_int 4)
-		      (const_int 8)))])
+	(if_then_else (match_test "!CROSSING_JUMP_P (insn)")
+		      (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -65500))
+					 (le (minus (match_dup 2) (pc)) (const_int  65500)))
+				    (const_int 4)
+				    (const_int 10))
+		      (const_int 10)))])
 
 
 (define_expand "cstoresi4"
@@ -1612,236 +1357,85 @@
 	   (match_operand:SI 3 "nonmemory_operand" "")]))]
   ""
 {
-  rtx tmp_reg;
-  enum rtx_code code;
-
-  code = GET_CODE (operands[1]);
-
-  switch (code)
+  enum nds32_expand_result_type result = nds32_expand_cstore (operands);
+  switch (result)
     {
-    case EQ:
-      if (GET_CODE (operands[3]) == CONST_INT)
-	{
-	  /* reg_R = (reg_A == const_int_B)
-	     --> addi reg_C, reg_A, -const_int_B
-	         slti reg_R, reg_C, const_int_1 */
-	  tmp_reg = gen_reg_rtx (SImode);
-	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
-	  /* If the integer value is not in the range of imm15s,
-	     we need to force register first because our addsi3 pattern
-	     only accept nds32_rimm15s_operand predicate.  */
-	  if (!satisfies_constraint_Is15 (operands[3]))
-	    operands[3] = force_reg (SImode, operands[3]);
-	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
-	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
-
-	  DONE;
-	}
-      else
-	{
-	  /* reg_R = (reg_A == reg_B)
-	     --> xor  reg_C, reg_A, reg_B
-	         slti reg_R, reg_C, const_int_1 */
-	  tmp_reg = gen_reg_rtx (SImode);
-	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
-	  emit_insn (gen_slt_compare (operands[0], tmp_reg, const1_rtx));
-
-	  DONE;
-	}
-
-    case NE:
-      if (GET_CODE (operands[3]) == CONST_INT)
-	{
-	  /* reg_R = (reg_A != const_int_B)
-	     --> addi reg_C, reg_A, -const_int_B
-	         slti reg_R, const_int_0, reg_C */
-	  tmp_reg = gen_reg_rtx (SImode);
-	  operands[3] = gen_int_mode (-INTVAL (operands[3]), SImode);
-	  /* If the integer value is not in the range of imm15s,
-	     we need to force register first because our addsi3 pattern
-	     only accept nds32_rimm15s_operand predicate.  */
-	  if (!satisfies_constraint_Is15 (operands[3]))
-	    operands[3] = force_reg (SImode, operands[3]);
-	  emit_insn (gen_addsi3 (tmp_reg, operands[2], operands[3]));
-	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
-
-	  DONE;
-	}
-      else
-	{
-	  /* reg_R = (reg_A != reg_B)
-	     --> xor  reg_C, reg_A, reg_B
-	         slti reg_R, const_int_0, reg_C */
-	  tmp_reg = gen_reg_rtx (SImode);
-	  emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3]));
-	  emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg));
-
-	  DONE;
-	}
-
-    case GT:
-    case GTU:
-      /* reg_R = (reg_A > reg_B)       --> slt reg_R, reg_B, reg_A */
-      /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */
-      if (code == GT)
-	{
-	  /* GT, use slts instruction */
-	  emit_insn (gen_slts_compare (operands[0], operands[3], operands[2]));
-	}
-      else
-	{
-	  /* GTU, use slt instruction */
-	  emit_insn (gen_slt_compare  (operands[0], operands[3], operands[2]));
-	}
-
+    case EXPAND_DONE:
       DONE;
-
-    case GE:
-    case GEU:
-      if (GET_CODE (operands[3]) == CONST_INT)
-	{
-	  /* reg_R = (reg_A >= const_int_B)
-	     --> movi reg_C, const_int_B - 1
-	         slt  reg_R, reg_C, reg_A */
-	  tmp_reg = gen_reg_rtx (SImode);
-
-	  emit_insn (gen_movsi (tmp_reg,
-				gen_int_mode (INTVAL (operands[3]) - 1,
-					      SImode)));
-	  if (code == GE)
-	    {
-	      /* GE, use slts instruction */
-	      emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2]));
-	    }
-	  else
-	    {
-	      /* GEU, use slt instruction */
-	      emit_insn (gen_slt_compare  (operands[0], tmp_reg, operands[2]));
-	    }
-
-	  DONE;
-	}
-      else
-	{
-	  /* reg_R = (reg_A >= reg_B)
-	     --> slt  reg_R, reg_A, reg_B
-	         xori reg_R, reg_R, const_int_1 */
-	  if (code == GE)
-	    {
-	      /* GE, use slts instruction */
-	      emit_insn (gen_slts_compare (operands[0],
-					   operands[2], operands[3]));
-	    }
-	  else
-	    {
-	      /* GEU, use slt instruction */
-	      emit_insn (gen_slt_compare  (operands[0],
-					   operands[2], operands[3]));
-	    }
-
-	  /* perform 'not' behavior */
-	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
-
-	  DONE;
-	}
-
-    case LT:
-    case LTU:
-      /* reg_R = (reg_A < reg_B)       --> slt reg_R, reg_A, reg_B */
-      /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */
-      if (code == LT)
-	{
-	  /* LT, use slts instruction */
-	  emit_insn (gen_slts_compare (operands[0], operands[2], operands[3]));
-	}
-      else
-	{
-	  /* LTU, use slt instruction */
-	  emit_insn (gen_slt_compare  (operands[0], operands[2], operands[3]));
-	}
-
-      DONE;
-
-    case LE:
-    case LEU:
-      if (GET_CODE (operands[3]) == CONST_INT)
-	{
-	  /* reg_R = (reg_A <= const_int_B)
-	     --> movi reg_C, const_int_B + 1
-	         slt  reg_R, reg_A, reg_C */
-	  tmp_reg = gen_reg_rtx (SImode);
-
-	  emit_insn (gen_movsi (tmp_reg,
-				gen_int_mode (INTVAL (operands[3]) + 1,
-						      SImode)));
-	  if (code == LE)
-	    {
-	      /* LE, use slts instruction */
-	      emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg));
-	    }
-	  else
-	    {
-	      /* LEU, use slt instruction */
-	      emit_insn (gen_slt_compare  (operands[0], operands[2], tmp_reg));
-	    }
-
-	  DONE;
-	}
-      else
-	{
-	  /* reg_R = (reg_A <= reg_B) --> slt  reg_R, reg_B, reg_A
-	                                  xori reg_R, reg_R, const_int_1 */
-	  if (code == LE)
-	    {
-	      /* LE, use slts instruction */
-	      emit_insn (gen_slts_compare (operands[0],
-					   operands[3], operands[2]));
-	    }
-	  else
-	    {
-	      /* LEU, use slt instruction */
-	      emit_insn (gen_slt_compare  (operands[0],
-					   operands[3], operands[2]));
-	    }
-
-	  /* perform 'not' behavior */
-	  emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx));
-
-	  DONE;
-	}
-
-
+      break;
+    case EXPAND_FAIL:
+      FAIL;
+      break;
+    case EXPAND_CREATE_TEMPLATE:
+      break;
     default:
       gcc_unreachable ();
     }
 })
 
 
-(define_insn "slts_compare"
-  [(set (match_operand:SI 0 "register_operand"         "=t,    t, r,    r")
-	(lt:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
-	       (match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+(define_expand "slts_compare"
+  [(set (match_operand:SI 0 "register_operand"       "")
+	(lt:SI (match_operand:SI 1 "general_operand" "")
+	       (match_operand:SI 2 "general_operand" "")))]
+  ""
+{
+  if (!REG_P (operands[1]))
+    operands[1] = force_reg (SImode, operands[1]);
+
+  if (!REG_P (operands[2]) && !satisfies_constraint_Is15 (operands[2]))
+    operands[2] = force_reg (SImode, operands[2]);
+})
+
+(define_insn "slts_compare_impl"
+  [(set (match_operand:SI 0 "register_operand"             "=t,   t, r,    r")
+	(lt:SI (match_operand:SI 1 "register_operand"      " d,   d, r,    r")
+	       (match_operand:SI 2 "nds32_rimm15s_operand" " r,Iu05, r, Is15")))]
   ""
   "@
    slts45\t%1, %2
    sltsi45\t%1, %2
    slts\t%0, %1, %2
    sltsi\t%0, %1, %2"
-  [(set_attr "type"   "compare,compare,compare,compare")
-   (set_attr "length" "      2,      2,      4,      4")])
+  [(set_attr "type"   "alu,    alu,    alu,    alu")
+   (set_attr "length" "  2,      2,      4,      4")])
+
+(define_insn "slt_eq0"
+  [(set (match_operand:SI 0 "register_operand"        "=t, r")
+	(eq:SI (match_operand:SI 1 "register_operand" " d, r")
+	       (const_int 0)))]
+  ""
+  "@
+   slti45\t%1, 1
+   slti\t%0, %1, 1"
+  [(set_attr "type"   "alu, alu")
+   (set_attr "length" "  2,   4")])
 
-(define_insn "slt_compare"
-  [(set (match_operand:SI 0 "register_operand"          "=t,    t, r,    r")
-	(ltu:SI (match_operand:SI 1 "nonmemory_operand" " d,    d, r,    r")
-		(match_operand:SI 2 "nonmemory_operand" " r, Iu05, r, Is15")))]
+(define_expand "slt_compare"
+  [(set (match_operand:SI 0 "register_operand"        "")
+	(ltu:SI (match_operand:SI 1 "general_operand" "")
+		(match_operand:SI 2 "general_operand" "")))]
+  ""
+{
+  if (!REG_P (operands[1]))
+    operands[1] = force_reg (SImode, operands[1]);
+
+  if (!REG_P (operands[2]) && !satisfies_constraint_Is15 (operands[2]))
+    operands[2] = force_reg (SImode, operands[2]);
+})
+
+(define_insn "slt_compare_impl"
+  [(set (match_operand:SI 0 "register_operand"              "=t,    t, r,    r")
+	(ltu:SI (match_operand:SI 1 "register_operand"      " d,    d, r,    r")
+		(match_operand:SI 2 "nds32_rimm15s_operand" " r, Iu05, r, Is15")))]
   ""
   "@
    slt45\t%1, %2
    slti45\t%1, %2
    slt\t%0, %1, %2
    slti\t%0, %1, %2"
-  [(set_attr "type"   "compare,compare,compare,compare")
-   (set_attr "length" "      2,      2,      4,      4")])
+  [(set_attr "type"   "alu,    alu,    alu,    alu")
+   (set_attr "length" "  2,      2,      4,      4")])
 
 
 ;; ----------------------------------------------------------------------------
@@ -1874,12 +1468,14 @@
     }
 }
   [(set_attr "type" "branch")
-   (set_attr "enabled" "1")
+   (set_attr "enabled" "yes")
    (set (attr "length")
-	(if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
-			   (le (minus (match_dup 0) (pc)) (const_int  250)))
-		      (if_then_else (match_test "TARGET_16_BIT")
-				    (const_int 2)
+	(if_then_else (match_test "!CROSSING_JUMP_P (insn)")
+		      (if_then_else (and (ge (minus (match_dup 0) (pc)) (const_int -250))
+					 (le (minus (match_dup 0) (pc)) (const_int  250)))
+				    (if_then_else (match_test "TARGET_16_BIT")
+						  (const_int 2)
+						  (const_int 4))
 				    (const_int 4))
 		      (const_int 4)))])
 
@@ -1887,8 +1483,8 @@
   [(set (pc) (match_operand:SI 0 "register_operand" "r, r"))]
   ""
   "@
-  jr5\t%0
-  jr\t%0"
+   jr5\t%0
+   jr\t%0"
   [(set_attr "type"   "branch,branch")
    (set_attr "length" "     2,     4")])
 
@@ -1904,39 +1500,78 @@
 	      (clobber (reg:SI LP_REGNUM))
 	      (clobber (reg:SI TA_REGNUM))])]
   ""
-  ""
+  {
+    rtx insn;
+    rtx sym = XEXP (operands[0], 0);
+
+    if (TARGET_ICT_MODEL_LARGE
+	&& nds32_indirect_call_referenced_p (sym))
+      {
+	rtx reg = gen_reg_rtx (Pmode);
+	emit_move_insn (reg, sym);
+	operands[0] = gen_const_mem (Pmode, reg);
+      }
+
+    if (flag_pic)
+      {
+	insn = emit_call_insn (gen_call_internal
+			       (XEXP (operands[0], 0), GEN_INT (0)));
+	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
+	DONE;
+      }
+  }
 )
 
-(define_insn "*call_register"
-  [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r"))
-		    (match_operand 1))
-	      (clobber (reg:SI LP_REGNUM))
-	      (clobber (reg:SI TA_REGNUM))])]
-  ""
-  "@
-  jral5\t%0
-  jral\t%0"
-  [(set_attr "type"   "branch,branch")
-   (set_attr "length" "     2,     4")])
-
-(define_insn "*call_immediate"
-  [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i"))
+(define_insn "call_internal"
+  [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, S"))
 		    (match_operand 1))
 	      (clobber (reg:SI LP_REGNUM))
 	      (clobber (reg:SI TA_REGNUM))])]
   ""
 {
-  if (TARGET_CMODEL_LARGE)
-    return "bal\t%0";
-  else
-    return "jal\t%0";
+  rtx_insn *next_insn = next_active_insn (insn);
+  bool align_p = (!(next_insn && get_attr_length (next_insn) == 2))
+		 && NDS32_ALIGN_P ();
+  switch (which_alternative)
+    {
+    case 0:
+      if (TARGET_16_BIT)
+	{
+	  if (align_p)
+	    return "jral5\t%0\;.align 2";
+	  else
+	    return "jral5\t%0";
+	}
+      else
+	{
+	  if (align_p)
+	    return "jral\t%0\;.align 2";
+	  else
+	    return "jral\t%0";
+	}
+    case 1:
+      return nds32_output_call (insn, operands, operands[0],
+				"bal\t%0", "jal\t%0", align_p);
+    default:
+      gcc_unreachable ();
+    }
 }
-  [(set_attr "type"   "branch")
-   (set (attr "length")
-	(if_then_else (match_test "TARGET_CMODEL_LARGE")
-		      (const_int 12)
-		      (const_int 4)))])
-
+  [(set_attr "enabled" "yes")
+   (set_attr "type" "branch")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (if_then_else (match_test "flag_pic")
+		     (const_int 16)
+		     (if_then_else (match_test "nds32_long_call_p (operands[0])")
+				   (const_int 12)
+				   (const_int 4)))
+     ])]
+)
 
 ;; Subroutine call instruction returning a value.
 ;;   operands[0]: It is the hard register in which the value is returned.
@@ -1951,49 +1586,114 @@
 	      (clobber (reg:SI LP_REGNUM))
 	      (clobber (reg:SI TA_REGNUM))])]
   ""
-  ""
+  {
+    rtx insn;
+    rtx sym = XEXP (operands[1], 0);
+
+    if (TARGET_ICT_MODEL_LARGE
+	&& nds32_indirect_call_referenced_p (sym))
+      {
+	rtx reg = gen_reg_rtx (Pmode);
+	emit_move_insn (reg, sym);
+	operands[1] = gen_const_mem (Pmode, reg);
+      }
+
+    if (flag_pic)
+      {
+	insn =
+	  emit_call_insn (gen_call_value_internal
+			  (operands[0], XEXP (operands[1], 0), GEN_INT (0)));
+	use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
+	DONE;
+      }
+  }
 )
 
-(define_insn "*call_value_register"
+(define_insn "call_value_internal"
   [(parallel [(set (match_operand 0)
-		   (call (mem (match_operand:SI 1 "register_operand" "r, r"))
+		   (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, S"))
 		         (match_operand 2)))
 	      (clobber (reg:SI LP_REGNUM))
 	      (clobber (reg:SI TA_REGNUM))])]
   ""
-  "@
-  jral5\t%1
-  jral\t%1"
-  [(set_attr "type"   "branch,branch")
-   (set_attr "length" "     2,     4")])
+{
+  rtx_insn *next_insn = next_active_insn (insn);
+  bool align_p = (!(next_insn && get_attr_length (next_insn) == 2))
+		 && NDS32_ALIGN_P ();
+  switch (which_alternative)
+    {
+    case 0:
+      if (TARGET_16_BIT)
+	{
+	  if (align_p)
+	    return "jral5\t%1\;.align 2";
+	  else
+	    return "jral5\t%1";
+	}
+      else
+	{
+	  if (align_p)
+	    return "jral\t%1\;.align 2";
+	  else
+	    return "jral\t%1";
+	}
+    case 1:
+      return nds32_output_call (insn, operands, operands[1],
+				"bal\t%1", "jal\t%1", align_p);
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "enabled" "yes")
+   (set_attr "type" "branch")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (if_then_else (match_test "flag_pic")
+		     (const_int 16)
+		     (if_then_else (match_test "nds32_long_call_p (operands[1])")
+				   (const_int 12)
+				   (const_int 4)))
+     ])]
+)
 
-(define_insn "*call_value_immediate"
-  [(parallel [(set (match_operand 0)
-		   (call (mem (match_operand:SI 1 "immediate_operand" "i"))
-			 (match_operand 2)))
-	      (clobber (reg:SI LP_REGNUM))
-	      (clobber (reg:SI TA_REGNUM))])]
+;; Call subroutine returning any type.
+
+(define_expand "untyped_call"
+  [(parallel [(call (match_operand 0 "" "")
+		    (const_int 0))
+	      (match_operand 1 "" "")
+	      (match_operand 2 "" "")])]
   ""
 {
-  if (TARGET_CMODEL_LARGE)
-    return "bal\t%1";
-  else
-    return "jal\t%1";
-}
-  [(set_attr "type"   "branch")
-   (set (attr "length")
-	(if_then_else (match_test "TARGET_CMODEL_LARGE")
-		      (const_int 12)
-		      (const_int 4)))])
+  int i;
+
+  emit_call_insn (gen_call (operands[0], const0_rtx));
 
+  for (i = 0; i < XVECLEN (operands[2], 0); i++)
+    {
+      rtx set = XVECEXP (operands[2], 0, i);
+      emit_move_insn (SET_DEST (set), SET_SRC (set));
+    }
+
+  /* The optimizer does not know that the call sets the function value
+     registers we stored in the result block.  We avoid problems by
+     claiming that all hard registers are used and clobbered at this
+     point.  */
+  emit_insn (gen_blockage ());
+  DONE;
+})
 
 ;; ----------------------------------------------------------------------------
 
 ;; The sibcall patterns.
 
 ;; sibcall
-;; sibcall_register
-;; sibcall_immediate
+;; sibcall_internal
 
 (define_expand "sibcall"
   [(parallel [(call (match_operand 0 "memory_operand" "")
@@ -2001,41 +1701,60 @@
 	      (clobber (reg:SI TA_REGNUM))
 	      (return)])]
   ""
-  ""
-)
+{
+    rtx sym = XEXP (operands[0], 0);
 
-(define_insn "*sibcall_register"
-  [(parallel [(call (mem (match_operand:SI 0 "register_operand" "r, r"))
-		    (match_operand 1))
-	      (clobber (reg:SI TA_REGNUM))
-	      (return)])]
-  ""
-  "@
-   jr5\t%0
-   jr\t%0"
-  [(set_attr "type"   "branch,branch")
-   (set_attr "length" "     2,     4")])
+    if (TARGET_ICT_MODEL_LARGE
+	&& nds32_indirect_call_referenced_p (sym))
+      {
+	rtx reg = gen_reg_rtx (Pmode);
+	emit_move_insn (reg, sym);
+	operands[0] = gen_const_mem (Pmode, reg);
+      }
+})
 
-(define_insn "*sibcall_immediate"
-  [(parallel [(call (mem (match_operand:SI 0 "immediate_operand" "i"))
+(define_insn "sibcall_internal"
+  [(parallel [(call (mem (match_operand:SI 0 "nds32_call_address_operand" "r, S"))
 		    (match_operand 1))
 	      (clobber (reg:SI TA_REGNUM))
 	      (return)])]
   ""
 {
-  if (TARGET_CMODEL_LARGE)
-    return "b\t%0";
-  else
-    return "j\t%0";
+  switch (which_alternative)
+    {
+    case 0:
+      if (TARGET_16_BIT)
+	return "jr5\t%0";
+      else
+	return "jr\t%0";
+    case 1:
+      if (nds32_long_call_p (operands[0]))
+	return "b\t%0";
+      else
+	return "j\t%0";
+    default:
+      gcc_unreachable ();
+    }
 }
-  [(set_attr "type"   "branch")
-   (set (attr "length")
-	(if_then_else (match_test "TARGET_CMODEL_LARGE")
-		      (const_int 12)
-		      (const_int 4)))])
+  [(set_attr "enabled" "yes")
+   (set_attr "type" "branch")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (if_then_else (match_test "flag_pic")
+		     (const_int 16)
+		     (if_then_else (match_test "nds32_long_call_p (operands[0])")
+				   (const_int 12)
+				   (const_int 4)))
+     ])]
+)
 
 ;; sibcall_value
-;; sibcall_value_register
+;; sibcall_value_internal
 ;; sibcall_value_immediate
 
 (define_expand "sibcall_value"
@@ -2045,41 +1764,58 @@
 	      (clobber (reg:SI TA_REGNUM))
 	      (return)])]
   ""
-  ""
-)
+{
+    rtx sym = XEXP (operands[1], 0);
 
-(define_insn "*sibcall_value_register"
+    if (TARGET_ICT_MODEL_LARGE
+	&& nds32_indirect_call_referenced_p (sym))
+      {
+	rtx reg = gen_reg_rtx (Pmode);
+	emit_move_insn (reg, sym);
+	operands[1] = gen_const_mem (Pmode, reg);
+      }
+})
+
+(define_insn "sibcall_value_internal"
   [(parallel [(set (match_operand 0)
-		   (call (mem (match_operand:SI 1 "register_operand" "r, r"))
-			 (match_operand 2)))
-	      (clobber (reg:SI TA_REGNUM))
-	      (return)])]
-  ""
-  "@
-   jr5\t%1
-   jr\t%1"
-  [(set_attr "type"   "branch,branch")
-   (set_attr "length" "     2,     4")])
-
-(define_insn "*sibcall_value_immediate"
-  [(parallel [(set (match_operand 0)
-		   (call (mem (match_operand:SI 1 "immediate_operand" "i"))
+		   (call (mem (match_operand:SI 1 "nds32_call_address_operand" "r, S"))
 			 (match_operand 2)))
 	      (clobber (reg:SI TA_REGNUM))
 	      (return)])]
   ""
 {
-  if (TARGET_CMODEL_LARGE)
-    return "b\t%1";
-  else
-    return "j\t%1";
+  switch (which_alternative)
+    {
+    case 0:
+      if (TARGET_16_BIT)
+	return "jr5\t%1";
+      else
+	return "jr\t%1";
+    case 1:
+      if (nds32_long_call_p (operands[1]))
+	return "b\t%1";
+      else
+	return "j\t%1";
+    default:
+      gcc_unreachable ();
+    }
 }
-  [(set_attr "type"   "branch")
-   (set (attr "length")
-	(if_then_else (match_test "TARGET_CMODEL_LARGE")
-		      (const_int 12)
-		      (const_int 4)))])
-
+  [(set_attr "enabled" "yes")
+   (set_attr "type" "branch")
+   (set_attr_alternative "length"
+     [
+       ;; Alternative 0
+       (if_then_else (match_test "TARGET_16_BIT")
+		     (const_int 2)
+		     (const_int 4))
+       ;; Alternative 1
+       (if_then_else (match_test "flag_pic")
+		     (const_int 16)
+		     (if_then_else (match_test "nds32_long_call_p (operands[1])")
+				   (const_int 12)
+				   (const_int 4)))
+     ])]
+)
 
 ;; ----------------------------------------------------------------------------
 
@@ -2089,26 +1825,41 @@
   ""
 {
   /* Note that only under V3/V3M ISA, we could use v3push prologue.
-     In addition, we do not want to use v3push for isr function
-     and variadic function.  */
-  if (TARGET_V3PUSH
-      && !nds32_isr_function_p (current_function_decl)
-      && (cfun->machine->va_args_size == 0))
+     In addition, we need to check if v3push is indeed available.  */
+  if (NDS32_V3PUSH_AVAILABLE_P)
     nds32_expand_prologue_v3push ();
   else
     nds32_expand_prologue ();
+
+  /* If cfun->machine->fp_as_gp_p is true, we can generate a special
+     directive that guides the linker in doing fp-as-gp optimization.
+     However, a naked function is one that
+     should not have any prologue/epilogue, while
+     using fp-as-gp still requires saving $fp with push/pop, so
+     there is no benefit in using fp-as-gp for such a small function.
+     So we need to make sure this function is NOT naked as well.  */
+  if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p)
+    emit_insn (gen_omit_fp_begin (gen_rtx_REG (SImode, FP_REGNUM)));
+
   DONE;
 })
 
 (define_expand "epilogue" [(const_int 0)]
   ""
 {
+  /* If cfun->machine->fp_as_gp_p is true, we can generate a special
+     directive that guides the linker in doing fp-as-gp optimization.
+     However, a naked function is one that
+     should not have any prologue/epilogue, while
+     using fp-as-gp still requires saving $fp with push/pop, so
+     there is no benefit in using fp-as-gp for such a small function.
+     So we need to make sure this function is NOT naked as well.  */
+  if (cfun->machine->fp_as_gp_p && !cfun->machine->naked_p)
+    emit_insn (gen_omit_fp_end (gen_rtx_REG (SImode, FP_REGNUM)));
+
   /* Note that only under V3/V3M ISA, we could use v3pop epilogue.
-     In addition, we do not want to use v3pop for isr function
-     and variadic function.  */
-  if (TARGET_V3PUSH
-      && !nds32_isr_function_p (current_function_decl)
-      && (cfun->machine->va_args_size == 0))
+     In addition, we need to check if v3push is indeed available.  */
+  if (NDS32_V3PUSH_AVAILABLE_P)
     nds32_expand_epilogue_v3pop (false);
   else
     nds32_expand_epilogue (false);
@@ -2121,10 +1872,7 @@
   /* Pass true to indicate that this is sibcall epilogue and
      exit from a function without the final branch back to the
      calling function.  */
-  if (TARGET_V3PUSH && !nds32_isr_function_p (current_function_decl))
-    nds32_expand_epilogue_v3pop (true);
-  else
-    nds32_expand_epilogue (true);
+  nds32_expand_epilogue (true);
 
   DONE;
 })
@@ -2142,7 +1890,7 @@
     return "nop";
 }
   [(set_attr "type" "misc")
-   (set_attr "enabled" "1")
+   (set_attr "enabled" "yes")
    (set (attr "length")
 	(if_then_else (match_test "TARGET_16_BIT")
 		      (const_int 2)
@@ -2166,12 +1914,11 @@
 {
   return nds32_output_stack_push (operands[0]);
 }
-  [(set_attr "type" "misc")
-   (set_attr "enabled" "1")
+  [(set_attr "type" "store_multiple")
+   (set_attr "combo" "12")
+   (set_attr "enabled" "yes")
    (set (attr "length")
-	(if_then_else (match_test "TARGET_V3PUSH
-				   && !nds32_isr_function_p (cfun->decl)
-				   && (cfun->machine->va_args_size == 0)")
+	(if_then_else (match_test "NDS32_V3PUSH_AVAILABLE_P")
 		      (const_int 2)
 		      (const_int 4)))])
 
@@ -2188,12 +1935,11 @@
 {
   return nds32_output_stack_pop (operands[0]);
 }
-  [(set_attr "type" "misc")
-   (set_attr "enabled" "1")
+  [(set_attr "type" "load_multiple")
+   (set_attr "combo" "12")
+   (set_attr "enabled" "yes")
    (set (attr "length")
-	(if_then_else (match_test "TARGET_V3PUSH
-				   && !nds32_isr_function_p (cfun->decl)
-				   && (cfun->machine->va_args_size == 0)")
+	(if_then_else (match_test "NDS32_V3PUSH_AVAILABLE_P")
 		      (const_int 2)
 		      (const_int 4)))])
 
@@ -2205,34 +1951,64 @@
 ;; Use this pattern to expand a return instruction
 ;; with simple_return rtx if no epilogue is required.
 (define_expand "return"
+  [(parallel [(return)
+              (clobber (reg:SI FP_REGNUM))])]
+  "nds32_can_use_return_insn ()"
+{
+  /* Emit as the simple return.  */
+  if (!cfun->machine->fp_as_gp_p
+      && cfun->machine->naked_p
+      && (cfun->machine->va_args_size == 0))
+    {
+      emit_jump_insn (gen_return_internal ());
+      DONE;
+    }
+})
+
+;; This pattern is expanded only by the shrink-wrapping optimization
+;; on paths where the function prologue has not been executed.
+;; However, that optimization may reorder the prologue/epilogue blocks
+;; together with the basic blocks of the function body.
+;; So we must disable this pattern if we have already decided
+;; to perform the fp_as_gp optimization, which requires the prologue
+;; to be the first block and the epilogue to be the last block.
+(define_expand "simple_return"
   [(simple_return)]
-  "nds32_can_use_return_insn ()"
+  "!cfun->machine->fp_as_gp_p"
   ""
 )
 
-;; This pattern is expanded only by the shrink-wrapping optimization
-;; on paths where the function prologue has not been executed.
-(define_expand "simple_return"
-  [(simple_return)]
+(define_insn "*nds32_return"
+  [(parallel [(return)
+   (clobber (reg:SI FP_REGNUM))])]
   ""
-  ""
-)
+{
+  return nds32_output_return ();
+}
+  [(set_attr "type" "branch")
+   (set_attr "enabled" "yes")
+   (set_attr "length" "4")])
 
 (define_insn "return_internal"
   [(simple_return)]
   ""
 {
+  if (nds32_isr_function_critical_p (current_function_decl))
+    return "iret";
+
   if (TARGET_16_BIT)
     return "ret5";
   else
     return "ret";
 }
   [(set_attr "type" "branch")
-   (set_attr "enabled" "1")
+   (set_attr "enabled" "yes")
    (set (attr "length")
-	(if_then_else (match_test "TARGET_16_BIT")
-		      (const_int 2)
-		      (const_int 4)))])
+	(if_then_else (match_test "nds32_isr_function_critical_p (current_function_decl)")
+		      (const_int 4)
+		      (if_then_else (match_test "TARGET_16_BIT")
+				    (const_int 2)
+				    (const_int 4))))])
 
 
 ;; ----------------------------------------------------------------------------
@@ -2267,6 +2043,7 @@
 {
   rtx add_tmp;
   rtx reg, test;
+  rtx tmp_reg;
 
   /* Step A: "k <-- (plus (operands[0]) (-operands[1]))".  */
   if (operands[1] != const0_rtx)
@@ -2288,9 +2065,14 @@
   emit_jump_insn (gen_cbranchsi4 (test, operands[0], operands[2],
 				  operands[4]));
 
-  /* Step C, D, E, and F, using another temporary register.  */
-  rtx tmp = gen_reg_rtx (SImode);
-  emit_jump_insn (gen_casesi_internal (operands[0], operands[3], tmp));
+  tmp_reg = gen_reg_rtx (SImode);
+  /* Step C, D, E, and F, using another temporary register tmp_reg.  */
+  if (flag_pic)
+    emit_use (pic_offset_table_rtx);
+
+  emit_jump_insn (gen_casesi_internal (operands[0],
+				       operands[3],
+				       tmp_reg));
   DONE;
 })
 
@@ -2326,17 +2108,34 @@
   else
     return nds32_output_casesi (operands);
 }
-  [(set_attr "length" "20")
-   (set_attr "type" "alu")])
+  [(set_attr "type" "branch")
+   (set (attr "length")
+	(if_then_else (match_test "flag_pic")
+		      (const_int 28)
+		      (const_int 20)))])
 
 ;; ----------------------------------------------------------------------------
 
 ;; Performance Extension
 
+; If the -fwrapv option is given, GCC expects signed overflow
+; to wrap around, so ABS(INT_MIN) must still be INT_MIN
+; (e.g. ABS(0x80000000)=0x80000000).
+; However, the hardware ABS instruction of the nds32 target
+; always performs saturation: abs 0x80000000 -> 0x7fffffff.
+; Therefore we can only enable the abssi2 pattern if flag_wrapv is NOT set.
+(define_insn "abssi2"
+  [(set (match_operand:SI 0 "register_operand"         "=r")
+	(abs:SI (match_operand:SI 1 "register_operand" " r")))]
+  "TARGET_EXT_PERF && TARGET_HW_ABS && !flag_wrapv"
+  "abs\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
 (define_insn "clzsi2"
   [(set (match_operand:SI 0 "register_operand"         "=r")
 	(clz:SI (match_operand:SI 1 "register_operand" " r")))]
-  "TARGET_PERF_EXT"
+  "TARGET_EXT_PERF"
   "clz\t%0, %1"
   [(set_attr "type" "alu")
    (set_attr "length" "4")])
@@ -2345,7 +2144,7 @@
   [(set (match_operand:SI 0 "register_operand"          "=r")
 	(smax:SI (match_operand:SI 1 "register_operand" " r")
 		 (match_operand:SI 2 "register_operand" " r")))]
-  "TARGET_PERF_EXT"
+  "TARGET_EXT_PERF"
   "max\t%0, %1, %2"
   [(set_attr "type" "alu")
    (set_attr "length" "4")])
@@ -2354,25 +2153,66 @@
   [(set (match_operand:SI 0 "register_operand"          "=r")
 	(smin:SI (match_operand:SI 1 "register_operand" " r")
 		 (match_operand:SI 2 "register_operand" " r")))]
-  "TARGET_PERF_EXT"
+  "TARGET_EXT_PERF"
   "min\t%0, %1, %2"
   [(set_attr "type" "alu")
    (set_attr "length" "4")])
 
-(define_insn "*btst"
-  [(set (match_operand:SI 0 "register_operand"                   "=   r")
-	(zero_extract:SI (match_operand:SI 1 "register_operand"  "    r")
+(define_insn "btst"
+  [(set (match_operand:SI 0 "register_operand"                     "=   r")
+	(zero_extract:SI (match_operand:SI 1 "register_operand"    "    r")
 			 (const_int 1)
-			 (match_operand:SI 2 "immediate_operand" " Iu05")))]
-  "TARGET_PERF_EXT"
+			 (match_operand:SI 2 "nds32_imm5u_operand" " Iu05")))]
+  "TARGET_EXT_PERF"
   "btst\t%0, %1, %2"
   [(set_attr "type" "alu")
    (set_attr "length" "4")])
 
+(define_insn "ave"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(truncate:SI
+	  (ashiftrt:DI
+	    (plus:DI
+	      (plus:DI
+		(sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
+		(sign_extend:DI (match_operand:SI 2 "register_operand" "r")))
+	      (const_int 1))
+	  (const_int 1))))]
+  "TARGET_EXT_PERF"
+  "ave\t%0, %1, %2"
+  [(set_attr "type" "alu")
+   (set_attr "length" "4")])
+
 ;; ----------------------------------------------------------------------------
 
 ;; Pseudo NOPs
 
+(define_insn "relax_group"
+  [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP)]
+  ""
+  ".relax_hint %0"
+  [(set_attr "length" "0")]
+)
+
+;; Output .omit_fp_begin for the fp-as-gp optimization.
+;; This insn also has to set the $fp register.
+(define_insn "omit_fp_begin"
+  [(set (match_operand:SI 0 "register_operand" "=x")
+	(unspec_volatile:SI [(const_int 0)] UNSPEC_VOLATILE_OMIT_FP_BEGIN))]
+  ""
+  "! -----\;.omit_fp_begin\;la\t$fp,_FP_BASE_\;! -----"
+  [(set_attr "length" "8")]
+)
+
+;; Output .omit_fp_end for the fp-as-gp optimization.
+;; The insn claims a use of the $fp register.
+(define_insn "omit_fp_end"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "x")] UNSPEC_VOLATILE_OMIT_FP_END)]
+  ""
+  "! -----\;.omit_fp_end\;! -----"
+  [(set_attr "length" "0")]
+)
+
 (define_insn "pop25return"
   [(return)
    (unspec_volatile:SI [(reg:SI LP_REGNUM)] UNSPEC_VOLATILE_POP25_RETURN)]
@@ -2381,4 +2221,151 @@
   [(set_attr "length" "0")]
 )
 
+;; Add pc
+;; Adds the program counter to a register in place.  Operand 1 is tied to
+;; operand 0 through the "0" constraint, which is why the template prints
+;; only %0.  Available for the Linux ABI or when generating PIC.
+(define_insn "add_pc"
+  [(set (match_operand:SI 0 "register_operand"          "=r")
+	(plus:SI (match_operand:SI 1 "register_operand"  "0")
+		 (pc)))]
+  "TARGET_LINUX_ABI || flag_pic"
+  "add5.pc\t%0"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
+;; 32-bit byte swap.  Expanded as two insns: unspec_wsbh from operand 1 into
+;; operand 0, followed by a rotate right of operand 0 by 16 bits.
+(define_expand "bswapsi2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(bswap:SI (match_operand:SI 1 "register_operand" "r")))]
+  ""
+{
+  /* The rotate reads the result of the wsbh, so the order matters.  */
+  emit_insn (gen_unspec_wsbh (operands[0], operands[1]));
+  emit_insn (gen_rotrsi3 (operands[0], operands[0], GEN_INT (16)));
+  /* Everything was emitted by hand; do not emit the template above.  */
+  DONE;
+})
+
+;; 16-bit byte swap, emitted as a single wsbh instruction.
+(define_insn "bswaphi2"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(bswap:HI (match_operand:HI 1 "register_operand" "r")))]
+  ""
+  "wsbh\t%0, %1"
+  [(set_attr "type"    "alu")
+   (set_attr "length"    "4")]
+)
+
 ;; ----------------------------------------------------------------------------
+
+;; Patterns for exception handling
+
+;; Expander for "eh_return".  It does nothing but forward operand 0 to the
+;; nds32_eh_return insn, which performs the actual work after reload.
+(define_expand "eh_return"
+  [(use (match_operand 0 "general_operand"))]
+  ""
+{
+  emit_insn (gen_nds32_eh_return (operands[0]));
+  /* The (use ...) template above is not emitted.  */
+  DONE;
+})
+
+;; Overwrite the function's return address with the exception handler
+;; address held in operand 0.  The insn is output as "#" and is always
+;; split once reload has completed.
+(define_insn_and_split "nds32_eh_return"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPEC_VOLATILE_EH_RETURN)]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  /* operands[0] carries the handler address.  Storing it where the return
+     address lives makes the current function return into the exception
+     handler.  */
+  rtx ra_place;
+
+  if (cfun->machine->lp_size != 0)
+    {
+      /* $lp has been saved in the stack frame, so the saved copy is what
+	 must be replaced.  The return address is generally saved at
+	 [$fp-4].  However, DSE (dead store elimination) does not detect
+	 an alias between [$fp-x] and [$sp+y], so a $fp-based store to
+	 save $lp introduced by builtin_eh_return() can be incorrectly
+	 deleted.  To let the alias be detected, the slot is addressed
+	 through $sp instead, using the offset computed below.
+	 FIXME: What if the immediate value is too large to fit in a
+		single addi instruction?  */
+      HOST_WIDE_INT frame_bytes
+	= (cfun->machine->fp_size
+	   + cfun->machine->gp_size
+	   + cfun->machine->lp_size
+	   + cfun->machine->callee_saved_gpr_regs_size
+	   + cfun->machine->callee_saved_area_gpr_padding_bytes
+	   + cfun->machine->callee_saved_fpr_regs_size
+	   + cfun->machine->eh_return_data_regs_size
+	   + cfun->machine->local_size
+	   + cfun->machine->out_args_size);
+      rtx slot_addr = plus_constant (Pmode, stack_pointer_rtx,
+				     frame_bytes - 4);
+
+      ra_place = gen_frame_mem (SImode, slot_addr);
+    }
+  else
+    {
+      /* $lp is not saved in the stack frame: write the register itself.  */
+      ra_place = gen_rtx_REG (SImode, LP_REGNUM);
+    }
+
+  emit_move_insn (ra_place, operands[0]);
+  DONE;
+})
+
+;; ----------------------------------------------------------------------------
+
+;; Patterns for TLS.
+;; The following two TLS patterns are not expanded into separate
+;; instructions because the intermediate value could then be spilled to the
+;; stack, which makes it hard to analyze the define-use chain in the
+;; relax_opt pass.
+
+
+;; There is an unspec operand that records the RELAX_GROUP number because
+;; each emitted instruction needs a relax_hint above it.
+;; The pattern is modelled as a call whose result is set in (reg:SI 0).
+;; Operand 0 is the symbol and operand 1 is the RELAX_GROUP number.  The
+;; insn uses GP_REGNUM and clobbers LP_REGNUM and TA_REGNUM.  The assembly
+;; text is produced by nds32_output_tls_desc.
+(define_insn "tls_desc"
+  [(set (reg:SI 0)
+	(call (unspec_volatile:SI [(match_operand:SI 0 "nds32_symbolic_operand" "i")] UNSPEC_TLS_DESC)
+	      (const_int 1)))
+   (use (unspec [(match_operand:SI 1 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP))
+   (use (reg:SI GP_REGNUM))
+   (clobber (reg:SI LP_REGNUM))
+   (clobber (reg:SI TA_REGNUM))]
+  ""
+  {
+    return nds32_output_tls_desc (operands);
+  }
+  [(set_attr "length" "20")
+   (set_attr "type" "branch")]
+)
+
+;; There is an unspec operand that records the RELAX_GROUP number because
+;; each emitted instruction needs a relax_hint above it.
+;; Operand 0 receives the result, operand 1 is the symbol and operand 2 is
+;; the RELAX_GROUP number.  The insn uses GP_REGNUM.  The assembly text is
+;; produced by nds32_output_tls_ie; the length is 12 bytes when flag_pic is
+;; set and 8 bytes otherwise.
+(define_insn "tls_ie"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(unspec:SI [(match_operand:SI 1 "nds32_symbolic_operand" "i")] UNSPEC_TLS_IE))
+   (use (unspec [(match_operand:SI 2 "immediate_operand" "i")] UNSPEC_VOLATILE_RELAX_GROUP))
+   (use (reg:SI GP_REGNUM))]
+  ""
+  {
+    return nds32_output_tls_ie (operands);
+  }
+  [(set (attr "length") (if_then_else (match_test "flag_pic")
+				      (const_int 12)
+				      (const_int 8)))
+   (set_attr "type" "misc")]
+)
+
+;; The pattern is for some relaxation groups that have to keep addsi3 in
+;; 32-bit mode.  The addition is expressed as UNSPEC_ADD32 rather than as a
+;; plain plus, and the insn always has length 4.  The "%" on operand 1 marks
+;; the two register inputs as commutative.
+(define_insn "addsi3_32bit"
+  [(set (match_operand:SI 0 "register_operand"             "=r")
+	(unspec:SI [(match_operand:SI 1 "register_operand" "%r")
+		    (match_operand:SI 2 "register_operand" " r")] UNSPEC_ADD32))]
+  ""
+  "add\t%0, %1, %2"
+  [(set_attr "type"    "alu")
+   (set_attr "length"  "4")
+   (set_attr "feature" "v1")])
+
+;; ----------------------------------------------------------------------------