Mercurial > hg > CbC > CbC_gcc

diff gcc/config/arm/vfp.md @ 111:04ced10e8804
gcc 7
author: kono
date: Fri, 27 Oct 2017 22:46:09 +0900
parents: f6334be47118
children: 84e7813d76e9
--- a/gcc/config/arm/vfp.md	Sun Aug 21 07:07:55 2011 +0900
+++ b/gcc/config/arm/vfp.md	Fri Oct 27 22:46:09 2017 +0900
@@ -1,6 +1,5 @@
 ;; ARM VFP instruction patterns
-;; Copyright (C) 2003, 2005, 2006, 2007, 2008, 2010
-;; Free Software Foundation, Inc.
+;; Copyright (C) 2003-2017 Free Software Foundation, Inc.
 ;; Written by CodeSourcery.
 ;;
 ;; This file is part of GCC.
@@ -19,33 +18,198 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; <http://www.gnu.org/licenses/>.  */
 
-;; Additional register numbers
-(define_constants
-  [(VFPCC_REGNUM 127)]
+;; Patterns for HI moves which provide more data transfer instructions when VFP
+;; support is enabled.
+(define_insn "*arm_movhi_vfp"
+ [(set
+   (match_operand:HI 0 "nonimmediate_operand"
+    "=rk,  r, r, m, r, *t,  r, *t")
+   (match_operand:HI 1 "general_operand"
+    "rIk, K, n, r, mi, r, *t, *t"))]
+ "TARGET_ARM && TARGET_HARD_FLOAT
+  && !TARGET_VFP_FP16INST
+  && (register_operand (operands[0], HImode)
+       || register_operand (operands[1], HImode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mov%?\t%0, %1\t%@ movhi";
+    case 1:
+      return "mvn%?\t%0, #%B1\t%@ movhi";
+    case 2:
+      return "movw%?\t%0, %L1\t%@ movhi";
+    case 3:
+      return "strh%?\t%1, %0\t%@ movhi";
+    case 4:
+      return "ldrh%?\t%0, %1\t%@ movhi";
+    case 5:
+    case 6:
+      return "vmov%?\t%0, %1\t%@ int";
+    case 7:
+      return "vmov%?.f32\t%0, %1\t%@ int";
+    default:
+      gcc_unreachable ();
+    }
+}
+ [(set_attr "predicable" "yes")
+  (set_attr_alternative "type"
+   [(if_then_else
+     (match_operand 1 "const_int_operand" "")
+     (const_string "mov_imm")
+     (const_string "mov_reg"))
+    (const_string "mvn_imm")
+    (const_string "mov_imm")
+    (const_string "store_4")
+    (const_string "load_4")
+    (const_string "f_mcr")
+    (const_string "f_mrc")
+    (const_string "fmov")])
+  (set_attr "arch" "*, *, v6t2, *, *, *, *, *")
+  (set_attr "pool_range" "*, *, *, *, 256, *, *, *")
+  (set_attr "neg_pool_range" "*, *, *, *, 244, *, *, *")
+  (set_attr "length" "4")]
 )
 
-;; The VFP "type" attributes differ from those used in the FPA model.
-;; fcpys	Single precision cpy.
-;; ffariths	Single precision abs, neg.
-;; ffarithd	Double precision abs, neg, cpy.
-;; fadds	Single precision add/sub.
-;; faddd	Double precision add/sub.
-;; fconsts	Single precision load immediate.
-;; fconstd	Double precision load immediate.
-;; fcmps	Single precision comparison.
-;; fcmpd	Double precision comparison.
-;; fmuls	Single precision multiply.
-;; fmuld	Double precision multiply.
-;; fmacs	Single precision multiply-accumulate.
-;; fmacd	Double precision multiply-accumulate.
-;; fdivs	Single precision sqrt or division.
-;; fdivd	Double precision sqrt or division.
-;; f_flag	fmstat operation
-;; f_load[sd]	Floating point load from memory.
-;; f_store[sd]	Floating point store to memory.
-;; f_2_r	Transfer vfp to arm reg.
-;; r_2_f	Transfer arm to vfp reg.
-;; f_cvt	Convert floating<->integral
+(define_insn "*thumb2_movhi_vfp"
+ [(set
+   (match_operand:HI 0 "nonimmediate_operand"
+    "=rk, r, l, r, m, r, *t, r, *t")
+   (match_operand:HI 1 "general_operand"
+    "rk, I, Py, n, r, m, r, *t, *t"))]
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT
+  && !TARGET_VFP_FP16INST
+  && (register_operand (operands[0], HImode)
+       || register_operand (operands[1], HImode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+      return "mov%?\t%0, %1\t%@ movhi";
+    case 3:
+      return "movw%?\t%0, %L1\t%@ movhi";
+    case 4:
+      return "strh%?\t%1, %0\t%@ movhi";
+    case 5:
+      return "ldrh%?\t%0, %1\t%@ movhi";
+    case 6:
+    case 7:
+      return "vmov%?\t%0, %1\t%@ int";
+    case 8:
+      return "vmov%?.f32\t%0, %1\t%@ int";
+    default:
+      gcc_unreachable ();
+    }
+}
+ [(set_attr "predicable" "yes")
+  (set_attr "predicable_short_it"
+   "yes, no, yes, no, no, no, no, no, no")
+  (set_attr "type"
+   "mov_reg, mov_imm, mov_imm, mov_imm, store_4, load_4,\
+    f_mcr, f_mrc, fmov")
+  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *")
+  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *")
+  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *")
+  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")]
+)
+
+;; Patterns for HI moves which provide more data transfer instructions when FP16
+;; instructions are available.
+(define_insn "*arm_movhi_fp16"
+ [(set
+   (match_operand:HI 0 "nonimmediate_operand"
+    "=r,  r, r, m, r, *t,  r, *t")
+   (match_operand:HI 1 "general_operand"
+    "rIk, K, n, r, mi, r, *t, *t"))]
+ "TARGET_ARM && TARGET_VFP_FP16INST
+  && (register_operand (operands[0], HImode)
+       || register_operand (operands[1], HImode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "mov%?\t%0, %1\t%@ movhi";
+    case 1:
+      return "mvn%?\t%0, #%B1\t%@ movhi";
+    case 2:
+      return "movw%?\t%0, %L1\t%@ movhi";
+    case 3:
+      return "strh%?\t%1, %0\t%@ movhi";
+    case 4:
+      return "ldrh%?\t%0, %1\t%@ movhi";
+    case 5:
+    case 6:
+      return "vmov.f16\t%0, %1\t%@ int";
+    case 7:
+      return "vmov%?.f32\t%0, %1\t%@ int";
+    default:
+      gcc_unreachable ();
+    }
+}
+ [(set_attr "predicable" "yes, yes, yes, yes, yes, no, no, yes")
+  (set_attr_alternative "type"
+   [(if_then_else
+     (match_operand 1 "const_int_operand" "")
+     (const_string "mov_imm")
+     (const_string "mov_reg"))
+    (const_string "mvn_imm")
+    (const_string "mov_imm")
+    (const_string "store_4")
+    (const_string "load_4")
+    (const_string "f_mcr")
+    (const_string "f_mrc")
+    (const_string "fmov")])
+  (set_attr "arch" "*, *, v6t2, *, *, *, *, *")
+  (set_attr "pool_range" "*, *, *, *, 256, *, *, *")
+  (set_attr "neg_pool_range" "*, *, *, *, 244, *, *, *")
+  (set_attr "length" "4")]
+)
+
+(define_insn "*thumb2_movhi_fp16"
+ [(set
+   (match_operand:HI 0 "nonimmediate_operand"
+    "=rk, r, l, r, m, r, *t, r, *t")
+   (match_operand:HI 1 "general_operand"
+    "rk, I, Py, n, r, m, r, *t, *t"))]
+ "TARGET_THUMB2 && TARGET_VFP_FP16INST
+  && (register_operand (operands[0], HImode)
+       || register_operand (operands[1], HImode))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+    case 2:
+      return "mov%?\t%0, %1\t%@ movhi";
+    case 3:
+      return "movw%?\t%0, %L1\t%@ movhi";
+    case 4:
+      return "strh%?\t%1, %0\t%@ movhi";
+    case 5:
+      return "ldrh%?\t%0, %1\t%@ movhi";
+    case 6:
+    case 7:
+      return "vmov.f16\t%0, %1\t%@ int";
+    case 8:
+      return "vmov%?.f32\t%0, %1\t%@ int";
+    default:
+      gcc_unreachable ();
+    }
+}
+ [(set_attr "predicable"
+   "yes, yes, yes, yes, yes, yes, no, no, yes")
+  (set_attr "predicable_short_it"
+   "yes, no, yes, no, no, no, no, no, no")
+  (set_attr "type"
+   "mov_reg, mov_imm, mov_imm, mov_imm, store_4, load_4,\
+    f_mcr, f_mrc, fmov")
+  (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *")
+  (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *")
+  (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *")
+  (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")]
+)
 
 ;; SImode moves
 ;; ??? For now do not allow loading constants into vfp regs.  This causes
@@ -53,7 +217,7 @@
 (define_insn "*arm_movsi_vfp"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m ,*t,r,*t,*t, *Uv")
       (match_operand:SI 1 "general_operand"	   "rk, I,K,j,mi,rk,r,*t,*t,*Uvi,*t"))]
-  "TARGET_ARM && TARGET_VFP && TARGET_HARD_FLOAT
+  "TARGET_ARM && TARGET_HARD_FLOAT
    && (   s_register_operand (operands[0], SImode)
        || s_register_operand (operands[1], SImode))"
   "*
@@ -70,11 +234,11 @@
     case 5:
       return \"str%?\\t%1, %0\";
     case 6:
-      return \"fmsr%?\\t%0, %1\\t%@ int\";
+      return \"vmov%?\\t%0, %1\\t%@ int\";
     case 7:
-      return \"fmrs%?\\t%0, %1\\t%@ int\";
+      return \"vmov%?\\t%0, %1\\t%@ int\";
     case 8:
-      return \"fcpys%?\\t%0, %1\\t%@ int\";
+      return \"vmov%?.f32\\t%0, %1\\t%@ int\";
     case 9: case 10:
       return output_move_vfp (operands);
     default:
@@ -82,142 +246,246 @@
     }
   "
   [(set_attr "predicable" "yes")
-   (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
-   (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*")
+   (set_attr "type" "mov_reg,mov_reg,mvn_imm,mov_imm,load_4,store_4,
+		     f_mcr,f_mrc,fmov,f_loads,f_stores")
    (set_attr "pool_range"     "*,*,*,*,4096,*,*,*,*,1020,*")
    (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")]
 )
 
 ;; See thumb2.md:thumb2_movsi_insn for an explanation of the split
 ;; high/low register alternatives for loads and stores here.
+;; The l/Py alternative should come after r/I to ensure that the short variant
+;; is chosen with length 2 when the instruction is predicated for
+;; arm_restrict_it.
 (define_insn "*thumb2_movsi_vfp"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r, l,*hk,m, *m,*t, r,*t,*t,  *Uv")
-	(match_operand:SI 1 "general_operand"	   "rk, I,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))]
-  "TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r, l,*hk,m, *m,*t, r,*t,*t,  *Uv")
+	(match_operand:SI 1 "general_operand"	   "rk,I,Py,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))]
+  "TARGET_THUMB2 && TARGET_HARD_FLOAT
    && (   s_register_operand (operands[0], SImode)
        || s_register_operand (operands[1], SImode))"
   "*
   switch (which_alternative)
     {
-    case 0: case 1:
+    case 0:
+    case 1:
+    case 2:
       return \"mov%?\\t%0, %1\";
-    case 2:
-      return \"mvn%?\\t%0, #%B1\";
     case 3:
-      return \"movw%?\\t%0, %1\";
+      return \"mvn%?\\t%0, #%B1\";
     case 4:
+      return \"movw%?\\t%0, %1\";
     case 5:
-      return \"ldr%?\\t%0, %1\";
     case 6:
+      return \"ldr%?\\t%0, %1\";
     case 7:
+    case 8:
       return \"str%?\\t%1, %0\";
-    case 8:
-      return \"fmsr%?\\t%0, %1\\t%@ int\";
     case 9:
-      return \"fmrs%?\\t%0, %1\\t%@ int\";
+      return \"vmov%?\\t%0, %1\\t%@ int\";
     case 10:
-      return \"fcpys%?\\t%0, %1\\t%@ int\";
-    case 11: case 12:
+      return \"vmov%?\\t%0, %1\\t%@ int\";
+    case 11:
+      return \"vmov%?.f32\\t%0, %1\\t%@ int\";
+    case 12: case 13:
       return output_move_vfp (operands);
     default:
       gcc_unreachable ();
     }
   "
   [(set_attr "predicable" "yes")
-   (set_attr "type" "*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
-   (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*")
-   (set_attr "pool_range"     "*,*,*,*,1020,4096,*,*,*,*,*,1020,*")
-   (set_attr "neg_pool_range" "*,*,*,*,   0,   0,*,*,*,*,*,1008,*")]
+   (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no")
+   (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load_4,load_4,store_4,store_4,f_mcr,f_mrc,fmov,f_loads,f_stores")
+   (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4")
+   (set_attr "pool_range"     "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*")
+   (set_attr "neg_pool_range" "*,*,*,*,*,   0,   0,*,*,*,*,*,1008,*")]
 )
 
 
 ;; DImode moves
 
-(define_insn "*arm_movdi_vfp"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r,m,w,r,w,w, Uv")
-	(match_operand:DI 1 "di_operand"              "rIK,mi,r,r,w,w,Uvi,w"))]
-  "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
+(define_insn "*movdi_vfp"
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv")
+       (match_operand:DI 1 "di_operand"              "r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && arm_tune != TARGET_CPU_cortexa8
    && (   register_operand (operands[0], DImode)
-       || register_operand (operands[1], DImode))"
+       || register_operand (operands[1], DImode))
+   && !(TARGET_NEON && CONST_INT_P (operands[1])
+        && neon_immediate_valid_for_move (operands[1], DImode, NULL, NULL))"
   "*
   switch (which_alternative)
     {
     case 0: 
-      return \"#\";
     case 1:
     case 2:
-      return output_move_double (operands);
     case 3:
-      return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\";
+      return \"#\";
     case 4:
-      return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\";
     case 5:
+    case 6:
+      return output_move_double (operands, true, NULL);
+    case 7:
+      return \"vmov%?\\t%P0, %Q1, %R1\\t%@ int\";
+    case 8:
+      return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\";
+    case 9:
       if (TARGET_VFP_SINGLE)
-	return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\";
+	return \"vmov%?.f32\\t%0, %1\\t%@ int\;vmov%?.f32\\t%p0, %p1\\t%@ int\";
       else
-	return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
-    case 6: case 7:
+	return \"vmov%?.f64\\t%P0, %P1\\t%@ int\";
+    case 10: case 11:
+      return output_move_vfp (operands);
+    default:
+      gcc_unreachable ();
+    }
+  "
+  [(set_attr "type" "multiple,multiple,multiple,multiple,load_8,load_8,store_8,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored")
+   (set (attr "length") (cond [(eq_attr "alternative" "1,4,5,6") (const_int 8)
+                              (eq_attr "alternative" "2") (const_int 12)
+                              (eq_attr "alternative" "3") (const_int 16)
+                              (eq_attr "alternative" "9")
+                               (if_then_else
+                                 (match_test "TARGET_VFP_SINGLE")
+                                 (const_int 8)
+                                 (const_int 4))]
+                              (const_int 4)))
+   (set_attr "arm_pool_range"     "*,*,*,*,1020,4096,*,*,*,*,1020,*")
+   (set_attr "thumb2_pool_range"     "*,*,*,*,1018,4094,*,*,*,*,1018,*")
+   (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*")
+   (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]
+)
+
+(define_insn "*movdi_vfp_cortexa8"
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,!r,w,w, Uv")
+	(match_operand:DI 1 "di_operand"		"r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && arm_tune == TARGET_CPU_cortexa8
+    && (   register_operand (operands[0], DImode)
+        || register_operand (operands[1], DImode))
+    && !(TARGET_NEON && CONST_INT_P (operands[1])
+	 && neon_immediate_valid_for_move (operands[1], DImode, NULL, NULL))"
+  "*
+  switch (which_alternative)
+    {
+    case 0: 
+    case 1:
+    case 2:
+    case 3:
+      return \"#\";
+    case 4:
+    case 5:
+    case 6:
+      return output_move_double (operands, true, NULL);
+    case 7:
+      return \"vmov%?\\t%P0, %Q1, %R1\\t%@ int\";
+    case 8:
+      return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\";
+    case 9:
+      return \"vmov%?.f64\\t%P0, %P1\\t%@ int\";
+    case 10: case 11:
       return output_move_vfp (operands);
     default:
       gcc_unreachable ();
     }
   "
-  [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
-   (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8)
-			       (eq_attr "alternative" "5")
-				(if_then_else
-				 (eq (symbol_ref "TARGET_VFP_SINGLE")
-				     (const_int 1))
-				 (const_int 8)
-				 (const_int 4))]
-			      (const_int 4)))
+  [(set_attr "type" "multiple,multiple,multiple,multiple,load_8,load_8,store_8,f_mcrr,f_mrrc,ffarithd,f_loadd,f_stored")
+   (set (attr "length") (cond [(eq_attr "alternative" "1") (const_int 8)
+                               (eq_attr "alternative" "2") (const_int 12)
+                               (eq_attr "alternative" "3") (const_int 16)
+                               (eq_attr "alternative" "4,5,6") 
+			       (symbol_ref 
+				"arm_count_output_move_double_insns (operands) \
+                                 * 4")]
+                              (const_int 4)))
    (set_attr "predicable"    "yes")
-   (set_attr "pool_range"     "*,1020,*,*,*,*,1020,*")
-   (set_attr "neg_pool_range" "*,1008,*,*,*,*,1008,*")]
-)
+   (set_attr "arm_pool_range"     "*,*,*,*,1018,4094,*,*,*,*,1018,*")
+   (set_attr "thumb2_pool_range"     "*,*,*,*,1018,4094,*,*,*,*,1018,*")
+   (set_attr "neg_pool_range" "*,*,*,*,1004,0,*,*,*,*,1004,*")
+   (set (attr "ce_count") 
+	(symbol_ref "get_attr_length (insn) / 4"))
+   (set_attr "arch"           "t2,any,any,any,a,t2,any,any,any,any,any,any")]
+ )
+
+;; HFmode moves
 
-(define_insn "*thumb2_movdi_vfp"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r,m,w,r,w,w, Uv")
-	(match_operand:DI 1 "di_operand"              "rIK,mi,r,r,w,w,Uvi,w"))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
-  "*
+(define_insn "*movhf_vfp_fp16"
+  [(set (match_operand:HF 0 "nonimmediate_operand"
+			  "= r,m,t,r,t,r,t,t,Um,r")
+	(match_operand:HF 1 "general_operand"
+			  "  m,r,t,r,r,t,Dv,Um,t,F"))]
+  "TARGET_32BIT
+   && TARGET_VFP_FP16INST
+   && (s_register_operand (operands[0], HFmode)
+       || s_register_operand (operands[1], HFmode))"
+ {
   switch (which_alternative)
     {
-    case 0: case 1: case 2:
-      return (output_move_double (operands));
-    case 3:
-      return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\";
-    case 4:
-      return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\";
-    case 5:
-      if (TARGET_VFP_SINGLE)
-	return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\";
-      else
-	return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
-    case 6: case 7:
-      return output_move_vfp (operands);
+    case 0: /* ARM register from memory.  */
+      return \"ldrh%?\\t%0, %1\\t%@ __fp16\";
+    case 1: /* Memory from ARM register.  */
+      return \"strh%?\\t%1, %0\\t%@ __fp16\";
+    case 2: /* S register from S register.  */
+      return \"vmov\\t%0, %1\t%@ __fp16\";
+    case 3: /* ARM register from ARM register.  */
+      return \"mov%?\\t%0, %1\\t%@ __fp16\";
+    case 4: /* S register from ARM register.  */
+    case 5: /* ARM register from S register.  */
+    case 6: /* S register from immediate.  */
+      return \"vmov.f16\\t%0, %1\t%@ __fp16\";
+    case 7: /* S register from memory.  */
+      return \"vld1.16\\t{%z0}, %A1\";
+    case 8: /* Memory from S register.  */
+      return \"vst1.16\\t{%z1}, %A0\";
+    case 9: /* ARM register from constant.  */
+      {
+	long bits;
+	rtx ops[4];
+
+	bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (operands[1]),
+			       HFmode);
+	ops[0] = operands[0];
+	ops[1] = GEN_INT (bits);
+	ops[2] = GEN_INT (bits & 0xff00);
+	ops[3] = GEN_INT (bits & 0x00ff);
+
+	if (arm_arch_thumb2)
+	  output_asm_insn (\"movw\\t%0, %1\", ops);
+	else
+	  output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops);
+	return \"\";
+       }
     default:
-      abort ();
+      gcc_unreachable ();
     }
-  "
-  [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
-   (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8)
-			       (eq_attr "alternative" "5")
-				(if_then_else
-				 (eq (symbol_ref "TARGET_VFP_SINGLE")
-				     (const_int 1))
-				 (const_int 8)
-				 (const_int 4))]
-			      (const_int 4)))
-   (set_attr "pool_range"     "*,4096,*,*,*,*,1020,*")
-   (set_attr "neg_pool_range" "*,   0,*,*,*,*,1008,*")]
+ }
+  [(set_attr "predicable" "yes, yes, no, yes, no, no, no, no, no, no")
+   (set_attr "predicable_short_it" "no, no, no, yes,\
+				    no, no, no, no,\
+				    no, no")
+   (set_attr_alternative "type"
+    [(const_string "load_4") (const_string "store_4")
+     (const_string "fmov") (const_string "mov_reg")
+     (const_string "f_mcr") (const_string "f_mrc")
+     (const_string "fconsts") (const_string "neon_load1_1reg")
+     (const_string "neon_store1_1reg")
+     (if_then_else (match_test "arm_arch_thumb2")
+      (const_string "mov_imm")
+      (const_string "multiple"))])
+   (set_attr_alternative "length"
+    [(const_int 4) (const_int 4)
+     (const_int 4) (const_int 4)
+     (const_int 4) (const_int 4)
+     (const_int 4) (const_int 4)
+     (const_int 4)
+     (if_then_else (match_test "arm_arch_thumb2")
+      (const_int 4)
+      (const_int 8))])]
 )
 
-;; HFmode moves
 (define_insn "*movhf_vfp_neon"
   [(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r")
 	(match_operand:HF 1 "general_operand"	   " Um, t,m,r,t,r,r,t,F"))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16
+  "TARGET_32BIT
+   && TARGET_HARD_FLOAT && TARGET_NEON_FP16
+   && !TARGET_VFP_FP16INST
    && (   s_register_operand (operands[0], HFmode)
        || s_register_operand (operands[1], HFmode))"
   "*
@@ -232,72 +500,20 @@
     case 3:     /* memory from ARM register */
       return \"strh\\t%1, %0\\t%@ __fp16\";
     case 4:	/* S register from S register */
-      return \"fcpys\\t%0, %1\";
+      return \"vmov.f32\\t%0, %1\";
     case 5:	/* ARM register from ARM register */
       return \"mov\\t%0, %1\\t%@ __fp16\";
     case 6:	/* S register from ARM register */
-      return \"fmsr\\t%0, %1\";
+      return \"vmov\\t%0, %1\";
     case 7:	/* ARM register from S register */
-      return \"fmrs\\t%0, %1\";
+      return \"vmov\\t%0, %1\";
     case 8:	/* ARM register from constant */
       {
-        REAL_VALUE_TYPE r;
 	long bits;
 	rtx ops[4];
 
-        REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
-	bits = real_to_target (NULL, &r, HFmode);
-	ops[0] = operands[0];
-	ops[1] = GEN_INT (bits);
-	ops[2] = GEN_INT (bits & 0xff00);
-	ops[3] = GEN_INT (bits & 0x00ff);
-
-	if (arm_arch_thumb2)
-	  output_asm_insn (\"movw\\t%0, %1\", ops);
-	else
-	  output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops);
-	return \"\";
-       }
-    default:
-      gcc_unreachable ();
-    }
-  "
-  [(set_attr "conds" "unconditional")
-   (set_attr "type" "*,*,load1,store1,fcpys,*,r_2_f,f_2_r,*")
-   (set_attr "neon_type" "neon_vld1_1_2_regs,neon_vst1_1_2_regs_vst2_2_regs,*,*,*,*,*,*,*")
-   (set_attr "length" "4,4,4,4,4,4,4,4,8")]
-)
-
-;; FP16 without element load/store instructions.
-(define_insn "*movhf_vfp"
-  [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,t,r,t,r,r")
-	(match_operand:HF 1 "general_operand"	   " m,r,t,r,r,t,F"))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16 && !TARGET_NEON_FP16
-   && (   s_register_operand (operands[0], HFmode)
-       || s_register_operand (operands[1], HFmode))"
-  "*
-  switch (which_alternative)
-    {
-    case 0:     /* ARM register from memory */
-      return \"ldrh\\t%0, %1\\t%@ __fp16\";
-    case 1:     /* memory from ARM register */
-      return \"strh\\t%1, %0\\t%@ __fp16\";
-    case 2:	/* S register from S register */
-      return \"fcpys\\t%0, %1\";
-    case 3:	/* ARM register from ARM register */
-      return \"mov\\t%0, %1\\t%@ __fp16\";
-    case 4:	/* S register from ARM register */
-      return \"fmsr\\t%0, %1\";
-    case 5:	/* ARM register from S register */
-      return \"fmrs\\t%0, %1\";
-    case 6:	/* ARM register from constant */
-      {
-        REAL_VALUE_TYPE r;
-	long bits;
-	rtx ops[4];
-
-        REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]);
-	bits = real_to_target (NULL, &r, HFmode);
+	bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (operands[1]),
+			       HFmode);
 	ops[0] = operands[0];
 	ops[1] = GEN_INT (bits);
 	ops[2] = GEN_INT (bits & 0xff00);
@@ -314,7 +530,60 @@
     }
   "
   [(set_attr "conds" "unconditional")
-   (set_attr "type" "load1,store1,fcpys,*,r_2_f,f_2_r,*")
+   (set_attr "type" "neon_load1_1reg,neon_store1_1reg,\
+                     load_4,store_4,fmov,mov_reg,f_mcr,f_mrc,multiple")
+   (set_attr "length" "4,4,4,4,4,4,4,4,8")]
+)
+
+;; FP16 without element load/store instructions.
+(define_insn "*movhf_vfp"
+  [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,t,r,t,r,r")
+	(match_operand:HF 1 "general_operand"	   " m,r,t,r,r,t,F"))]
+  "TARGET_32BIT
+   && TARGET_HARD_FLOAT
+   && !TARGET_NEON_FP16
+   && !TARGET_VFP_FP16INST
+   && (   s_register_operand (operands[0], HFmode)
+       || s_register_operand (operands[1], HFmode))"
+  "*
+  switch (which_alternative)
+    {
+    case 0:     /* ARM register from memory */
+      return \"ldrh\\t%0, %1\\t%@ __fp16\";
+    case 1:     /* memory from ARM register */
+      return \"strh\\t%1, %0\\t%@ __fp16\";
+    case 2:	/* S register from S register */
+      return \"vmov.f32\\t%0, %1\";
+    case 3:	/* ARM register from ARM register */
+      return \"mov\\t%0, %1\\t%@ __fp16\";
+    case 4:	/* S register from ARM register */
+      return \"vmov\\t%0, %1\";
+    case 5:	/* ARM register from S register */
+      return \"vmov\\t%0, %1\";
+    case 6:	/* ARM register from constant */
+      {
+	long bits;
+	rtx ops[4];
+
+	bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (operands[1]),
+			       HFmode);
+	ops[0] = operands[0];
+	ops[1] = GEN_INT (bits);
+	ops[2] = GEN_INT (bits & 0xff00);
+	ops[3] = GEN_INT (bits & 0x00ff);
+
+	if (arm_arch_thumb2)
+	  output_asm_insn (\"movw\\t%0, %1\", ops);
+	else
+	  output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops);
+	return \"\";
+       }
+    default:
+      gcc_unreachable ();
+    }
+  "
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "load_4,store_4,fmov,mov_reg,f_mcr,f_mrc,multiple")
    (set_attr "length" "4,4,4,4,4,4,8")]
 )
 
@@ -325,19 +594,19 @@
 
 (define_insn "*movsf_vfp"
   [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t ,t  ,Uv,r ,m,t,r")
-	(match_operand:SF 1 "general_operand"	   " ?r,t,Dv,UvE,t, mE,r,t,r"))]
-  "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
+        (match_operand:SF 1 "general_operand"	   " ?r,t,Dv,UvE,t, mE,r,t,r"))]
+  "TARGET_ARM && TARGET_HARD_FLOAT
    && (   s_register_operand (operands[0], SFmode)
        || s_register_operand (operands[1], SFmode))"
   "*
   switch (which_alternative)
     {
     case 0:
-      return \"fmsr%?\\t%0, %1\";
+      return \"vmov%?\\t%0, %1\";
     case 1:
-      return \"fmrs%?\\t%0, %1\";
+      return \"vmov%?\\t%0, %1\";
     case 2:
-      return \"fconsts%?\\t%0, #%G1\";
+      return \"vmov%?.f32\\t%0, %1\";
     case 3: case 4:
       return output_move_vfp (operands);
     case 5:
@@ -345,44 +614,7 @@
     case 6:
       return \"str%?\\t%1, %0\\t%@ float\";
     case 7:
-      return \"fcpys%?\\t%0, %1\";
-    case 8:
-      return \"mov%?\\t%0, %1\\t%@ float\";
-    default:
-      gcc_unreachable ();
-    }
-  "
-  [(set_attr "predicable" "yes")
-   (set_attr "type"
-     "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*")
-   (set_attr "insn" "*,*,*,*,*,*,*,*,mov")
-   (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*")
-   (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")]
-)
-
-(define_insn "*thumb2_movsf_vfp"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t, t  ,Uv,r ,m,t,r")
-	(match_operand:SF 1 "general_operand"	   " ?r,t,Dv,UvE,t, mE,r,t,r"))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP
-   && (   s_register_operand (operands[0], SFmode)
-       || s_register_operand (operands[1], SFmode))"
-  "*
-  switch (which_alternative)
-    {
-    case 0:
-      return \"fmsr%?\\t%0, %1\";
-    case 1:
-      return \"fmrs%?\\t%0, %1\";
-    case 2:
-      return \"fconsts%?\\t%0, #%G1\";
-    case 3: case 4:
-      return output_move_vfp (operands);
-    case 5:
-      return \"ldr%?\\t%0, %1\\t%@ float\";
-    case 6:
-      return \"str%?\\t%1, %0\\t%@ float\";
-    case 7:
-      return \"fcpys%?\\t%0, %1\";
+      return \"vmov%?.f32\\t%0, %1\";
     case 8:
       return \"mov%?\\t%0, %1\\t%@ float\";
     default:
@@ -391,19 +623,54 @@
   "
   [(set_attr "predicable" "yes")
    (set_attr "type"
-     "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*")
-   (set_attr "insn" "*,*,*,*,*,*,*,*,mov")
-   (set_attr "pool_range" "*,*,*,1020,*,4092,*,*,*")
+     "f_mcr,f_mrc,fconsts,f_loads,f_stores,load_4,store_4,fmov,mov_reg")
+   (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*")
+   (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")]
+)
+
+(define_insn "*thumb2_movsf_vfp"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t, t  ,Uv,r ,m,t,r")
+	(match_operand:SF 1 "general_operand"	   " ?r,t,Dv,UvE,t, mE,r,t,r"))]
+  "TARGET_THUMB2 && TARGET_HARD_FLOAT
+   && (   s_register_operand (operands[0], SFmode)
+       || s_register_operand (operands[1], SFmode))"
+  "*
+  switch (which_alternative)
+    {
+    case 0:
+      return \"vmov%?\\t%0, %1\";
+    case 1:
+      return \"vmov%?\\t%0, %1\";
+    case 2:
+      return \"vmov%?.f32\\t%0, %1\";
+    case 3: case 4:
+      return output_move_vfp (operands);
+    case 5:
+      return \"ldr%?\\t%0, %1\\t%@ float\";
+    case 6:
+      return \"str%?\\t%1, %0\\t%@ float\";
+    case 7:
+      return \"vmov%?.f32\\t%0, %1\";
+    case 8:
+      return \"mov%?\\t%0, %1\\t%@ float\";
+    default:
+      gcc_unreachable ();
+    }
+  "
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type"
+     "f_mcr,f_mrc,fconsts,f_loads,f_stores,load_4,store_4,fmov,mov_reg")
+   (set_attr "pool_range" "*,*,*,1018,*,4090,*,*,*")
    (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
 )
 
-
 ;; DFmode moves
 
 (define_insn "*movdf_vfp"
-  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w  ,Uv,w,r")
-	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,mF,r,UvF,w, w,r"))]
-  "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
+  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w  ,Uv,r, m,w,r")
+	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,G,UvF,w ,mF,r,w,r"))]
+  "TARGET_ARM && TARGET_HARD_FLOAT
    && (   register_operand (operands[0], DFmode)
        || register_operand (operands[1], DFmode))"
   "*
@@ -411,84 +678,92 @@
     switch (which_alternative)
       {
       case 0:
-	return \"fmdrr%?\\t%P0, %Q1, %R1\";
+	return \"vmov%?\\t%P0, %Q1, %R1\";
       case 1:
-	return \"fmrrd%?\\t%Q0, %R0, %P1\";
+	return \"vmov%?\\t%Q0, %R0, %P1\";
       case 2:
 	gcc_assert (TARGET_VFP_DOUBLE);
-        return \"fconstd%?\\t%P0, #%G1\";
-      case 3: case 4:
-	return output_move_double (operands);
-      case 5: case 6:
+        return \"vmov%?.f64\\t%P0, %1\";
+      case 3:
+	gcc_assert (TARGET_VFP_DOUBLE);
+	return \"vmov.i64\\t%P0, #0\\t%@ float\";
+      case 4: case 5:
 	return output_move_vfp (operands);
-      case 7:
+      case 6: case 7:
+	return output_move_double (operands, true, NULL);
+      case 8:
 	if (TARGET_VFP_SINGLE)
-	  return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
+	  return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
 	else
-	  return \"fcpyd%?\\t%P0, %P1\";
-      case 8:
+	  return \"vmov%?.f64\\t%P0, %P1\";
+      case 9:
         return \"#\";
       default:
 	gcc_unreachable ();
       }
     }
   "
-  [(set_attr "type"
-     "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*")
-   (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
-			       (eq_attr "alternative" "7")
+  [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,f_stored,\
+                     load_8,store_8,ffarithd,multiple")
+   (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
+			       (eq_attr "alternative" "8")
 				(if_then_else
-				 (eq (symbol_ref "TARGET_VFP_SINGLE")
-				     (const_int 1))
+				 (match_test "TARGET_VFP_SINGLE")
 				 (const_int 8)
 				 (const_int 4))]
 			      (const_int 4)))
-   (set_attr "predicable" "yes")
-   (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*")
-   (set_attr "neg_pool_range" "*,*,*,1008,*,1008,*,*,*")]
+   (set_attr "predicable" "yes,yes,yes,no,yes,yes,yes,yes,yes,yes")
+   (set_attr "pool_range" "*,*,*,*,1020,*,1020,*,*,*")
+   (set_attr "neg_pool_range" "*,*,*,*,1004,*,1004,*,*,*")
+   (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")]
 )
 
 (define_insn "*thumb2_movdf_vfp"
-  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,r, m,w  ,Uv,w,r")
-	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,mF,r,UvF,w, w,r"))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
+  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w  ,Uv,r ,m,w,r")
+	(match_operand:DF 1 "soft_df_operand"		   " ?r,w,Dy,G,UvF,w, mF,r, w,r"))]
+  "TARGET_THUMB2 && TARGET_HARD_FLOAT
+   && (   register_operand (operands[0], DFmode)
+       || register_operand (operands[1], DFmode))"
   "*
   {
     switch (which_alternative)
       {
       case 0:
-	return \"fmdrr%?\\t%P0, %Q1, %R1\";
+	return \"vmov%?\\t%P0, %Q1, %R1\";
       case 1:
-	return \"fmrrd%?\\t%Q0, %R0, %P1\";
+	return \"vmov%?\\t%Q0, %R0, %P1\";
       case 2:
 	gcc_assert (TARGET_VFP_DOUBLE);
-	return \"fconstd%?\\t%P0, #%G1\";
-      case 3: case 4: case 8:
-	return output_move_double (operands);
-      case 5: case 6:
+	return \"vmov%?.f64\\t%P0, %1\";
+      case 3:
+	gcc_assert (TARGET_VFP_DOUBLE);
+	return \"vmov.i64\\t%P0, #0\\t%@ float\";
+      case 4: case 5:
 	return output_move_vfp (operands);
-      case 7:
+      case 6: case 7: case 9:
+	return output_move_double (operands, true, NULL);
+      case 8:
 	if (TARGET_VFP_SINGLE)
-	  return \"fcpys%?\\t%0, %1\;fcpys%?\\t%p0, %p1\";
+	  return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
 	else
-	  return \"fcpyd%?\\t%P0, %P1\";
+	  return \"vmov%?.f64\\t%P0, %P1\";
       default:
 	abort ();
       }
     }
   "
-  [(set_attr "type"
-     "r_2_f,f_2_r,fconstd,load2,store2,f_loadd,f_stored,ffarithd,*")
-   (set (attr "length") (cond [(eq_attr "alternative" "3,4,8") (const_int 8)
-			       (eq_attr "alternative" "7")
+  [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,\
+                     f_stored,load_8,store_8,ffarithd,multiple")
+   (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
+			       (eq_attr "alternative" "8")
 				(if_then_else
-				 (eq (symbol_ref "TARGET_VFP_SINGLE")
-				     (const_int 1))
+				 (match_test "TARGET_VFP_SINGLE")
 				 (const_int 8)
 				 (const_int 4))]
 			      (const_int 4)))
-   (set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*")
-   (set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")]
+   (set_attr "pool_range" "*,*,*,*,1018,*,4094,*,*,*")
+   (set_attr "neg_pool_range" "*,*,*,*,1008,*,0,*,*,*")
+   (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")]
 )
 
 
@@ -501,20 +776,20 @@
 	    [(match_operand 4 "cc_register" "") (const_int 0)])
 	  (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t")
 	  (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
-  "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP"
+  "TARGET_ARM && TARGET_HARD_FLOAT"
   "@
-   fcpys%D3\\t%0, %2
-   fcpys%d3\\t%0, %1
-   fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1
-   fmsr%D3\\t%0, %2
-   fmsr%d3\\t%0, %1
-   fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1
-   fmrs%D3\\t%0, %2
-   fmrs%d3\\t%0, %1
-   fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
+   vmov%D3.f32\\t%0, %2
+   vmov%d3.f32\\t%0, %1
+   vmov%D3.f32\\t%0, %2\;vmov%d3.f32\\t%0, %1
+   vmov%D3\\t%0, %2
+   vmov%d3\\t%0, %1
+   vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1
+   vmov%D3\\t%0, %2
+   vmov%d3\\t%0, %1
+   vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1"
    [(set_attr "conds" "use")
     (set_attr "length" "4,4,8,4,4,8,4,4,8")
-    (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+    (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")]
 )
 
 (define_insn "*thumb2_movsfcc_vfp"
@@ -524,20 +799,20 @@
 	    [(match_operand 4 "cc_register" "") (const_int 0)])
 	  (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t")
 	  (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
+  "TARGET_THUMB2 && TARGET_HARD_FLOAT && !arm_restrict_it"
   "@
-   it\\t%D3\;fcpys%D3\\t%0, %2
-   it\\t%d3\;fcpys%d3\\t%0, %1
-   ite\\t%D3\;fcpys%D3\\t%0, %2\;fcpys%d3\\t%0, %1
-   it\\t%D3\;fmsr%D3\\t%0, %2
-   it\\t%d3\;fmsr%d3\\t%0, %1
-   ite\\t%D3\;fmsr%D3\\t%0, %2\;fmsr%d3\\t%0, %1
-   it\\t%D3\;fmrs%D3\\t%0, %2
-   it\\t%d3\;fmrs%d3\\t%0, %1
-   ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
+   it\\t%D3\;vmov%D3.f32\\t%0, %2
+   it\\t%d3\;vmov%d3.f32\\t%0, %1
+   ite\\t%D3\;vmov%D3.f32\\t%0, %2\;vmov%d3.f32\\t%0, %1
+   it\\t%D3\;vmov%D3\\t%0, %2
+   it\\t%d3\;vmov%d3\\t%0, %1
+   ite\\t%D3\;vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1
+   it\\t%D3\;vmov%D3\\t%0, %2
+   it\\t%d3\;vmov%d3\\t%0, %1
+   ite\\t%D3\;vmov%D3\\t%0, %2\;vmov%d3\\t%0, %1"
    [(set_attr "conds" "use")
     (set_attr "length" "6,6,10,6,6,10,6,6,10")
-    (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+    (set_attr "type" "fmov,fmov,fmov,f_mcr,f_mcr,f_mcr,f_mrc,f_mrc,f_mrc")]
 )
 
 (define_insn "*movdfcc_vfp"
@@ -549,18 +824,18 @@
 	  (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
   "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
   "@
-   fcpyd%D3\\t%P0, %P2
-   fcpyd%d3\\t%P0, %P1
-   fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1
-   fmdrr%D3\\t%P0, %Q2, %R2
-   fmdrr%d3\\t%P0, %Q1, %R1
-   fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1
-   fmrrd%D3\\t%Q0, %R0, %P2
-   fmrrd%d3\\t%Q0, %R0, %P1
-   fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
+   vmov%D3.f64\\t%P0, %P2
+   vmov%d3.f64\\t%P0, %P1
+   vmov%D3.f64\\t%P0, %P2\;vmov%d3.f64\\t%P0, %P1
+   vmov%D3\\t%P0, %Q2, %R2
+   vmov%d3\\t%P0, %Q1, %R1
+   vmov%D3\\t%P0, %Q2, %R2\;vmov%d3\\t%P0, %Q1, %R1
+   vmov%D3\\t%Q0, %R0, %P2
+   vmov%d3\\t%Q0, %R0, %P1
+   vmov%D3\\t%Q0, %R0, %P2\;vmov%d3\\t%Q0, %R0, %P1"
    [(set_attr "conds" "use")
     (set_attr "length" "4,4,8,4,4,8,4,4,8")
-    (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+    (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcr,f_mrrc,f_mrrc,f_mrrc")]
 )
 
 (define_insn "*thumb2_movdfcc_vfp"
@@ -570,20 +845,20 @@
 	    [(match_operand 4 "cc_register" "") (const_int 0)])
 	  (match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w")
 	  (match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
-  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+  "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it"
   "@
-   it\\t%D3\;fcpyd%D3\\t%P0, %P2
-   it\\t%d3\;fcpyd%d3\\t%P0, %P1
-   ite\\t%D3\;fcpyd%D3\\t%P0, %P2\;fcpyd%d3\\t%P0, %P1
-   it\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2
-   it\t%d3\;fmdrr%d3\\t%P0, %Q1, %R1
-   ite\\t%D3\;fmdrr%D3\\t%P0, %Q2, %R2\;fmdrr%d3\\t%P0, %Q1, %R1
-   it\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2
-   it\t%d3\;fmrrd%d3\\t%Q0, %R0, %P1
-   ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
+   it\\t%D3\;vmov%D3.f64\\t%P0, %P2
+   it\\t%d3\;vmov%d3.f64\\t%P0, %P1
+   ite\\t%D3\;vmov%D3.f64\\t%P0, %P2\;vmov%d3.f64\\t%P0, %P1
+   it\t%D3\;vmov%D3\\t%P0, %Q2, %R2
+   it\t%d3\;vmov%d3\\t%P0, %Q1, %R1
+   ite\\t%D3\;vmov%D3\\t%P0, %Q2, %R2\;vmov%d3\\t%P0, %Q1, %R1
+   it\t%D3\;vmov%D3\\t%Q0, %R0, %P2
+   it\t%d3\;vmov%d3\\t%Q0, %R0, %P1
+   ite\\t%D3\;vmov%D3\\t%Q0, %R0, %P2\;vmov%d3\\t%Q0, %R0, %P1"
    [(set_attr "conds" "use")
     (set_attr "length" "6,6,10,6,6,10,6,6,10")
-    (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+    (set_attr "type" "ffarithd,ffarithd,ffarithd,f_mcr,f_mcr,f_mcrr,f_mrrc,f_mrrc,f_mrrc")]
 )
 
 
@@ -592,9 +867,10 @@
 (define_insn "*abssf2_vfp"
   [(set (match_operand:SF	  0 "s_register_operand" "=t")
 	(abs:SF (match_operand:SF 1 "s_register_operand" "t")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fabss%?\\t%0, %1"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vabs%?.f32\\t%0, %1"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "ffariths")]
 )
 
@@ -602,19 +878,21 @@
   [(set (match_operand:DF	  0 "s_register_operand" "=w")
 	(abs:DF (match_operand:DF 1 "s_register_operand" "w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fabsd%?\\t%P0, %P1"
+  "vabs%?.f64\\t%P0, %P1"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "ffarithd")]
 )
 
 (define_insn "*negsf2_vfp"
   [(set (match_operand:SF	  0 "s_register_operand" "=t,?r")
 	(neg:SF (match_operand:SF 1 "s_register_operand" "t,r")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
   "@
-   fnegs%?\\t%0, %1
+   vneg%?.f32\\t%0, %1
    eor%?\\t%0, %1, #-2147483648"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "ffariths")]
 )
 
@@ -623,7 +901,7 @@
 	(neg:DF (match_operand:DF 1 "s_register_operand" "w,0,r")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
   "@
-   fnegd%?\\t%P0, %P1
+   vneg%?.f64\\t%P0, %P1
    #
    #"
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && reload_completed
@@ -647,33 +925,89 @@
 
       if (REGNO (in_lo) == REGNO (out_hi))
         {
-          emit_insn (gen_rtx_SET (SImode, out_lo, in_lo));
+          emit_insn (gen_rtx_SET (out_lo, in_lo));
 	  operands[0] = out_hi;
           operands[1] = in_hi;
         }
       else
         {
-          emit_insn (gen_rtx_SET (SImode, out_hi, in_hi));
+          emit_insn (gen_rtx_SET (out_hi, in_hi));
 	  operands[0] = out_lo;
           operands[1] = in_lo;
         }
     }
   "
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "length" "4,4,8")
    (set_attr "type" "ffarithd")]
 )
 
+;; ABS and NEG for FP16.
+(define_insn "<absneg_str>hf2"
+  [(set (match_operand:HF 0 "s_register_operand" "=w")
+    (ABSNEG:HF (match_operand:HF 1 "s_register_operand" "w")))]
+ "TARGET_VFP_FP16INST"
+ "v<absneg_str>.f16\t%0, %1"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "ffariths")]
+)
+
+(define_expand "neon_vabshf"
+ [(set
+   (match_operand:HF 0 "s_register_operand")
+   (abs:HF (match_operand:HF 1 "s_register_operand")))]
+ "TARGET_VFP_FP16INST"
+{
+  emit_insn (gen_abshf2 (operands[0], operands[1]));
+  DONE;
+})
+
+;; VRND for FP16.
+(define_insn "neon_v<fp16_rnd_str>hf"
+  [(set (match_operand:HF 0 "s_register_operand" "=w")
+    (unspec:HF
+     [(match_operand:HF 1 "s_register_operand" "w")]
+     FP16_RND))]
+ "TARGET_VFP_FP16INST"
+ "<fp16_rnd_insn>.f16\t%0, %1"
+ [(set_attr "conds" "unconditional")
+  (set_attr "type" "neon_fp_round_s")]
+)
+
+(define_insn "neon_vrndihf"
+  [(set (match_operand:HF 0 "s_register_operand" "=w")
+    (unspec:HF
+     [(match_operand:HF 1 "s_register_operand" "w")]
+     UNSPEC_VRNDI))]
+  "TARGET_VFP_FP16INST"
+  "vrintr.f16\t%0, %1"
+ [(set_attr "conds" "unconditional")
+  (set_attr "type" "neon_fp_round_s")]
+)
 
 ;; Arithmetic insns
 
+(define_insn "addhf3"
+  [(set
+    (match_operand:HF 0 "s_register_operand" "=w")
+    (plus:HF
+     (match_operand:HF 1 "s_register_operand" "w")
+     (match_operand:HF 2 "s_register_operand" "w")))]
+ "TARGET_VFP_FP16INST"
+ "vadd.f16\t%0, %1, %2"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "fadds")]
+)
+
 (define_insn "*addsf3_vfp"
   [(set (match_operand:SF	   0 "s_register_operand" "=t")
 	(plus:SF (match_operand:SF 1 "s_register_operand" "t")
 		 (match_operand:SF 2 "s_register_operand" "t")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fadds%?\\t%0, %1, %2"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vadd%?.f32\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fadds")]
 )
 
@@ -682,19 +1016,32 @@
 	(plus:DF (match_operand:DF 1 "s_register_operand" "w")
 		 (match_operand:DF 2 "s_register_operand" "w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "faddd%?\\t%P0, %P1, %P2"
+  "vadd%?.f64\\t%P0, %P1, %P2"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "faddd")]
 )
 
+(define_insn "subhf3"
+ [(set
+   (match_operand:HF 0 "s_register_operand" "=w")
+   (minus:HF
+    (match_operand:HF 1 "s_register_operand" "w")
+    (match_operand:HF 2 "s_register_operand" "w")))]
+ "TARGET_VFP_FP16INST"
+ "vsub.f16\t%0, %1, %2"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "fadds")]
+)
 
 (define_insn "*subsf3_vfp"
   [(set (match_operand:SF	    0 "s_register_operand" "=t")
 	(minus:SF (match_operand:SF 1 "s_register_operand" "t")
 		  (match_operand:SF 2 "s_register_operand" "t")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fsubs%?\\t%0, %1, %2"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vsub%?.f32\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fadds")]
 )
 
@@ -703,75 +1050,153 @@
 	(minus:DF (match_operand:DF 1 "s_register_operand" "w")
 		  (match_operand:DF 2 "s_register_operand" "w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fsubd%?\\t%P0, %P1, %P2"
+  "vsub%?.f64\\t%P0, %P1, %P2"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "faddd")]
 )
 
 
 ;; Division insns
 
+;; FP16 Division.
+(define_insn "divhf3"
+  [(set
+    (match_operand:HF	   0 "s_register_operand" "=w")
+    (div:HF
+     (match_operand:HF 1 "s_register_operand" "w")
+     (match_operand:HF 2 "s_register_operand" "w")))]
+  "TARGET_VFP_FP16INST"
+  "vdiv.f16\t%0, %1, %2"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "fdivs")]
+)
+
+; VFP9 Erratum 760019: It's potentially unsafe to overwrite the input
+; operands, so mark the output as early clobber for VFPv2 on ARMv5 or
+; earlier.
 (define_insn "*divsf3_vfp"
-  [(set (match_operand:SF	  0 "s_register_operand" "+t")
-	(div:SF (match_operand:SF 1 "s_register_operand" "t")
-		(match_operand:SF 2 "s_register_operand" "t")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fdivs%?\\t%0, %1, %2"
+  [(set (match_operand:SF	  0 "s_register_operand" "=&t,t")
+	(div:SF (match_operand:SF 1 "s_register_operand" "t,t")
+		(match_operand:SF 2 "s_register_operand" "t,t")))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vdiv%?.f32\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "arch" "*,armv6_or_vfpv3")
    (set_attr "type" "fdivs")]
 )
 
 (define_insn "*divdf3_vfp"
-  [(set (match_operand:DF	  0 "s_register_operand" "+w")
-	(div:DF (match_operand:DF 1 "s_register_operand" "w")
-		(match_operand:DF 2 "s_register_operand" "w")))]
+  [(set (match_operand:DF	  0 "s_register_operand" "=&w,w")
+	(div:DF (match_operand:DF 1 "s_register_operand" "w,w")
+		(match_operand:DF 2 "s_register_operand" "w,w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fdivd%?\\t%P0, %P1, %P2"
+  "vdiv%?.f64\\t%P0, %P1, %P2"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "arch" "*,armv6_or_vfpv3")
    (set_attr "type" "fdivd")]
 )
 
 
 ;; Multiplication insns
 
+(define_insn "mulhf3"
+ [(set
+   (match_operand:HF 0 "s_register_operand" "=w")
+   (mult:HF (match_operand:HF 1 "s_register_operand" "w")
+	    (match_operand:HF 2 "s_register_operand" "w")))]
+  "TARGET_VFP_FP16INST"
+  "vmul.f16\t%0, %1, %2"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "fmuls")]
+)
+
 (define_insn "*mulsf3_vfp"
-  [(set (match_operand:SF	   0 "s_register_operand" "+t")
+  [(set (match_operand:SF	   0 "s_register_operand" "=t")
 	(mult:SF (match_operand:SF 1 "s_register_operand" "t")
 		 (match_operand:SF 2 "s_register_operand" "t")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fmuls%?\\t%0, %1, %2"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vmul%?.f32\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmuls")]
 )
 
 (define_insn "*muldf3_vfp"
-  [(set (match_operand:DF	   0 "s_register_operand" "+w")
+  [(set (match_operand:DF	   0 "s_register_operand" "=w")
 	(mult:DF (match_operand:DF 1 "s_register_operand" "w")
 		 (match_operand:DF 2 "s_register_operand" "w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fmuld%?\\t%P0, %P1, %P2"
+  "vmul%?.f64\\t%P0, %P1, %P2"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmuld")]
 )
 
+(define_insn "*mulsf3neghf_vfp"
+  [(set (match_operand:HF		   0 "s_register_operand" "=t")
+	(mult:HF (neg:HF (match_operand:HF 1 "s_register_operand" "t"))
+		 (match_operand:HF	   2 "s_register_operand" "t")))]
+  "TARGET_VFP_FP16INST && !flag_rounding_math"
+  "vnmul.f16\\t%0, %1, %2"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "fmuls")]
+)
+
+(define_insn "*negmulhf3_vfp"
+  [(set (match_operand:HF		   0 "s_register_operand" "=t")
+	(neg:HF (mult:HF (match_operand:HF 1 "s_register_operand" "t")
+		 (match_operand:HF	   2 "s_register_operand" "t"))))]
+  "TARGET_VFP_FP16INST"
+  "vnmul.f16\\t%0, %1, %2"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "fmuls")]
+)
 
 (define_insn "*mulsf3negsf_vfp"
-  [(set (match_operand:SF		   0 "s_register_operand" "+t")
+  [(set (match_operand:SF		   0 "s_register_operand" "=t")
 	(mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "t"))
 		 (match_operand:SF	   2 "s_register_operand" "t")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fnmuls%?\\t%0, %1, %2"
+  "TARGET_32BIT && TARGET_HARD_FLOAT && !flag_rounding_math"
+  "vnmul%?.f32\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "fmuls")]
+)
+
+(define_insn "*negmulsf3_vfp"
+  [(set (match_operand:SF		   0 "s_register_operand" "=t")
+	(neg:SF (mult:SF (match_operand:SF 1 "s_register_operand" "t")
+		 (match_operand:SF	   2 "s_register_operand" "t"))))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vnmul%?.f32\\t%0, %1, %2"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmuls")]
 )
 
 (define_insn "*muldf3negdf_vfp"
-  [(set (match_operand:DF		   0 "s_register_operand" "+w")
+  [(set (match_operand:DF		   0 "s_register_operand" "=w")
 	(mult:DF (neg:DF (match_operand:DF 1 "s_register_operand" "w"))
 		 (match_operand:DF	   2 "s_register_operand" "w")))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE
+  && !flag_rounding_math"
+  "vnmul%?.f64\\t%P0, %P1, %P2"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "fmuld")]
+)
+
+(define_insn "*negmuldf3_vfp"
+  [(set (match_operand:DF		   0 "s_register_operand" "=w")
+	(neg:DF (mult:DF (match_operand:DF 1 "s_register_operand" "w")
+		 (match_operand:DF	   2 "s_register_operand" "w"))))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fnmuld%?\\t%P0, %P1, %P2"
+  "vnmul%?.f64\\t%P0, %P1, %P2"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmuld")]
 )
 
@@ -779,14 +1204,27 @@
 ;; Multiply-accumulate insns
 
 ;; 0 = 1 * 2 + 0
+(define_insn "*mulsf3addhf_vfp"
+ [(set (match_operand:HF 0 "s_register_operand" "=t")
+       (plus:HF
+	(mult:HF (match_operand:HF 2 "s_register_operand" "t")
+		 (match_operand:HF 3 "s_register_operand" "t"))
+	(match_operand:HF 1 "s_register_operand" "0")))]
+  "TARGET_VFP_FP16INST"
+  "vmla.f16\\t%0, %2, %3"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "fmacs")]
+)
+
 (define_insn "*mulsf3addsf_vfp"
   [(set (match_operand:SF		    0 "s_register_operand" "=t")
 	(plus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t")
 			  (match_operand:SF 3 "s_register_operand" "t"))
 		 (match_operand:SF	    1 "s_register_operand" "0")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fmacs%?\\t%0, %2, %3"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vmla%?.f32\\t%0, %2, %3"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacs")]
 )
 
@@ -796,20 +1234,33 @@
 			  (match_operand:DF 3 "s_register_operand" "w"))
 		 (match_operand:DF	    1 "s_register_operand" "0")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fmacd%?\\t%P0, %P2, %P3"
+  "vmla%?.f64\\t%P0, %P2, %P3"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacd")]
 )
 
 ;; 0 = 1 * 2 - 0
+(define_insn "*mulhf3subhf_vfp"
+  [(set (match_operand:HF 0 "s_register_operand" "=t")
+	(minus:HF (mult:HF (match_operand:HF 2 "s_register_operand" "t")
+			   (match_operand:HF 3 "s_register_operand" "t"))
+		  (match_operand:HF 1 "s_register_operand" "0")))]
+  "TARGET_VFP_FP16INST"
+  "vnmls.f16\\t%0, %2, %3"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "fmacs")]
+)
+
 (define_insn "*mulsf3subsf_vfp"
   [(set (match_operand:SF		     0 "s_register_operand" "=t")
 	(minus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t")
 			   (match_operand:SF 3 "s_register_operand" "t"))
 		  (match_operand:SF	     1 "s_register_operand" "0")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fmscs%?\\t%0, %2, %3"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vnmls%?.f32\\t%0, %2, %3"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacs")]
 )
 
@@ -819,20 +1270,33 @@
 			   (match_operand:DF 3 "s_register_operand" "w"))
 		  (match_operand:DF	     1 "s_register_operand" "0")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fmscd%?\\t%P0, %P2, %P3"
+  "vnmls%?.f64\\t%P0, %P2, %P3"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacd")]
 )
 
 ;; 0 = -(1 * 2) + 0
+(define_insn "*mulhf3neghfaddhf_vfp"
+  [(set (match_operand:HF 0 "s_register_operand" "=t")
+	(minus:HF (match_operand:HF 1 "s_register_operand" "0")
+		  (mult:HF (match_operand:HF 2 "s_register_operand" "t")
+			   (match_operand:HF 3 "s_register_operand" "t"))))]
+  "TARGET_VFP_FP16INST"
+  "vmls.f16\\t%0, %2, %3"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "fmacs")]
+)
+
 (define_insn "*mulsf3negsfaddsf_vfp"
   [(set (match_operand:SF		     0 "s_register_operand" "=t")
 	(minus:SF (match_operand:SF	     1 "s_register_operand" "0")
 		  (mult:SF (match_operand:SF 2 "s_register_operand" "t")
 			   (match_operand:SF 3 "s_register_operand" "t"))))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fnmacs%?\\t%0, %2, %3"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vmls%?.f32\\t%0, %2, %3"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacs")]
 )
 
@@ -842,22 +1306,36 @@
 		  (mult:DF (match_operand:DF 2 "s_register_operand" "w")
 			   (match_operand:DF 3 "s_register_operand" "w"))))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fnmacd%?\\t%P0, %P2, %P3"
+  "vmls%?.f64\\t%P0, %P2, %P3"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacd")]
 )
 
 
 ;; 0 = -(1 * 2) - 0
+(define_insn "*mulhf3neghfsubhf_vfp"
+  [(set (match_operand:HF 0 "s_register_operand" "=t")
+	(minus:HF (mult:HF
+		   (neg:HF (match_operand:HF 2 "s_register_operand" "t"))
+		   (match_operand:HF 3 "s_register_operand" "t"))
+		  (match_operand:HF 1 "s_register_operand" "0")))]
+  "TARGET_VFP_FP16INST"
+  "vnmla.f16\\t%0, %2, %3"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "fmacs")]
+)
+
 (define_insn "*mulsf3negsfsubsf_vfp"
   [(set (match_operand:SF		      0 "s_register_operand" "=t")
 	(minus:SF (mult:SF
 		    (neg:SF (match_operand:SF 2 "s_register_operand" "t"))
 		    (match_operand:SF	      3 "s_register_operand" "t"))
 		  (match_operand:SF	      1 "s_register_operand" "0")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fnmscs%?\\t%0, %2, %3"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vnmla%?.f32\\t%0, %2, %3"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fmacs")]
 )
 
@@ -868,9 +1346,132 @@
 		    (match_operand:DF	      3 "s_register_operand" "w"))
 		  (match_operand:DF	      1 "s_register_operand" "0")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fnmscd%?\\t%P0, %P2, %P3"
+  "vnmla%?.f64\\t%P0, %P2, %P3"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "fmacd")]
+)
+
+;; Fused-multiply-accumulate
+
+(define_insn "fmahf4"
+  [(set (match_operand:HF 0 "register_operand" "=w")
+    (fma:HF
+     (match_operand:HF 1 "register_operand" "w")
+     (match_operand:HF 2 "register_operand" "w")
+     (match_operand:HF 3 "register_operand" "0")))]
+ "TARGET_VFP_FP16INST"
+ "vfma.f16\\t%0, %1, %2"
+ [(set_attr "conds" "unconditional")
+  (set_attr "type" "ffmas")]
+)
+
+(define_expand "neon_vfmahf"
+  [(match_operand:HF 0 "s_register_operand")
+   (match_operand:HF 1 "s_register_operand")
+   (match_operand:HF 2 "s_register_operand")
+   (match_operand:HF 3 "s_register_operand")]
+  "TARGET_VFP_FP16INST"
+{
+  emit_insn (gen_fmahf4 (operands[0], operands[2], operands[3],
+			 operands[1]));
+  DONE;
+})
+
+(define_insn "fma<SDF:mode>4"
+  [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
+        (fma:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>")
+		 (match_operand:SDF 2 "register_operand" "<F_constraint>")
+		 (match_operand:SDF 3 "register_operand" "0")))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA"
+  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "fmacd")]
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "ffma<vfp_type>")]
+)
+
+(define_insn "fmsubhf4_fp16"
+ [(set (match_operand:HF 0 "register_operand" "=w")
+   (fma:HF
+    (neg:HF (match_operand:HF 1 "register_operand" "w"))
+    (match_operand:HF 2 "register_operand" "w")
+    (match_operand:HF 3 "register_operand" "0")))]
+ "TARGET_VFP_FP16INST"
+ "vfms.f16\\t%0, %1, %2"
+ [(set_attr "conds" "unconditional")
+  (set_attr "type" "ffmas")]
+)
+
+(define_expand "neon_vfmshf"
+  [(match_operand:HF 0 "s_register_operand")
+   (match_operand:HF 1 "s_register_operand")
+   (match_operand:HF 2 "s_register_operand")
+   (match_operand:HF 3 "s_register_operand")]
+  "TARGET_VFP_FP16INST"
+{
+  emit_insn (gen_fmsubhf4_fp16 (operands[0], operands[2], operands[3],
+				operands[1]));
+  DONE;
+})
+
+(define_insn "*fmsub<SDF:mode>4"
+  [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
+	(fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand"
+					     "<F_constraint>"))
+		 (match_operand:SDF 2 "register_operand" "<F_constraint>")
+		 (match_operand:SDF 3 "register_operand" "0")))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA"
+  "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "ffma<vfp_type>")]
+)
+
+(define_insn "*fnmsubhf4"
+  [(set (match_operand:HF 0 "register_operand" "=w")
+	(fma:HF (match_operand:HF 1 "register_operand" "w")
+		 (match_operand:HF 2 "register_operand" "w")
+		 (neg:HF (match_operand:HF 3 "register_operand" "0"))))]
+  "TARGET_VFP_FP16INST"
+  "vfnms.f16\\t%0, %1, %2"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "ffmas")]
+)
+
+(define_insn "*fnmsub<SDF:mode>4"
+  [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
+	(fma:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>")
+		 (match_operand:SDF 2 "register_operand" "<F_constraint>")
+		 (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA"
+  "vfnms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "ffma<vfp_type>")]
+)
+
+(define_insn "*fnmaddhf4"
+  [(set (match_operand:HF 0 "register_operand" "=w")
+	(fma:HF (neg:HF (match_operand:HF 1 "register_operand" "w"))
+		 (match_operand:HF 2 "register_operand" "w")
+		 (neg:HF (match_operand:HF 3 "register_operand" "0"))))]
+  "TARGET_VFP_FP16INST"
+  "vfnma.f16\\t%0, %1, %2"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "ffmas")]
+)
+
+(define_insn "*fnmadd<SDF:mode>4"
+  [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
+	(fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand"
+					       "<F_constraint>"))
+		 (match_operand:SDF 2 "register_operand" "<F_constraint>")
+		 (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA"
+  "vfnma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "ffma<vfp_type>")]
 )
 
 
@@ -880,8 +1481,9 @@
   [(set (match_operand:DF		   0 "s_register_operand" "=w")
 	(float_extend:DF (match_operand:SF 1 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fcvtds%?\\t%P0, %1"
+  "vcvt%?.f64.f32\\t%P0, %1"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "f_cvt")]
 )
 
@@ -889,123 +1491,182 @@
   [(set (match_operand:SF		   0 "s_register_operand" "=t")
 	(float_truncate:SF (match_operand:DF 1 "s_register_operand" "w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fcvtsd%?\\t%0, %P1"
+  "vcvt%?.f32.f64\\t%0, %P1"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "f_cvt")]
 )
 
 (define_insn "extendhfsf2"
   [(set (match_operand:SF		   0 "s_register_operand" "=t")
 	(float_extend:SF (match_operand:HF 1 "s_register_operand" "t")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16"
+  "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FP16 || TARGET_VFP_FP16INST)"
   "vcvtb%?.f32.f16\\t%0, %1"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "f_cvt")]
+)
+
+(define_insn "*truncdfhf2"
+  [(set (match_operand:HF		   0 "s_register_operand" "=t")
+	(float_truncate:HF (match_operand:DF 1 "s_register_operand" "w")))]
+  "TARGET_32BIT && TARGET_FP16_TO_DOUBLE"
+  "vcvtb%?.f16.f64\\t%0, %P1"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "f_cvt")]
+)
+
+(define_insn "*extendhfdf2"
+  [(set (match_operand:DF		   0 "s_register_operand" "=w")
+	(float_extend:DF (match_operand:HF 1 "s_register_operand" "t")))]
+  "TARGET_32BIT && TARGET_FP16_TO_DOUBLE"
+  "vcvtb%?.f64.f16\\t%P0, %1"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "f_cvt")]
 )
 
 (define_insn "truncsfhf2"
   [(set (match_operand:HF		   0 "s_register_operand" "=t")
 	(float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16"
+  "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FP16 || TARGET_VFP_FP16INST)"
   "vcvtb%?.f16.f32\\t%0, %1"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "f_cvt")]
 )
 
 (define_insn "*truncsisf2_vfp"
   [(set (match_operand:SI		  0 "s_register_operand" "=t")
 	(fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "ftosizs%?\\t%0, %1"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vcvt%?.s32.f32\\t%0, %1"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "f_cvt")]
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "f_cvtf2i")]
 )
 
 (define_insn "*truncsidf2_vfp"
   [(set (match_operand:SI		  0 "s_register_operand" "=t")
 	(fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "w"))))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "ftosizd%?\\t%0, %P1"
+  "vcvt%?.s32.f64\\t%0, %P1"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "f_cvt")]
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "f_cvtf2i")]
 )
 
 
 (define_insn "fixuns_truncsfsi2"
   [(set (match_operand:SI		  0 "s_register_operand" "=t")
 	(unsigned_fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "ftouizs%?\\t%0, %1"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vcvt%?.u32.f32\\t%0, %1"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "f_cvt")]
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "f_cvtf2i")]
 )
 
 (define_insn "fixuns_truncdfsi2"
   [(set (match_operand:SI		  0 "s_register_operand" "=t")
 	(unsigned_fix:SI (fix:DF (match_operand:DF 1 "s_register_operand" "t"))))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "ftouizd%?\\t%0, %P1"
+  "vcvt%?.u32.f64\\t%0, %P1"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "f_cvt")]
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "f_cvtf2i")]
 )
 
 
 (define_insn "*floatsisf2_vfp"
   [(set (match_operand:SF	    0 "s_register_operand" "=t")
 	(float:SF (match_operand:SI 1 "s_register_operand" "t")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fsitos%?\\t%0, %1"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vcvt%?.f32.s32\\t%0, %1"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "f_cvt")]
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "f_cvti2f")]
 )
 
 (define_insn "*floatsidf2_vfp"
   [(set (match_operand:DF	    0 "s_register_operand" "=w")
 	(float:DF (match_operand:SI 1 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fsitod%?\\t%P0, %1"
+  "vcvt%?.f64.s32\\t%P0, %1"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "f_cvt")]
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "f_cvti2f")]
 )
 
 
 (define_insn "floatunssisf2"
   [(set (match_operand:SF	    0 "s_register_operand" "=t")
 	(unsigned_float:SF (match_operand:SI 1 "s_register_operand" "t")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fuitos%?\\t%0, %1"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vcvt%?.f32.u32\\t%0, %1"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "f_cvt")]
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "f_cvti2f")]
 )
 
 (define_insn "floatunssidf2"
   [(set (match_operand:DF	    0 "s_register_operand" "=w")
 	(unsigned_float:DF (match_operand:SI 1 "s_register_operand" "t")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fuitod%?\\t%P0, %1"
+  "vcvt%?.f64.u32\\t%P0, %1"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "f_cvt")]
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "f_cvti2f")]
 )
 
 
 ;; Sqrt insns.
 
+(define_insn "neon_vsqrthf"
+  [(set (match_operand:HF 0 "s_register_operand" "=w")
+	(sqrt:HF (match_operand:HF 1 "s_register_operand" "w")))]
+  "TARGET_VFP_FP16INST"
+  "vsqrt.f16\t%0, %1"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "fsqrts")]
+)
+
+(define_insn "neon_vrsqrtshf"
+  [(set
+    (match_operand:HF 0 "s_register_operand" "=w")
+    (unspec:HF [(match_operand:HF 1 "s_register_operand" "w")
+		(match_operand:HF 2 "s_register_operand" "w")]
+     UNSPEC_VRSQRTS))]
+ "TARGET_VFP_FP16INST"
+ "vrsqrts.f16\t%0, %1, %2"
+ [(set_attr "conds" "unconditional")
+  (set_attr "type" "fsqrts")]
+)
+
+; VFP9 Erratum 760019: It's potentially unsafe to overwrite the input
+; operands, so mark the output as early clobber for VFPv2 on ARMv5 or
+; earlier.
 (define_insn "*sqrtsf2_vfp"
-  [(set (match_operand:SF	   0 "s_register_operand" "=t")
-	(sqrt:SF (match_operand:SF 1 "s_register_operand" "t")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fsqrts%?\\t%0, %1"
+  [(set (match_operand:SF	   0 "s_register_operand" "=&t,t")
+	(sqrt:SF (match_operand:SF 1 "s_register_operand" "t,t")))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vsqrt%?.f32\\t%0, %1"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "fdivs")]
+   (set_attr "predicable_short_it" "no")
+   (set_attr "arch" "*,armv6_or_vfpv3")
+   (set_attr "type" "fsqrts")]
 )
 
 (define_insn "*sqrtdf2_vfp"
-  [(set (match_operand:DF	   0 "s_register_operand" "=w")
-	(sqrt:DF (match_operand:DF 1 "s_register_operand" "w")))]
+  [(set (match_operand:DF	   0 "s_register_operand" "=&w,w")
+	(sqrt:DF (match_operand:DF 1 "s_register_operand" "w,w")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
-  "fsqrtd%?\\t%P0, %P1"
+  "vsqrt%?.f64\\t%P0, %P1"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "fdivd")]
+   (set_attr "predicable_short_it" "no")
+   (set_attr "arch" "*,armv6_or_vfpv3")
+   (set_attr "type" "fsqrtd")]
 )
 
 
@@ -1014,8 +1675,8 @@
 (define_insn "*movcc_vfp"
   [(set (reg CC_REGNUM)
 	(reg VFPCC_REGNUM))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "fmstat%?"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "vmrs%?\\tAPSR_nzcv, FPSCR"
   [(set_attr "conds" "set")
    (set_attr "type" "f_flag")]
 )
@@ -1024,9 +1685,9 @@
   [(set (reg:CCFP CC_REGNUM)
 	(compare:CCFP (match_operand:SF 0 "s_register_operand"  "t")
 		      (match_operand:SF 1 "vfp_compare_operand" "tG")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
   "#"
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
   [(set (reg:CCFP VFPCC_REGNUM)
 	(compare:CCFP (match_dup 0)
 		      (match_dup 1)))
@@ -1039,9 +1700,9 @@
   [(set (reg:CCFPE CC_REGNUM)
 	(compare:CCFPE (match_operand:SF 0 "s_register_operand"  "t")
 		       (match_operand:SF 1 "vfp_compare_operand" "tG")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
   "#"
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
   [(set (reg:CCFPE VFPCC_REGNUM)
 	(compare:CCFPE (match_dup 0)
 		       (match_dup 1)))
@@ -1083,15 +1744,19 @@
 
 ;; Comparison patterns
 
+;; In the compare with FP zero case the ARM Architecture Reference Manual
+;; specifies the immediate to be #0.0.  However, some buggy assemblers only
+;; accept #0.  We don't want to autodetect broken assemblers, so output #0.
 (define_insn "*cmpsf_vfp"
   [(set (reg:CCFP VFPCC_REGNUM)
 	(compare:CCFP (match_operand:SF 0 "s_register_operand"  "t,t")
 		      (match_operand:SF 1 "vfp_compare_operand" "t,G")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
   "@
-   fcmps%?\\t%0, %1
-   fcmpzs%?\\t%0"
+   vcmp%?.f32\\t%0, %1
+   vcmp%?.f32\\t%0, #0"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fcmps")]
 )
 
@@ -1099,11 +1764,12 @@
   [(set (reg:CCFPE VFPCC_REGNUM)
 	(compare:CCFPE (match_operand:SF 0 "s_register_operand"  "t,t")
 		       (match_operand:SF 1 "vfp_compare_operand" "t,G")))]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
   "@
-   fcmpes%?\\t%0, %1
-   fcmpezs%?\\t%0"
+   vcmpe%?.f32\\t%0, %1
+   vcmpe%?.f32\\t%0, #0"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fcmps")]
 )
 
@@ -1113,9 +1779,10 @@
 		      (match_operand:DF 1 "vfp_compare_operand" "w,G")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
   "@
-   fcmpd%?\\t%P0, %P1
-   fcmpzd%?\\t%P0"
+   vcmp%?.f64\\t%P0, %P1
+   vcmp%?.f64\\t%P0, #0"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fcmpd")]
 )
 
@@ -1125,25 +1792,286 @@
 		       (match_operand:DF 1 "vfp_compare_operand" "w,G")))]
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
   "@
-   fcmped%?\\t%P0, %P1
-   fcmpezd%?\\t%P0"
+   vcmpe%?.f64\\t%P0, %P1
+   vcmpe%?.f64\\t%P0, #0"
   [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
    (set_attr "type" "fcmpd")]
 )
 
+;; Fixed point to floating point conversions.
+(define_insn "*combine_vcvt_f32_<FCVTI32typename>"
+  [(set (match_operand:SF 0 "s_register_operand" "=t")
+	(mult:SF (FCVT:SF (match_operand:SI 1 "s_register_operand" "0"))
+		 (match_operand 2
+			"const_double_vcvt_power_of_two_reciprocal" "Dt")))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math"
+  "vcvt%?.f32.<FCVTI32typename>\\t%0, %1, %v2"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "f_cvti2f")]
+)
+
+;; Not the ideal way of implementing this. Ideally we would be able to split
+;; this into a move to a DP register and then a vcvt.f64.i32
+(define_insn "*combine_vcvt_f64_<FCVTI32typename>"
+  [(set (match_operand:DF 0 "s_register_operand" "=x,x,w")
+	(mult:DF (FCVT:DF (match_operand:SI 1 "s_register_operand" "r,t,r"))
+		 (match_operand 2
+		     "const_double_vcvt_power_of_two_reciprocal" "Dt,Dt,Dt")))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math
+  && !TARGET_VFP_SINGLE"
+  "@
+  vmov%?.f32\\t%0, %1\;vcvt%?.f64.<FCVTI32typename>\\t%P0, %P0, %v2
+  vmov%?.f32\\t%0, %1\;vcvt%?.f64.<FCVTI32typename>\\t%P0, %P0, %v2
+  vmov%?.f64\\t%P0, %1, %1\;vcvt%?.f64.<FCVTI32typename>\\t%P0, %P0, %v2"
+  [(set_attr "predicable" "yes")
+   (set_attr "ce_count" "2")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "f_cvti2f")
+   (set_attr "length" "8")]
+)
+
+(define_insn "*combine_vcvtf2i"
+  [(set (match_operand:SI 0 "s_register_operand" "=t")
+	(fix:SI (fix:SF (mult:SF (match_operand:SF 1 "s_register_operand" "0")
+				 (match_operand 2
+				 "const_double_vcvt_power_of_two" "Dp")))))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP3 && !flag_rounding_math"
+  "vcvt%?.s32.f32\\t%0, %1, %v2"
+  [(set_attr "predicable" "yes")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "f_cvtf2i")]
+ )
+
+;; FP16 conversions.
+(define_insn "neon_vcvth<sup>hf"
+ [(set (match_operand:HF 0 "s_register_operand" "=w")
+   (unspec:HF
+    [(match_operand:SI 1 "s_register_operand" "w")]
+    VCVTH_US))]
+ "TARGET_VFP_FP16INST"
+ "vcvt.f16.<sup>%#32\t%0, %1"
+ [(set_attr "conds" "unconditional")
+  (set_attr "type" "f_cvti2f")]
+)
+
+(define_insn "neon_vcvth<sup>si"
+ [(set (match_operand:SI 0 "s_register_operand" "=w")
+   (unspec:SI
+    [(match_operand:HF 1 "s_register_operand" "w")]
+    VCVTH_US))]
+ "TARGET_VFP_FP16INST"
+ "vcvt.<sup>%#32.f16\t%0, %1"
+ [(set_attr "conds" "unconditional")
+  (set_attr "type" "f_cvtf2i")]
+)
+
+;; The neon_vcvth<sup>_nhf patterns are used to generate the instruction for the
+;; vcvth_n_f16_<sup>32 arm_fp16 intrinsics.  They are complicated by the
+;; hardware requirement that the source and destination registers are the same
+;; despite having different machine modes.  The approach is to use a temporary
+;; register for the conversion and move that to the correct destination.
+
+;; Generate an unspec pattern for the intrinsic.
+(define_insn "neon_vcvth<sup>_nhf_unspec"
+ [(set
+   (match_operand:SI 0 "s_register_operand" "=w")
+   (unspec:SI
+    [(match_operand:SI 1 "s_register_operand" "0")
+     (match_operand:SI 2 "immediate_operand" "i")]
+    VCVT_HF_US_N))
+ (set
+  (match_operand:HF 3 "s_register_operand" "=w")
+  (float_truncate:HF (float:SF (match_dup 0))))]
+ "TARGET_VFP_FP16INST"
+{
+  arm_const_bounds (operands[2], 1, 33);
+  return "vcvt.f16.<sup>32\t%0, %0, %2\;vmov.f32\t%3, %0";
+}
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "f_cvti2f")]
+)
+
+;; Generate the instruction patterns needed for vcvth_n_f16_s32 neon intrinsics.
+(define_expand "neon_vcvth<sup>_nhf"
+ [(match_operand:HF 0 "s_register_operand")
+  (unspec:HF [(match_operand:SI 1 "s_register_operand")
+	      (match_operand:SI 2 "immediate_operand")]
+   VCVT_HF_US_N)]
+"TARGET_VFP_FP16INST"
+{
+  rtx op1 = gen_reg_rtx (SImode);
+
+  arm_const_bounds (operands[2], 1, 33);
+
+  emit_move_insn (op1, operands[1]);
+  emit_insn (gen_neon_vcvth<sup>_nhf_unspec (op1, op1, operands[2],
+					     operands[0]));
+  DONE;
+})
+
+;; The neon_vcvth<sup>_nsi patterns are used to generate the instruction for the
+;; vcvth_n_<sup>32_f16 arm_fp16 intrinsics.  They have the same restrictions and
+;; are implemented in the same way as the neon_vcvth<sup>_nhf patterns.
+
+;; Generate an unspec pattern, constraining the registers.
+(define_insn "neon_vcvth<sup>_nsi_unspec"
+ [(set (match_operand:SI 0 "s_register_operand" "=w")
+   (unspec:SI
+    [(fix:SI
+      (fix:SF
+       (float_extend:SF
+	(match_operand:HF 1 "s_register_operand" "w"))))
+     (match_operand:SI 2 "immediate_operand" "i")]
+    VCVT_SI_US_N))]
+ "TARGET_VFP_FP16INST"
+{
+  arm_const_bounds (operands[2], 1, 33);
+  return "vmov.f32\t%0, %1\;vcvt.<sup>%#32.f16\t%0, %0, %2";
+}
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "f_cvtf2i")]
+)
+
+;; Generate the instruction patterns needed for vcvth_n_f16_s32 neon intrinsics.
+(define_expand "neon_vcvth<sup>_nsi"
+ [(match_operand:SI 0 "s_register_operand")
+  (unspec:SI
+   [(match_operand:HF 1 "s_register_operand")
+    (match_operand:SI 2 "immediate_operand")]
+   VCVT_SI_US_N)]
+ "TARGET_VFP_FP16INST"
+{
+  rtx op1 = gen_reg_rtx (SImode);
+
+  arm_const_bounds (operands[2], 1, 33);
+  emit_insn (gen_neon_vcvth<sup>_nsi_unspec (op1, operands[1], operands[2]));
+  emit_move_insn (operands[0], op1);
+  DONE;
+})
+
+(define_insn "neon_vcvt<vcvth_op>h<sup>si"
+ [(set
+   (match_operand:SI 0 "s_register_operand" "=w")
+   (unspec:SI
+    [(match_operand:HF 1 "s_register_operand" "w")]
+    VCVT_HF_US))]
+ "TARGET_VFP_FP16INST"
+ "vcvt<vcvth_op>.<sup>%#32.f16\t%0, %1"
+  [(set_attr "conds" "unconditional")
+   (set_attr "type" "f_cvtf2i")]
+)
 
 ;; Store multiple insn used in function prologue.
-
 (define_insn "*push_multi_vfp"
   [(match_parallel 2 "multi_register_push"
     [(set (match_operand:BLK 0 "memory_operand" "=m")
 	  (unspec:BLK [(match_operand:DF 1 "vfp_register_operand" "")]
 		      UNSPEC_PUSH_MULT))])]
-  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
-  "* return vfp_output_fstmd (operands);"
+  "TARGET_32BIT && TARGET_HARD_FLOAT"
+  "* return vfp_output_vstmd (operands);"
   [(set_attr "type" "f_stored")]
 )
 
+;; VRINT round to integral instructions.
+;; Invoked for the patterns: btruncsf2, btruncdf2, ceilsf2, ceildf2,
+;; roundsf2, rounddf2, floorsf2, floordf2, nearbyintsf2, nearbyintdf2,
+;; rintsf2, rintdf2.
+(define_insn "<vrint_pattern><SDF:mode>2"
+  [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
+        (unspec:SDF [(match_operand:SDF 1
+		         "register_operand" "<F_constraint>")]
+         VRINT))]
+  "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>"
+  "vrint<vrint_variant>%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1"
+  [(set_attr "predicable" "<vrint_predicable>")
+   (set_attr "predicable_short_it" "no")
+   (set_attr "type" "f_rint<vfp_type>")
+   (set_attr "conds" "<vrint_conds>")]
+)
+
+;; Implements the lround, lfloor and lceil optabs.
+(define_insn "l<vrint_pattern><su_optab><mode>si2"
+  [(set (match_operand:SI 0 "register_operand" "=t")
+        (FIXUORS:SI (unspec:SDF
+                        [(match_operand:SDF 1
+                           "register_operand" "<F_constraint>")] VCVT)))]
+  "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>"
+  "vcvt<vrint_variant>.<su>32.<V_if_elem>\\t%0, %<V_reg>1"
+  [(set_attr "predicable" "no")
+   (set_attr "conds" "unconditional")
+   (set_attr "type" "f_cvtf2i")]
+)
+
+;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
+;; The 'smax' and 'smin' RTL standard pattern names do not specify which
+;; operand will be returned when both operands are zero (i.e. they may not
+;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
+;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
+;; NaNs.
+
+(define_insn "smax<mode>3"
+  [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
+        (smax:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>")
+		  (match_operand:SDF 2 "register_operand" "<F_constraint>")))]
+  "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>"
+  "vmaxnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "type" "f_minmax<vfp_type>")
+   (set_attr "conds" "unconditional")]
+)
+
+(define_insn "smin<mode>3"
+  [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>")
+        (smin:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>")
+		  (match_operand:SDF 2 "register_operand" "<F_constraint>")))]
+  "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>"
+  "vminnm.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "type" "f_minmax<vfp_type>")
+   (set_attr "conds" "unconditional")]
+)
+
+;; Scalar forms for the IEEE-754 fmax()/fmin() functions
+
+(define_insn "neon_<fmaxmin_op>hf"
+ [(set
+   (match_operand:HF 0 "s_register_operand" "=w")
+   (unspec:HF
+    [(match_operand:HF 1 "s_register_operand" "w")
+     (match_operand:HF 2 "s_register_operand" "w")]
+    VMAXMINFNM))]
+ "TARGET_VFP_FP16INST"
+ "<fmaxmin_op>.f16\t%0, %1, %2"
+ [(set_attr "conds" "unconditional")
+  (set_attr "type" "f_minmaxs")]
+)
+
+(define_insn "<fmaxmin><mode>3"
+  [(set (match_operand:SDF 0 "s_register_operand" "=<F_constraint>")
+	(unspec:SDF [(match_operand:SDF 1 "s_register_operand" "<F_constraint>")
+		     (match_operand:SDF 2 "s_register_operand" "<F_constraint>")]
+		     VMAXMINFNM))]
+  "TARGET_HARD_FLOAT && TARGET_VFP5 <vfp_double_cond>"
+  "<fmaxmin_op>.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "type" "f_minmax<vfp_type>")
+   (set_attr "conds" "unconditional")]
+)
+
+;; Write Floating-point Status and Control Register.
+(define_insn "set_fpscr"
+  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR)]
+  "TARGET_HARD_FLOAT"
+  "mcr\\tp10, 7, %0, cr1, cr0, 0\\t @SET_FPSCR"
+  [(set_attr "type" "mrs")])
+
+;; Read Floating-point Status and Control Register.
+(define_insn "get_fpscr"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec_volatile:SI [(const_int 0)] VUNSPEC_GET_FPSCR))]
+  "TARGET_HARD_FLOAT"
+  "mrc\\tp10, 7, %0, cr1, cr0, 0\\t @GET_FPSCR"
+  [(set_attr "type" "mrs")])
+
 
 ;; Unimplemented insns:
 ;; fldm*
@@ -1151,3 +2079,40 @@
 ;; fmdhr et al (VFPv1)
 ;; Support for xD (single precision only) variants.
 ;; fmrrs, fmsrr
+
+;; Split an immediate DF move to two immediate SI moves.
+(define_insn_and_split "no_literal_pool_df_immediate"
+  [(set (match_operand:DF 0 "s_register_operand" "")
+	(match_operand:DF 1 "const_double_operand" ""))]
+  "TARGET_THUMB2 && arm_disable_literal_pool
+  && !(TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE
+       && vfp3_const_double_rtx (operands[1]))"
+  "#"
+  "&& !reload_completed"
+  [(set (subreg:SI (match_dup 1) 0) (match_dup 2))
+   (set (subreg:SI (match_dup 1) 4) (match_dup 3))
+   (set (match_dup 0) (match_dup 1))]
+  "
+  long buf[2];
+  real_to_target (buf, CONST_DOUBLE_REAL_VALUE (operands[1]), DFmode);
+  operands[2] = GEN_INT ((int) buf[0]);
+  operands[3] = GEN_INT ((int) buf[1]);
+  operands[1] = gen_reg_rtx (DFmode);
+  ")
+
+;; Split an immediate SF move to one immediate SI move.
+(define_insn_and_split "no_literal_pool_sf_immediate"
+  [(set (match_operand:SF 0 "s_register_operand" "")
+	(match_operand:SF 1 "const_double_operand" ""))]
+  "TARGET_THUMB2 && arm_disable_literal_pool
+  && !(TARGET_HARD_FLOAT && vfp3_const_double_rtx (operands[1]))"
+  "#"
+  "&& !reload_completed"
+  [(set (subreg:SI (match_dup 1) 0) (match_dup 2))
+   (set (match_dup 0) (match_dup 1))]
+  "
+  long buf;
+  real_to_target (&buf, CONST_DOUBLE_REAL_VALUE (operands[1]), SFmode);
+  operands[2] = GEN_INT ((int) buf);
+  operands[1] = gen_reg_rtx (SFmode);
+  ")
author	kono
date	Fri, 27 Oct 2017 22:46:09 +0900
parents	f6334be47118
children	84e7813d76e9