diff gcc/config/arm/cortex-a9.md @ 67:f6334be47118

update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 22 Mar 2011 17:18:12 +0900
parents 77e2b8dfacca
children 04ced10e8804
line wrap: on
line diff
--- a/gcc/config/arm/cortex-a9.md	Tue May 25 18:58:51 2010 +0900
+++ b/gcc/config/arm/cortex-a9.md	Tue Mar 22 17:18:12 2011 +0900
@@ -1,9 +1,11 @@
 ;; ARM Cortex-A9 pipeline description
-;; Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+;; Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
 ;; Originally written by CodeSourcery for VFP.
 ;;
-;; Integer core pipeline description contributed by ARM Ltd.
-;;
+;; Rewritten by Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
+;; Integer Pipeline description contributed by ARM Ltd.
+;; VFP Pipeline description rewritten and contributed by ARM Ltd.
+
 ;; This file is part of GCC.
 ;;
 ;; GCC is free software; you can redistribute it and/or modify it
@@ -22,28 +24,27 @@
 
 (define_automaton "cortex_a9")
 
-;; The Cortex-A9 integer core is modelled as a dual issue pipeline that has
+;; The Cortex-A9 core is modelled as a dual issue pipeline that has
 ;; the following components.
 ;; 1. 1 Load Store Pipeline.
 ;; 2. P0 / main pipeline for data processing instructions.
 ;; 3. P1 / Dual pipeline for Data processing instructions.
 ;; 4. MAC pipeline for multiply as well as multiply
 ;;    and accumulate instructions.
-;; 5. 1 VFP / Neon pipeline.
-;; The Load/Store and VFP/Neon pipeline are multiplexed.
+;; 5. 1 VFP and an optional Neon unit.
+;; The Load/Store, VFP and Neon issue pipeline are multiplexed.
 ;; The P0 / main pipeline and M1 stage of the MAC pipeline are
 ;;   multiplexed.
 ;; The P1 / dual pipeline and M2 stage of the MAC pipeline are
 ;;   multiplexed.
-;; There are only 4 register read ports and hence at any point of
+;; There are only 4 integer register read ports and hence at any point of
 ;; time we can't have issue down the E1 and the E2 ports unless
 ;; of course there are bypass paths that get exercised.
 ;; Both P0 and P1 have 2 stages E1 and E2.
 ;; Data processing instructions issue to E1 or E2 depending on
 ;; whether they have an early shift or not.
 
-
-(define_cpu_unit "cortex_a9_vfp, cortex_a9_ls" "cortex_a9")
+(define_cpu_unit "ca9_issue_vfp_neon, cortex_a9_ls" "cortex_a9")
 (define_cpu_unit "cortex_a9_p0_e1, cortex_a9_p0_e2" "cortex_a9")
 (define_cpu_unit "cortex_a9_p1_e1, cortex_a9_p1_e2" "cortex_a9")
 (define_cpu_unit "cortex_a9_p0_wb, cortex_a9_p1_wb" "cortex_a9")
@@ -71,20 +72,18 @@
 
 ;; Issue at the same time along the load store pipeline and
 ;; the VFP / Neon pipeline is not possible.
-;; FIXME:: At some point we need to model the issue
-;; of the load store and the vfp being shared rather than anything else.
-
-(exclusion_set "cortex_a9_ls" "cortex_a9_vfp")
-
+(exclusion_set "cortex_a9_ls" "ca9_issue_vfp_neon")
 
 ;; Default data processing instruction without any shift
 ;; The only exception to this is the mov instruction
 ;; which can go down E2 without any problem.
 (define_insn_reservation "cortex_a9_dp" 2
   (and (eq_attr "tune" "cortexa9")
-       (ior (eq_attr "type" "alu")
-	    (and (eq_attr "type" "alu_shift_reg, alu_shift")
-		 (eq_attr "insn" "mov"))))
+         (ior (and (eq_attr "type" "alu")
+                        (eq_attr "neon_type" "none"))
+	      (and (and (eq_attr "type" "alu_shift_reg, alu_shift")
+			(eq_attr "insn" "mov"))
+                 (eq_attr "neon_type" "none"))))
   "cortex_a9_p0_default|cortex_a9_p1_default")
 
 ;; An instruction using the shifter will go down E1.
@@ -101,18 +100,13 @@
 
 (define_insn_reservation "cortex_a9_load1_2" 4
   (and (eq_attr "tune" "cortexa9")
-       (eq_attr "type" "load1, load2, load_byte"))
+       (eq_attr "type" "load1, load2, load_byte, f_loads, f_loadd"))
   "cortex_a9_ls")
 
 ;; Loads multiples and store multiples can't be issued for 2 cycles in a
 ;; row. The description below assumes that addresses are 64 bit aligned.
 ;; If not, there is an extra cycle latency which is not modelled.
 
-;; FIXME:: This bit might need to be reworked when we get to
-;; tuning for the VFP because strictly speaking the ldm
-;; is sent to the LSU unit as is and there is only an
-;; issue restriction between the LSU and the VFP/ Neon unit.
-
 (define_insn_reservation "cortex_a9_load3_4" 5
   (and (eq_attr "tune" "cortexa9")
        (eq_attr "type" "load3, load4"))
@@ -120,12 +114,13 @@
 
 (define_insn_reservation "cortex_a9_store1_2" 0
   (and (eq_attr "tune" "cortexa9")
-       (eq_attr "type" "store1, store2"))
+       (eq_attr "type" "store1, store2, f_stores, f_stored"))
   "cortex_a9_ls")
 
 ;; Almost all our store multiples use an auto-increment
 ;; form. Don't issue back to back load and store multiples
 ;; because the load store unit will stall.
+
 (define_insn_reservation "cortex_a9_store3_4" 0
   (and (eq_attr "tune" "cortexa9")
        (eq_attr "type" "store3, store4"))
@@ -193,47 +188,82 @@
 (define_insn_reservation "cortex_a9_call"  0
   (and (eq_attr "tune" "cortexa9")
        (eq_attr "type" "call"))
-  "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + cortex_a9_vfp")
+  "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + ca9_issue_vfp_neon")
 
 
 ;; Pipelining for VFP instructions.
+;; Issue happens either along load store unit or the VFP / Neon unit.
+;; Pipeline   Instruction Classification.
+;; FPS - fcpys, ffariths, ffarithd,r_2_f,f_2_r
+;; FP_ADD   - fadds, faddd, fcmps (1)
+;; FPMUL   - fmul{s,d}, fmac{s,d}
+;; FPDIV - fdiv{s,d}
+(define_cpu_unit "ca9fps" "cortex_a9")
+(define_cpu_unit "ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4" "cortex_a9")
+(define_cpu_unit "ca9fp_mul1, ca9fp_mul2 , ca9fp_mul3, ca9fp_mul4" "cortex_a9")
+(define_cpu_unit "ca9fp_ds1" "cortex_a9")
 
-(define_insn_reservation "cortex_a9_ffarith" 1
+
+;; fmrs, fmrrd, fmstat and fmrx - The data is available after 1 cycle.
+(define_insn_reservation "cortex_a9_fps" 2
  (and (eq_attr "tune" "cortexa9")
-      (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd,fconsts,fconstd"))
- "cortex_a9_vfp")
+      (eq_attr "type" "fcpys, fconsts, fconstd, ffariths, ffarithd, r_2_f, f_2_r, f_flag"))
+ "ca9_issue_vfp_neon + ca9fps")
+
+(define_bypass 1
+  "cortex_a9_fps"
+  "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply")
+
+;; Scheduling on the FP_ADD pipeline.
+(define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4")
 
 (define_insn_reservation "cortex_a9_fadd" 4
- (and (eq_attr "tune" "cortexa9")
-      (eq_attr "type" "fadds,faddd,f_cvt"))
- "cortex_a9_vfp")
+  (and (eq_attr "tune" "cortexa9")
+       (eq_attr "type" "fadds, faddd, f_cvt"))
+  "ca9fp_add")
+
+(define_insn_reservation "cortex_a9_fcmp" 1
+  (and (eq_attr "tune" "cortexa9")
+      (eq_attr "type" "fcmps, fcmpd"))
+ "ca9_issue_vfp_neon + ca9fp_add1")
+
+;; Scheduling for the Multiply and MAC instructions.
+(define_reservation "ca9fmuls"
+  "ca9fp_mul1 + ca9_issue_vfp_neon, ca9fp_mul2, ca9fp_mul3, ca9fp_mul4")
 
-(define_insn_reservation "cortex_a9_fmuls" 5
- (and (eq_attr "tune" "cortexa9")
-      (eq_attr "type" "fmuls"))
- "cortex_a9_vfp")
+(define_reservation "ca9fmuld"
+  "ca9fp_mul1 + ca9_issue_vfp_neon, (ca9fp_mul1 + ca9fp_mul2), ca9fp_mul2, ca9fp_mul3, ca9fp_mul4")
 
-(define_insn_reservation "cortex_a9_fmuld" 6
- (and (eq_attr "tune" "cortexa9")
-      (eq_attr "type" "fmuld"))
- "cortex_a9_vfp*2")
+(define_insn_reservation "cortex_a9_fmuls" 4
+  (and (eq_attr "tune" "cortexa9")
+       (eq_attr "type" "fmuls"))
+  "ca9fmuls")
+
+(define_insn_reservation "cortex_a9_fmuld" 5
+  (and (eq_attr "tune" "cortexa9")
+       (eq_attr "type" "fmuld"))
+  "ca9fmuld")
 
 (define_insn_reservation "cortex_a9_fmacs" 8
- (and (eq_attr "tune" "cortexa9")
-      (eq_attr "type" "fmacs"))
- "cortex_a9_vfp")
+  (and (eq_attr "tune" "cortexa9")
+       (eq_attr "type" "fmacs"))
+  "ca9fmuls, ca9fp_add")
 
-(define_insn_reservation "cortex_a9_fmacd" 8
- (and (eq_attr "tune" "cortexa9")
-      (eq_attr "type" "fmacd"))
- "cortex_a9_vfp*2")
+(define_insn_reservation "cortex_a9_fmacd" 9
+  (and (eq_attr "tune" "cortexa9")
+       (eq_attr "type" "fmacd"))
+  "ca9fmuld, ca9fp_add")
 
+;; Division pipeline description.
 (define_insn_reservation "cortex_a9_fdivs" 15
- (and (eq_attr "tune" "cortexa9")
-      (eq_attr "type" "fdivs"))
- "cortex_a9_vfp*10")
+  (and (eq_attr "tune" "cortexa9")
+       (eq_attr "type" "fdivs"))
+  "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*14")
 
 (define_insn_reservation "cortex_a9_fdivd" 25
- (and (eq_attr "tune" "cortexa9")
-      (eq_attr "type" "fdivd"))
- "cortex_a9_vfp*20")
+  (and (eq_attr "tune" "cortexa9")
+       (eq_attr "type" "fdivd"))
+  "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*24")
+
+;; Include Neon pipeline description
+(include "cortex-a9-neon.md")