Mercurial > hg > CbC > CbC_gcc
diff gcc/config/arm/cortex-a9.md @ 67:f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author | nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 22 Mar 2011 17:18:12 +0900 |
parents | 77e2b8dfacca |
children | 04ced10e8804 |
line wrap: on
line diff
--- a/gcc/config/arm/cortex-a9.md Tue May 25 18:58:51 2010 +0900 +++ b/gcc/config/arm/cortex-a9.md Tue Mar 22 17:18:12 2011 +0900 @@ -1,9 +1,11 @@ ;; ARM Cortex-A9 pipeline description -;; Copyright (C) 2008, 2009 Free Software Foundation, Inc. +;; Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc. ;; Originally written by CodeSourcery for VFP. ;; -;; Integer core pipeline description contributed by ARM Ltd. -;; +;; Rewritten by Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> +;; Integer Pipeline description contributed by ARM Ltd. +;; VFP Pipeline description rewritten and contributed by ARM Ltd. + ;; This file is part of GCC. ;; ;; GCC is free software; you can redistribute it and/or modify it @@ -22,28 +24,27 @@ (define_automaton "cortex_a9") -;; The Cortex-A9 integer core is modelled as a dual issue pipeline that has +;; The Cortex-A9 core is modelled as a dual issue pipeline that has ;; the following components. ;; 1. 1 Load Store Pipeline. ;; 2. P0 / main pipeline for data processing instructions. ;; 3. P1 / Dual pipeline for Data processing instructions. ;; 4. MAC pipeline for multiply as well as multiply ;; and accumulate instructions. -;; 5. 1 VFP / Neon pipeline. -;; The Load/Store and VFP/Neon pipeline are multiplexed. +;; 5. 1 VFP and an optional Neon unit. +;; The Load/Store, VFP and Neon issue pipeline are multiplexed. ;; The P0 / main pipeline and M1 stage of the MAC pipeline are ;; multiplexed. ;; The P1 / dual pipeline and M2 stage of the MAC pipeline are ;; multiplexed. -;; There are only 4 register read ports and hence at any point of +;; There are only 4 integer register read ports and hence at any point of ;; time we can't have issue down the E1 and the E2 ports unless ;; of course there are bypass paths that get exercised. ;; Both P0 and P1 have 2 stages E1 and E2. ;; Data processing instructions issue to E1 or E2 depending on ;; whether they have an early shift or not. - -(define_cpu_unit "cortex_a9_vfp, cortex_a9_ls" "cortex_a9") +(define_cpu_unit "ca9_issue_vfp_neon, cortex_a9_ls" "cortex_a9") (define_cpu_unit "cortex_a9_p0_e1, cortex_a9_p0_e2" "cortex_a9") (define_cpu_unit "cortex_a9_p1_e1, cortex_a9_p1_e2" "cortex_a9") (define_cpu_unit "cortex_a9_p0_wb, cortex_a9_p1_wb" "cortex_a9") @@ -71,20 +72,18 @@ ;; Issue at the same time along the load store pipeline and ;; the VFP / Neon pipeline is not possible. -;; FIXME:: At some point we need to model the issue -;; of the load store and the vfp being shared rather than anything else. - -(exclusion_set "cortex_a9_ls" "cortex_a9_vfp") - +(exclusion_set "cortex_a9_ls" "ca9_issue_vfp_neon") ;; Default data processing instruction without any shift ;; The only exception to this is the mov instruction ;; which can go down E2 without any problem. (define_insn_reservation "cortex_a9_dp" 2 (and (eq_attr "tune" "cortexa9") - (ior (eq_attr "type" "alu") - (and (eq_attr "type" "alu_shift_reg, alu_shift") - (eq_attr "insn" "mov")))) + (ior (and (eq_attr "type" "alu") + (eq_attr "neon_type" "none")) + (and (and (eq_attr "type" "alu_shift_reg, alu_shift") + (eq_attr "insn" "mov")) + (eq_attr "neon_type" "none")))) "cortex_a9_p0_default|cortex_a9_p1_default") ;; An instruction using the shifter will go down E1. @@ -101,18 +100,13 @@ (define_insn_reservation "cortex_a9_load1_2" 4 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "load1, load2, load_byte")) + (eq_attr "type" "load1, load2, load_byte, f_loads, f_loadd")) "cortex_a9_ls") ;; Loads multiples and store multiples can't be issued for 2 cycles in a ;; row. The description below assumes that addresses are 64 bit aligned. ;; If not, there is an extra cycle latency which is not modelled. -;; FIXME:: This bit might need to be reworked when we get to -;; tuning for the VFP because strictly speaking the ldm -;; is sent to the LSU unit as is and there is only an -;; issue restriction between the LSU and the VFP/ Neon unit. - (define_insn_reservation "cortex_a9_load3_4" 5 (and (eq_attr "tune" "cortexa9") (eq_attr "type" "load3, load4")) @@ -120,12 +114,13 @@ (define_insn_reservation "cortex_a9_store1_2" 0 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "store1, store2")) + (eq_attr "type" "store1, store2, f_stores, f_stored")) "cortex_a9_ls") ;; Almost all our store multiples use an auto-increment ;; form. Don't issue back to back load and store multiples ;; because the load store unit will stall. + (define_insn_reservation "cortex_a9_store3_4" 0 (and (eq_attr "tune" "cortexa9") (eq_attr "type" "store3, store4")) @@ -193,47 +188,82 @@ (define_insn_reservation "cortex_a9_call" 0 (and (eq_attr "tune" "cortexa9") (eq_attr "type" "call")) - "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + cortex_a9_vfp") + "cortex_a9_issue_branch + cortex_a9_multcycle1 + cortex_a9_ls + ca9_issue_vfp_neon") ;; Pipelining for VFP instructions. +;; Issue happens either along load store unit or the VFP / Neon unit. +;; Pipeline Instruction Classification. +;; FPS - fcpys, ffariths, ffarithd,r_2_f,f_2_r +;; FP_ADD - fadds, faddd, fcmps (1) +;; FPMUL - fmul{s,d}, fmac{s,d} +;; FPDIV - fdiv{s,d} +(define_cpu_unit "ca9fps" "cortex_a9") +(define_cpu_unit "ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4" "cortex_a9") +(define_cpu_unit "ca9fp_mul1, ca9fp_mul2 , ca9fp_mul3, ca9fp_mul4" "cortex_a9") +(define_cpu_unit "ca9fp_ds1" "cortex_a9") -(define_insn_reservation "cortex_a9_ffarith" 1 + +;; fmrs, fmrrd, fmstat and fmrx - The data is available after 1 cycle. +(define_insn_reservation "cortex_a9_fps" 2 (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd,fconsts,fconstd")) - "cortex_a9_vfp") + (eq_attr "type" "fcpys, fconsts, fconstd, ffariths, ffarithd, r_2_f, f_2_r, f_flag")) + "ca9_issue_vfp_neon + ca9fps") + +(define_bypass 1 + "cortex_a9_fps" + "cortex_a9_fadd, cortex_a9_fps, cortex_a9_fcmp, cortex_a9_dp, cortex_a9_dp_shift, cortex_a9_multiply") + +;; Scheduling on the FP_ADD pipeline. +(define_reservation "ca9fp_add" "ca9_issue_vfp_neon + ca9fp_add1, ca9fp_add2, ca9fp_add3, ca9fp_add4") (define_insn_reservation "cortex_a9_fadd" 4 - (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fadds,faddd,f_cvt")) - "cortex_a9_vfp") + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fadds, faddd, f_cvt")) + "ca9fp_add") + +(define_insn_reservation "cortex_a9_fcmp" 1 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fcmps, fcmpd")) + "ca9_issue_vfp_neon + ca9fp_add1") + +;; Scheduling for the Multiply and MAC instructions. +(define_reservation "ca9fmuls" + "ca9fp_mul1 + ca9_issue_vfp_neon, ca9fp_mul2, ca9fp_mul3, ca9fp_mul4") -(define_insn_reservation "cortex_a9_fmuls" 5 - (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fmuls")) - "cortex_a9_vfp") +(define_reservation "ca9fmuld" + "ca9fp_mul1 + ca9_issue_vfp_neon, (ca9fp_mul1 + ca9fp_mul2), ca9fp_mul2, ca9fp_mul3, ca9fp_mul4") -(define_insn_reservation "cortex_a9_fmuld" 6 - (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fmuld")) - "cortex_a9_vfp*2") +(define_insn_reservation "cortex_a9_fmuls" 4 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmuls")) + "ca9fmuls") + +(define_insn_reservation "cortex_a9_fmuld" 5 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmuld")) + "ca9fmuld") (define_insn_reservation "cortex_a9_fmacs" 8 - (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fmacs")) - "cortex_a9_vfp") + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmacs")) + "ca9fmuls, ca9fp_add") -(define_insn_reservation "cortex_a9_fmacd" 8 - (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fmacd")) - "cortex_a9_vfp*2") +(define_insn_reservation "cortex_a9_fmacd" 9 + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fmacd")) + "ca9fmuld, ca9fp_add") +;; Division pipeline description. (define_insn_reservation "cortex_a9_fdivs" 15 - (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fdivs")) - "cortex_a9_vfp*10") + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fdivs")) + "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*14") (define_insn_reservation "cortex_a9_fdivd" 25 - (and (eq_attr "tune" "cortexa9") - (eq_attr "type" "fdivd")) - "cortex_a9_vfp*20") + (and (eq_attr "tune" "cortexa9") + (eq_attr "type" "fdivd")) + "ca9fp_ds1 + ca9_issue_vfp_neon, nothing*24") + +;; Include Neon pipeline description +(include "cortex-a9-neon.md")