diff gcc/config/mips/10000.md @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 04ced10e8804
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gcc/config/mips/10000.md	Fri Jul 17 14:47:48 2009 +0900
@@ -0,0 +1,253 @@
+;; DFA-based pipeline description for the VR1x000.
+;;   Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+
+;; R12K/R14K/R16K are derivatives of R10K, thus copy its description
+;; until specific tuning for each is added.
+
+;; R10000 has an int queue, fp queue, address queue.
+;; The int queue feeds ALU1 and ALU2.
+;; The fp queue feeds the fp-adder and fp-multiplier.
+;; The addr queue feeds the Load/Store unit.
+;;
+;; However, we define the fp-adder and fp-multiplier as
+;; separate automatons, because the fp-multiplier is
+;; divided into fp-multiplier, fp-division, and
+;; fp-squareroot units, all of which share the same
+;; issue and completion logic, yet can operate in
+;; parallel.
+;;
+;; This is based on the model described in the R10K Manual
+;; and it helps to reduce the size of the automata.
+(define_automaton "r10k_a_int, r10k_a_fpadder, r10k_a_addr,
+                   r10k_a_fpmpy, r10k_a_fpdiv, r10k_a_fpsqrt")
+
+(define_cpu_unit "r10k_alu1" "r10k_a_int")
+(define_cpu_unit "r10k_alu2" "r10k_a_int")
+(define_cpu_unit "r10k_fpadd" "r10k_a_fpadder")
+(define_cpu_unit "r10k_fpmpy" "r10k_a_fpmpy")
+(define_cpu_unit "r10k_fpdiv" "r10k_a_fpdiv")
+(define_cpu_unit "r10k_fpsqrt" "r10k_a_fpsqrt")
+(define_cpu_unit "r10k_loadstore" "r10k_a_addr")
+
+
+;; R10k Loads and Stores.
+(define_insn_reservation "r10k_load" 2
+  (and (eq_attr "cpu" "r10000")
+       (eq_attr "type" "load,prefetch,prefetchx"))
+  "r10k_loadstore")
+
+(define_insn_reservation "r10k_store" 0
+  (and (eq_attr "cpu" "r10000")
+       (eq_attr "type" "store,fpstore,fpidxstore"))
+  "r10k_loadstore")
+
+(define_insn_reservation "r10k_fpload" 3
+  (and (eq_attr "cpu" "r10000")
+       (eq_attr "type" "fpload,fpidxload"))
+  "r10k_loadstore")
+
+
+;; Integer add/sub + logic ops, and mt hi/lo can be done by alu1 or alu2.
+;; Miscellaneous arith goes here too (this is a guess).
+(define_insn_reservation "r10k_arith" 1
+  (and (eq_attr "cpu" "r10000")
+       (eq_attr "type" "arith,mthilo,slt,clz,const,nop,trap,logical"))
+  "r10k_alu1 | r10k_alu2")
+
+;; We treat mfhilo differently, because we need to know when
+;; it's HI and when it's LO.
+(define_insn_reservation "r10k_mfhi" 1
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "mfhilo")
+            (not (match_operand 1 "lo_operand"))))
+  "r10k_alu1 | r10k_alu2")
+
+(define_insn_reservation "r10k_mflo" 1
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "mfhilo")
+            (match_operand 1 "lo_operand")))
+  "r10k_alu1 | r10k_alu2")
+
+
+;; ALU1 handles shifts, branch eval, and condmove.
+;;
+;; Brancher is separate, but part of ALU1, but can only
+;; do one branch per cycle (is this even implementable?).
+;;
+;; Unsure if the brancher handles jumps and calls as well, but since
+;; they're related, we'll add them here for now.
+(define_insn_reservation "r10k_brancher" 1
+  (and (eq_attr "cpu" "r10000")
+       (eq_attr "type" "shift,branch,jump,call"))
+  "r10k_alu1")
+
+(define_insn_reservation "r10k_int_cmove" 1
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "condmove")
+            (eq_attr "mode" "SI,DI")))
+  "r10k_alu1")
+
+
+;; Coprocessor Moves.
+;; mtc1/dmtc1 are handled by ALU1.
+;; mfc1/dmfc1 are handled by the fp-multiplier.
+(define_insn_reservation "r10k_mt_xfer" 3
+  (and (eq_attr "cpu" "r10000")
+       (eq_attr "type" "mtc"))
+  "r10k_alu1")
+
+(define_insn_reservation "r10k_mf_xfer" 2
+  (and (eq_attr "cpu" "r10000")
+       (eq_attr "type" "mfc"))
+  "r10k_fpmpy")
+
+
+;; Only ALU2 does int multiplications and divisions.
+;;
+;; According to the Vr10000 series user manual,
+;; integer mult and div insns can be issued one
+;; cycle earlier if using register Lo.  We model
+;; this by using the Lo value by default, as it
+;; is the more common value, and use a bypass
+;; for the Hi value when needed.
+;;
+;; Also of note, There are different latencies
+;; for MULT/DMULT (Lo 5/Hi 6) and MULTU/DMULTU (Lo 6/Hi 7).
+;; However, gcc does not have separate types
+;; for these insns.  Thus to strike a balance,
+;; we use the Hi latency value for imul
+;; operations until the imul type can be split.
+(define_insn_reservation "r10k_imul_single" 6
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "imul,imul3")
+            (eq_attr "mode" "SI")))
+  "r10k_alu2 * 6")
+
+(define_insn_reservation "r10k_imul_double" 10
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "imul,imul3")
+            (eq_attr "mode" "DI")))
+  "r10k_alu2 * 10")
+
+;; Divides keep ALU2 busy.
+(define_insn_reservation "r10k_idiv_single" 34
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "idiv")
+            (eq_attr "mode" "SI")))
+  "r10k_alu2 * 35")
+
+(define_insn_reservation "r10k_idiv_double" 66
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "idiv")
+            (eq_attr "mode" "DI")))
+  "r10k_alu2 * 67")
+
+(define_bypass 35 "r10k_idiv_single" "r10k_mfhi")
+(define_bypass 67 "r10k_idiv_double" "r10k_mfhi")
+
+
+;; Floating point add/sub, mul, abs value, neg, comp, & moves.
+(define_insn_reservation "r10k_fp_miscadd" 2
+  (and (eq_attr "cpu" "r10000")
+       (eq_attr "type" "fadd,fabs,fneg,fcmp"))
+  "r10k_fpadd")
+
+(define_insn_reservation "r10k_fp_miscmul" 2
+  (and (eq_attr "cpu" "r10000")
+       (eq_attr "type" "fmul,fmove"))
+  "r10k_fpmpy")
+
+(define_insn_reservation "r10k_fp_cmove" 2
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "condmove")
+            (eq_attr "mode" "SF,DF")))
+  "r10k_fpmpy")
+
+
+;; The fcvt.s.[wl] insn has latency 4, repeat 2.
+;; All other fcvt insns have latency 2, repeat 1.
+(define_insn_reservation "r10k_fcvt_single" 4
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "fcvt")
+            (eq_attr "cnv_mode" "I2S")))
+  "r10k_fpadd * 2")
+
+(define_insn_reservation "r10k_fcvt_other" 2
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "fcvt")
+            (eq_attr "cnv_mode" "!I2S")))
+  "r10k_fpadd")
+
+
+;; Run the fmadd insn through fp-adder first, then fp-multiplier.
+;;
+;; The latency for fmadd is 2 cycles if the result is used
+;; by another fmadd instruction.
+(define_insn_reservation "r10k_fmadd" 4
+  (and (eq_attr "cpu" "r10000")
+       (eq_attr "type" "fmadd"))
+  "r10k_fpadd, r10k_fpmpy")
+
+(define_bypass 2 "r10k_fmadd" "r10k_fmadd")
+
+
+;; Floating point Divisions & square roots.
+(define_insn_reservation "r10k_fdiv_single" 12
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "fdiv,frdiv")
+            (eq_attr "mode" "SF")))
+  "r10k_fpdiv * 14")
+
+(define_insn_reservation "r10k_fdiv_double" 19
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "fdiv,frdiv")
+            (eq_attr "mode" "DF")))
+  "r10k_fpdiv * 21")
+
+(define_insn_reservation "r10k_fsqrt_single" 18
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "fsqrt")
+            (eq_attr "mode" "SF")))
+  "r10k_fpsqrt * 20")
+
+(define_insn_reservation "r10k_fsqrt_double" 33
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "fsqrt")
+            (eq_attr "mode" "DF")))
+  "r10k_fpsqrt * 35")
+
+(define_insn_reservation "r10k_frsqrt_single" 30
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "frsqrt")
+            (eq_attr "mode" "SF")))
+  "r10k_fpsqrt * 20")
+
+(define_insn_reservation "r10k_frsqrt_double" 52
+  (and (eq_attr "cpu" "r10000")
+       (and (eq_attr "type" "frsqrt")
+            (eq_attr "mode" "DF")))
+  "r10k_fpsqrt * 35")
+
+
+;; Handle unknown/multi insns here (this is a guess).
+(define_insn_reservation "r10k_unknown" 1
+  (and (eq_attr "cpu" "r10000")
+       (eq_attr "type" "unknown,multi"))
+  "r10k_alu1 + r10k_alu2")