Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/mips/10000.md @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 ;; DFA-based pipeline description for the VR1x000. | |
2 ;; Copyright (C) 2005, 2006, 2008 Free Software Foundation, Inc. | |
3 ;; | |
4 ;; This file is part of GCC. | |
5 | |
6 ;; GCC is free software; you can redistribute it and/or modify it | |
7 ;; under the terms of the GNU General Public License as published | |
8 ;; by the Free Software Foundation; either version 3, or (at your | |
9 ;; option) any later version. | |
10 | |
11 ;; GCC is distributed in the hope that it will be useful, but WITHOUT | |
12 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
13 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
14 ;; License for more details. | |
15 | |
16 ;; You should have received a copy of the GNU General Public License | |
17 ;; along with GCC; see the file COPYING3. If not see | |
18 ;; <http://www.gnu.org/licenses/>. | |
19 | |
20 | |
21 ;; R12K/R14K/R16K are derivatives of R10K, thus copy its description | |
22 ;; until specific tuning for each is added. | |
23 | |
24 ;; R10000 has an int queue, fp queue, address queue. | |
25 ;; The int queue feeds ALU1 and ALU2. | |
26 ;; The fp queue feeds the fp-adder and fp-multiplier. | |
27 ;; The addr queue feeds the Load/Store unit. | |
28 ;; | |
29 ;; However, we define the fp-adder and fp-multiplier as | |
30 ;; separate automatons, because the fp-multiplier is | |
31 ;; divided into fp-multiplier, fp-division, and | |
32 ;; fp-squareroot units, all of which share the same | |
33 ;; issue and completion logic, yet can operate in | |
34 ;; parallel. | |
35 ;; | |
36 ;; This is based on the model described in the R10K Manual | |
37 ;; and it helps to reduce the size of the automata. | |
38 (define_automaton "r10k_a_int, r10k_a_fpadder, r10k_a_addr, | |
39 r10k_a_fpmpy, r10k_a_fpdiv, r10k_a_fpsqrt") | |
40 ;; One CPU unit per functional unit; the second string names the automaton it belongs to. | |
41 (define_cpu_unit "r10k_alu1" "r10k_a_int") | |
42 (define_cpu_unit "r10k_alu2" "r10k_a_int") | |
43 (define_cpu_unit "r10k_fpadd" "r10k_a_fpadder") | |
44 (define_cpu_unit "r10k_fpmpy" "r10k_a_fpmpy") | |
45 (define_cpu_unit "r10k_fpdiv" "r10k_a_fpdiv") | |
46 (define_cpu_unit "r10k_fpsqrt" "r10k_a_fpsqrt") | |
47 (define_cpu_unit "r10k_loadstore" "r10k_a_addr") | |
48 | |
49 ;; R10k Loads and Stores. | |
50 ;; Integer loads and prefetches: latency 2, one cycle on the load/store unit. | |
51 (define_insn_reservation "r10k_load" 2 | |
52 (and (eq_attr "cpu" "r10000") | |
53 (eq_attr "type" "load,prefetch,prefetchx")) | |
54 "r10k_loadstore") | |
55 ;; Stores (types store, fpstore, fpidxstore): latency 0, one cycle on the load/store unit. | |
56 (define_insn_reservation "r10k_store" 0 | |
57 (and (eq_attr "cpu" "r10000") | |
58 (eq_attr "type" "store,fpstore,fpidxstore")) | |
59 "r10k_loadstore") | |
60 ;; FP loads (types fpload, fpidxload): latency 3, one cycle on the load/store unit. | |
61 (define_insn_reservation "r10k_fpload" 3 | |
62 (and (eq_attr "cpu" "r10000") | |
63 (eq_attr "type" "fpload,fpidxload")) | |
64 "r10k_loadstore") | |
65 | |
66 | |
67 ;; Integer add/sub + logic ops, and mt hi/lo: latency 1, issued to either alu1 or alu2. | |
68 ;; The other miscellaneous integer types are placed here too (this is a guess). | |
69 (define_insn_reservation "r10k_arith" 1 | |
70 (and (eq_attr "cpu" "r10000") | |
71 (eq_attr "type" "arith,mthilo,slt,clz,const,nop,trap,logical")) | |
72 "r10k_alu1 | r10k_alu2") | |
73 | |
74 ;; mfhilo gets two reservations, because we need to know when it's HI and when | |
75 ;; it's LO. This one matches an mfhilo whose operand 1 is not a lo_operand (HI). | |
76 (define_insn_reservation "r10k_mfhi" 1 | |
77 (and (eq_attr "cpu" "r10000") | |
78 (and (eq_attr "type" "mfhilo") | |
79 (not (match_operand 1 "lo_operand")))) | |
80 "r10k_alu1 | r10k_alu2") | |
81 ;; mfhilo whose operand 1 is a lo_operand (LO): latency 1, either ALU. | |
82 (define_insn_reservation "r10k_mflo" 1 | |
83 (and (eq_attr "cpu" "r10000") | |
84 (and (eq_attr "type" "mfhilo") | |
85 (match_operand 1 "lo_operand"))) | |
86 "r10k_alu1 | r10k_alu2") | |
87 | |
88 | |
89 ;; ALU1 handles shifts, branch eval, and condmove. | |
90 ;; | |
91 ;; The brancher is a distinct part of ALU1 and can only | |
92 ;; do one branch per cycle (is this even implementable?). | |
93 ;; | |
94 ;; Unsure if the brancher handles jumps and calls as well, but since | |
95 ;; they're related, we'll add them here for now. Latency 1, ALU1 only. | |
96 (define_insn_reservation "r10k_brancher" 1 | |
97 (and (eq_attr "cpu" "r10000") | |
98 (eq_attr "type" "shift,branch,jump,call")) | |
99 "r10k_alu1") | |
100 ;; Integer (SI/DI mode) conditional moves: latency 1, ALU1 only. | |
101 (define_insn_reservation "r10k_int_cmove" 1 | |
102 (and (eq_attr "cpu" "r10000") | |
103 (and (eq_attr "type" "condmove") | |
104 (eq_attr "mode" "SI,DI"))) | |
105 "r10k_alu1") | |
106 | |
107 | |
108 ;; Coprocessor Moves. | |
109 ;; mtc1/dmtc1 are handled by ALU1 (latency 3). | |
110 ;; mfc1/dmfc1 are handled by the fp-multiplier (latency 2). | |
111 (define_insn_reservation "r10k_mt_xfer" 3 | |
112 (and (eq_attr "cpu" "r10000") | |
113 (eq_attr "type" "mtc")) | |
114 "r10k_alu1") | |
115 ;; Moves from the coprocessor (type mfc): latency 2, one cycle on the fp-multiplier. | |
116 (define_insn_reservation "r10k_mf_xfer" 2 | |
117 (and (eq_attr "cpu" "r10000") | |
118 (eq_attr "type" "mfc")) | |
119 "r10k_fpmpy") | |
120 | |
121 | |
122 ;; Only ALU2 does int multiplications and divisions. | |
123 ;; | |
124 ;; According to the Vr10000 series user manual, | |
125 ;; integer mult and div insns can be issued one | |
126 ;; cycle earlier if using register Lo. We model | |
127 ;; this by using the Lo value by default, as it | |
128 ;; is the more common value, and use a bypass | |
129 ;; for the Hi value when needed. | |
130 ;; | |
131 ;; Also of note, there are different latencies | |
132 ;; for MULT/DMULT (Lo 5/Hi 6) and MULTU/DMULTU (Lo 6/Hi 7). | |
133 ;; However, gcc does not have separate types for these insns. | |
134 ;; Thus, to strike a balance, we use the Hi latency value for | |
135 ;; imul operations until the imul type can be split. | |
136 ;; SImode multiplies: latency 6, ALU2 reserved for 6 cycles. | |
137 (define_insn_reservation "r10k_imul_single" 6 | |
138 (and (eq_attr "cpu" "r10000") | |
139 (and (eq_attr "type" "imul,imul3") | |
140 (eq_attr "mode" "SI"))) | |
141 "r10k_alu2 * 6") | |
142 ;; DImode multiplies: latency 10, ALU2 reserved for 10 cycles. | |
143 (define_insn_reservation "r10k_imul_double" 10 | |
144 (and (eq_attr "cpu" "r10000") | |
145 (and (eq_attr "type" "imul,imul3") | |
146 (eq_attr "mode" "DI"))) | |
147 "r10k_alu2 * 10") | |
148 | |
149 ;; Divides keep ALU2 busy. SImode divides: latency 34, ALU2 reserved for 35 cycles. | |
150 (define_insn_reservation "r10k_idiv_single" 34 | |
151 (and (eq_attr "cpu" "r10000") | |
152 (and (eq_attr "type" "idiv") | |
153 (eq_attr "mode" "SI"))) | |
154 "r10k_alu2 * 35") | |
155 ;; DImode divides: latency 66, ALU2 reserved for 67 cycles. | |
156 (define_insn_reservation "r10k_idiv_double" 66 | |
157 (and (eq_attr "cpu" "r10000") | |
158 (and (eq_attr "type" "idiv") | |
159 (eq_attr "mode" "DI"))) | |
160 "r10k_alu2 * 67") | |
161 ;; A divide result consumed by r10k_mfhi (the HI read) has one cycle more latency (35/67). | |
162 (define_bypass 35 "r10k_idiv_single" "r10k_mfhi") | |
163 (define_bypass 67 "r10k_idiv_double" "r10k_mfhi") | |
164 | |
165 ;; Floating point add/sub, mul, abs value, neg, comp, & moves. | |
166 ;; Add/sub, abs, neg and compare: latency 2, one cycle on the fp-adder. | |
167 (define_insn_reservation "r10k_fp_miscadd" 2 | |
168 (and (eq_attr "cpu" "r10000") | |
169 (eq_attr "type" "fadd,fabs,fneg,fcmp")) | |
170 "r10k_fpadd") | |
171 ;; FP multiplies and FP moves: latency 2, one cycle on the fp-multiplier. | |
172 (define_insn_reservation "r10k_fp_miscmul" 2 | |
173 (and (eq_attr "cpu" "r10000") | |
174 (eq_attr "type" "fmul,fmove")) | |
175 "r10k_fpmpy") | |
176 ;; FP (SF/DF mode) conditional moves: latency 2, one cycle on the fp-multiplier. | |
177 (define_insn_reservation "r10k_fp_cmove" 2 | |
178 (and (eq_attr "cpu" "r10000") | |
179 (and (eq_attr "type" "condmove") | |
180 (eq_attr "mode" "SF,DF"))) | |
181 "r10k_fpmpy") | |
182 | |
183 | |
184 ;; The fcvt.s.[wl] insn (matched here as cnv_mode I2S) has latency 4, repeat 2 (fp-adder held for 2 cycles). | |
185 ;; All other fcvt insns have latency 2, repeat 1. | |
186 (define_insn_reservation "r10k_fcvt_single" 4 | |
187 (and (eq_attr "cpu" "r10000") | |
188 (and (eq_attr "type" "fcvt") | |
189 (eq_attr "cnv_mode" "I2S"))) | |
190 "r10k_fpadd * 2") | |
191 ;; Every other conversion (cnv_mode not I2S): latency 2, one cycle on the fp-adder. | |
192 (define_insn_reservation "r10k_fcvt_other" 2 | |
193 (and (eq_attr "cpu" "r10000") | |
194 (and (eq_attr "type" "fcvt") | |
195 (eq_attr "cnv_mode" "!I2S"))) | |
196 "r10k_fpadd") | |
197 | |
198 | |
199 ;; Run the fmadd insn through the fp-adder first, then the fp-multiplier | |
200 ;; on the following cycle. The default latency is 4 cycles. | |
201 ;; The latency for fmadd is 2 cycles if the result is used | |
202 ;; by another fmadd instruction. | |
203 (define_insn_reservation "r10k_fmadd" 4 | |
204 (and (eq_attr "cpu" "r10000") | |
205 (eq_attr "type" "fmadd")) | |
206 "r10k_fpadd, r10k_fpmpy") | |
207 ;; An fmadd feeding another fmadd sees a latency of 2 instead of the default 4. | |
208 (define_bypass 2 "r10k_fmadd" "r10k_fmadd") | |
209 | |
210 ;; Floating point Divisions & square roots. | |
211 ;; SFmode fdiv/frdiv: latency 12, fp-division unit reserved for 14 cycles. | |
212 (define_insn_reservation "r10k_fdiv_single" 12 | |
213 (and (eq_attr "cpu" "r10000") | |
214 (and (eq_attr "type" "fdiv,frdiv") | |
215 (eq_attr "mode" "SF"))) | |
216 "r10k_fpdiv * 14") | |
217 ;; DFmode fdiv/frdiv: latency 19, fp-division unit reserved for 21 cycles. | |
218 (define_insn_reservation "r10k_fdiv_double" 19 | |
219 (and (eq_attr "cpu" "r10000") | |
220 (and (eq_attr "type" "fdiv,frdiv") | |
221 (eq_attr "mode" "DF"))) | |
222 "r10k_fpdiv * 21") | |
223 ;; SFmode fsqrt: latency 18, fp-squareroot unit reserved for 20 cycles. | |
224 (define_insn_reservation "r10k_fsqrt_single" 18 | |
225 (and (eq_attr "cpu" "r10000") | |
226 (and (eq_attr "type" "fsqrt") | |
227 (eq_attr "mode" "SF"))) | |
228 "r10k_fpsqrt * 20") | |
229 ;; DFmode fsqrt: latency 33, fp-squareroot unit reserved for 35 cycles. | |
230 (define_insn_reservation "r10k_fsqrt_double" 33 | |
231 (and (eq_attr "cpu" "r10000") | |
232 (and (eq_attr "type" "fsqrt") | |
233 (eq_attr "mode" "DF"))) | |
234 "r10k_fpsqrt * 35") | |
235 ;; SFmode frsqrt: latency 30; the fp-squareroot unit is reserved for 20 cycles, as for SFmode fsqrt. | |
236 (define_insn_reservation "r10k_frsqrt_single" 30 | |
237 (and (eq_attr "cpu" "r10000") | |
238 (and (eq_attr "type" "frsqrt") | |
239 (eq_attr "mode" "SF"))) | |
240 "r10k_fpsqrt * 20") | |
241 ;; DFmode frsqrt: latency 52; the fp-squareroot unit is reserved for 35 cycles, as for DFmode fsqrt. | |
242 (define_insn_reservation "r10k_frsqrt_double" 52 | |
243 (and (eq_attr "cpu" "r10000") | |
244 (and (eq_attr "type" "frsqrt") | |
245 (eq_attr "mode" "DF"))) | |
246 "r10k_fpsqrt * 35") | |
247 | |
248 | |
249 ;; Handle unknown/multi insns here (this is a guess): latency 1, reserving both ALUs in the same cycle. | |
250 (define_insn_reservation "r10k_unknown" 1 | |
251 (and (eq_attr "cpu" "r10000") | |
252 (eq_attr "type" "unknown,multi")) | |
253 "r10k_alu1 + r10k_alu2") |