Mercurial > hg > CbC > CbC_gcc
annotate gcc/config/mips/sr71k.md @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
rev | line source |
---|---|
131 | 1 ;; Copyright (C) 2002-2018 Free Software Foundation, Inc. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2 ;; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
3 ;; This file is part of GCC. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
4 ;; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
5 ;; GCC is free software; you can redistribute it and/or modify |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
6 ;; it under the terms of the GNU General Public License as published by |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
7 ;; the Free Software Foundation; either version 3, or (at your option) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
8 ;; any later version. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
9 ;; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
10 ;; GCC is distributed in the hope that it will be useful, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
11 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
13 ;; GNU General Public License for more details. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
14 ;; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
15 ;; You should have received a copy of the GNU General Public License |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
16 ;; along with GCC; see the file COPYING3. If not see |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
17 ;; <http://www.gnu.org/licenses/>. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
18 ;; |
0 | 19 ;; ......................... |
20 ;; | |
21 ;; DFA-based pipeline description for Sandcraft SR3 (MIPS64 based) | |
22 ;; | |
23 ;; The SR3 is described as: | |
24 ;; - nine-stage pipeline, insn buffering with out-of-order issue to | |
25 ;; multiple function units, with an average dispatch rate of 2 | |
26 ;; insn.s per cycle (max 6 insns: 2 fpu, 4 cpu). | |
27 ;; | |
28 ;; The details on this are scant except for a diagram in | |
29 ;; Chap. 6 of Rev. 1.0 SR3 Spec. | |
30 ;; | |
31 ;; The model employed below is designed to closely approximate the | |
32 ;; published latencies. Emulation of out-of-order issue and the insn | |
33 ;; buffering is done via a VLIW dispatch style (with a packing of 6 insns); | |
34 ;; the function unit reservations restrictions (define_*_set) are | |
35 ;; contrived to support published timings. | |
36 ;; | |
37 ;; Reference: | |
38 ;; "SR3 Microprocessor Specification, System development information," | |
39 ;; Revision 1.0, 13 December 2000. | |
40 ;; | |
41 ;; | |
42 ;; Reservation model is based on: | |
43 ;; 1) Figure 6-1, from the 1.0 specification. | |
44 ;; 2) Chapter 19, from the 1.0 specification. | |
45 ;; 3) following questions(Red Hat)/answers(Sandcraft): | |
46 ;; RH> From Section 19.1 | |
47 ;; RH> 1) In terms of figure 6-1, are all the instructions in | |
48 ;; RH> table 19-1 restricted | |
49 ;; RH> to ALUx? When ALUx is not in use for an instruction in table 19-1 is |
50 ;; RH> it fully compatible with all insns that issue to ALUy? | |
51 ;; | |
52 ;; Yes, all the instructions in Table 19-1 only go to ALUX, and all the | |
53 ;; instructions that can be issued to ALUY can also be issued to ALUX. | |
54 ;; | |
55 ;; | |
56 ;; RH> From Section 19.2 | |
57 ;; RH> 2) Explain conditional moves execution path (in terms of | |
58 ;; RH> figure 6-1) | |
59 ;; | |
60 ;; Conditional move of integer registers (based on floating point condition | |
61 ;; codes or integer register value) go to ALUX or ALUY. | |
62 ;; | |
63 ;; RH> 3) Explain floating point store execution path (in terms of | |
64 ;; RH> figure 6-1) | |
65 ;; | |
66 ;; Floating point stores go to Ld/St and go to MOV in the floating point | |
67 ;; pipeline. | |
68 ;; | |
69 ;; Floating point loads go to Ld/St and go to LOAD in the floating point | |
70 ;; pipeline. | |
71 ;; | |
72 ;; RH> 4) Explain branch on floating condition (in terms of figure 6-1) |
73 ;; Branch on floating condition go to BRU. | |
74 ;; | |
75 ;; RH> 5) Is the column for single RECIP instruction latency correct? | |
76 ;; RH> What about for RSQRT single and double? | |
77 ;; | |
78 ;; The latency/repeat for RECIP and RSQRT are correct. | |
79 ;; | |
80 | |
81 ;; | |
82 ;; Use six automata to isolate long latency operations, and to |
83 ;; reduce the complexity of cpu+fpu, reducing space. | |
84 ;; | |
;; Declares the six automata named in the string.  Each define_cpu_unit
;; in this file assigns its units to one of these names.
85 (define_automaton "sr71_cpu, sr71_cpu1, sr71_cp1, sr71_cp2, sr71_fextra, sr71_imacc") |
86 | |
87 ;; feeders for CPU function units and feeders for fpu (CP1 interface) | |
;; Six issue-slot units, all in automaton sr71_cpu.  cpu_iss draws on
;; sr_iss0..sr_iss3, cp1_iss on sr_iss4/sr_iss5, and serial_dispatch
;; reserves all six at once.
88 (define_cpu_unit "sr_iss0,sr_iss1,sr_iss2,sr_iss3,sr_iss4,sr_iss5" "sr71_cpu") |
89 | |
90 ;; CPU function units | |
;; ipu_bru, ipu_alux, ipu_aluy and ipu_ldst share automaton sr71_cpu1.
;; ipu_macc_iter is the only unit placed in sr71_imacc; the imul/idiv
;; reservations hold it for their trailing cycles.
91 (define_cpu_unit "ipu_bru" "sr71_cpu1") |
92 (define_cpu_unit "ipu_alux" "sr71_cpu1") |
93 (define_cpu_unit "ipu_aluy" "sr71_cpu1") |
94 (define_cpu_unit "ipu_ldst" "sr71_cpu1") |
95 (define_cpu_unit "ipu_macc_iter" "sr71_imacc") |
96 | |
97 | |
98 ;; Floating-point unit (Co-processor interface 1). | |
;; fpu_mov and fpu_load live in automaton sr71_cp1; fpu_fpu is kept in
;; its own automaton, sr71_cp2.
99 (define_cpu_unit "fpu_mov" "sr71_cp1") |
100 (define_cpu_unit "fpu_load" "sr71_cp1") |
101 (define_cpu_unit "fpu_fpu" "sr71_cp2") |
102 | |
103 ;; fictitious unit to track long float insns with a separate automaton |
;; fpu_iter is the only unit in automaton sr71_fextra.  It is reserved by
;; rf_multi1 and by the fdiv/fsqrt/frsqrt reservations further down.
104 (define_cpu_unit "fpu_iter" "sr71_fextra") |
105 | |
106 | |
107 ;; | |
108 ;; Define common execution path (reservation) combinations | |
109 ;; | |
110 | |
111 ;; | |
;; cpu_iss: any one of the four issue slots sr_iss0..sr_iss3.
112 (define_reservation "cpu_iss" "sr_iss0|sr_iss1|sr_iss2|sr_iss3") |
113 | |
114 ;; two cycles are used for instructions using the fpu as it runs |
115 ;; at half the clock speed of the cpu. By adding an extra cycle | |
116 ;; to the issue units, the default/minimum "repeat" dispatch delay is | |
117 ;; accounted for all insn.s | |
;; cp1_iss: either sr_iss4 or sr_iss5, held for two consecutive cycles
;; (the "*2" repetition).
118 (define_reservation "cp1_iss" "(sr_iss4*2)|(sr_iss5*2)") |
119 | |
;; serial_dispatch: all six issue slots reserved in the same cycle ("+").
;; Used by ir_sr70_unknown and ir_sr70_multi.
120 (define_reservation "serial_dispatch" "sr_iss0+sr_iss1+sr_iss2+sr_iss3+sr_iss4+sr_iss5") |
121 | |
122 ;; Simulate a 6 insn VLIW dispatch, 1 cycle in dispatch followed by | |
123 ;; reservation of function unit. | |
;; Integer-side paths.  Each takes a cpu_iss slot first and then the
;; named function unit:
;;   ri_insns  -> ipu_alux or ipu_aluy
;;   ri_mem    -> ipu_ldst
;;   ri_alux   -> ipu_alux only
;;   ri_branch -> ipu_bru
124 (define_reservation "ri_insns" "cpu_iss,(ipu_alux|ipu_aluy)") |
125 (define_reservation "ri_mem" "cpu_iss,ipu_ldst") |
126 (define_reservation "ri_alux" "cpu_iss,ipu_alux") |
127 (define_reservation "ri_branch" "cpu_iss,ipu_bru") |
128 | |
;; Floating-point paths.  Each takes cp1_iss (two cycles on sr_iss4 or
;; sr_iss5) and then the named unit: fpu_fpu for rf_insn, fpu_load for
;; rf_ldmem.
129 (define_reservation "rf_insn" "cp1_iss,fpu_fpu") |
130 (define_reservation "rf_ldmem" "cp1_iss,fpu_load") |
131 | |
132 ; simultaneous reservation of pseudo-unit keeps cp1 fpu tied | |
133 ; up until long cycle insn is finished... | |
;; rf_multi1: rf_insn combined with fpu_iter via "+" (simultaneous
;; reservation).  Used as the leading part of the fdiv, fsqrt and frsqrt
;; reservations.
134 (define_reservation "rf_multi1" "rf_insn+fpu_iter") |
135 | |
136 ;; | |
137 ;; The ordering of the instruction-execution-path/resource-usage | |
138 ;; descriptions (also known as reservation RTL) is roughly ordered | |
139 ;; based on the define attribute RTL for the "type" classification. | |
140 ;; When modifying, remember that the first test that matches is the | |
141 ;; reservation used! | |
142 ;; | |
143 | |
144 | |
;; Types unknown, atomic and syncloop on sr71000: latency 1, and the insn
;; takes every issue slot (serial_dispatch).
145 (define_insn_reservation "ir_sr70_unknown" 1 |
146 (and (eq_attr "cpu" "sr71000") |
111 | 147 (eq_attr "type" "unknown,atomic,syncloop")) |
0 | 148 "serial_dispatch") |
149 | |
150 | |
151 ;; Assume prediction fails. | |
;; Types branch, jump and call on sr71000: latency 6, reserved through
;; ri_branch (cpu_iss, then ipu_bru).
152 (define_insn_reservation "ir_sr70_branch" 6 |
153 (and (eq_attr "cpu" "sr71000") |
154 (eq_attr "type" "branch,jump,call")) |
155 "ri_branch") |
156 | |
;; Type load on sr71000: latency 2, reserved through ri_mem
;; (cpu_iss, then ipu_ldst).
157 (define_insn_reservation "ir_sr70_load" 2 |
158 (and (eq_attr "cpu" "sr71000") |
159 (eq_attr "type" "load")) |
160 "ri_mem") |
161 | |
;; Type store on sr71000: latency 1, same ri_mem path as integer loads.
162 (define_insn_reservation "ir_sr70_store" 1 |
163 (and (eq_attr "cpu" "sr71000") |
164 (eq_attr "type" "store")) |
165 "ri_mem") |
166 | |
167 | |
168 ;; | |
169 ;; float loads/stores flow through both cpu and cp1... | |
170 ;; | |
;; Types fpload and fpidxload on sr71000: latency 9.  The first step takes
;; a cpu issue slot and a cp1 issue slot together; the second step
;; reserves ri_mem and rf_ldmem together.
;; NOTE(review): ri_mem and rf_ldmem each begin with their own cpu_iss /
;; cp1_iss step, so issue slots appear to be reserved a second time --
;; confirm this is intended.
171 (define_insn_reservation "ir_sr70_fload" 9 |
172 (and (eq_attr "cpu" "sr71000") |
173 (eq_attr "type" "fpload,fpidxload")) |
174 "(cpu_iss+cp1_iss),(ri_mem+rf_ldmem)") |
175 | |
;; Types fpstore and fpidxstore on sr71000: latency 1.  Takes a cpu and a
;; cp1 issue slot together, then fpu_mov together with ri_mem.
176 (define_insn_reservation "ir_sr70_fstore" 1 |
177 (and (eq_attr "cpu" "sr71000") |
178 (eq_attr "type" "fpstore,fpidxstore")) |
179 "(cpu_iss+cp1_iss),(fpu_mov+ri_mem)") |
180 | |
181 | |
182 ;; This reservation is for conditional move based on integer | |
183 ;; or floating point CC. | |
;; Type condmove on sr71000: latency 4, reserved through ri_insns
;; (cpu_iss, then ipu_alux or ipu_aluy).
184 (define_insn_reservation "ir_sr70_condmove" 4 |
185 (and (eq_attr "cpu" "sr71000") |
186 (eq_attr "type" "condmove")) |
187 "ri_insns") |
188 | |
189 ;; Try to discriminate move-from-cp1 versus move-to-cp1 as latencies | |
190 ;; are different. Like float load/store, these insns use multiple | |
191 ;; resources simultaneously | |
;; Type mfc on sr71000: latency 6.  Uses the same reservation string as
;; ir_sr70_fstore (issue slots on both sides, then fpu_mov with ri_mem).
192 (define_insn_reservation "ir_sr70_xfer_from" 6 |
193 (and (eq_attr "cpu" "sr71000") |
194 (eq_attr "type" "mfc")) |
195 "(cpu_iss+cp1_iss),(fpu_mov+ri_mem)") |
196 | |
;; Type mtc on sr71000: latency 9.  Uses the same reservation string as
;; ir_sr70_fload (issue slots on both sides, then ri_mem with rf_ldmem).
197 (define_insn_reservation "ir_sr70_xfer_to" 9 |
198 (and (eq_attr "cpu" "sr71000") |
199 (eq_attr "type" "mtc")) |
200 "(cpu_iss+cp1_iss),(ri_mem+rf_ldmem)") |
201 | |
;; Types mthi, mtlo, mfhi and mflo on sr71000: latency 1, reserved
;; through ri_insns.
202 (define_insn_reservation "ir_sr70_hilo" 1 |
203 (and (eq_attr "cpu" "sr71000") |
111 | 204 (eq_attr "type" "mthi,mtlo,mfhi,mflo")) |
0 | 205 "ri_insns") |
206 | |
;; Simple integer types (arith, shift, signext, slt, clz, const, logical,
;; move, trap) on sr71000: latency 1, reserved through ri_insns.
207 (define_insn_reservation "ir_sr70_arith" 1 |
208 (and (eq_attr "cpu" "sr71000") |
209 (eq_attr "type" "arith,shift,signext,slt,clz,const,logical,move,trap")) |
210 "ri_insns") |
211 | |
212 ;; emulate repeat (dispatch stall) by spending extra cycle(s) in | |
213 ;; the iter unit |
;; SImode imul, imul3 and imadd on sr71000: latency 4.  Reservation is
;; ri_alux (cpu_iss, then ipu_alux), one more cycle of ipu_alux, then one
;; cycle of ipu_macc_iter.
214 (define_insn_reservation "ir_sr70_imul_si" 4 |
215 (and (eq_attr "cpu" "sr71000") |
216 (and (eq_attr "type" "imul,imul3,imadd") |
217 (eq_attr "mode" "SI"))) |
218 "ri_alux,ipu_alux,ipu_macc_iter") |
219 | |
;; DImode imul, imul3 and imadd on sr71000: latency 6.  Same shape as the
;; SImode multiply, but ipu_macc_iter is held for three cycles.
220 (define_insn_reservation "ir_sr70_imul_di" 6 |
221 (and (eq_attr "cpu" "sr71000") |
222 (and (eq_attr "type" "imul,imul3,imadd") |
223 (eq_attr "mode" "DI"))) |
224 "ri_alux,ipu_alux,(ipu_macc_iter*3)") |
225 | |
226 ;; Divide algorithm is early out with best latency of 7 pcycles. | |
227 ;; Use worst case for scheduling purposes. | |
;; SImode idiv on sr71000: latency 41.  ri_alux, one more cycle of
;; ipu_alux, then ipu_macc_iter held for 38 cycles.
228 (define_insn_reservation "ir_sr70_idiv_si" 41 |
229 (and (eq_attr "cpu" "sr71000") |
230 (and (eq_attr "type" "idiv") |
231 (eq_attr "mode" "SI"))) |
232 "ri_alux,ipu_alux,(ipu_macc_iter*38)") |
233 | |
;; DImode idiv on sr71000: latency 73.  Same shape as the SImode divide,
;; with ipu_macc_iter held for 70 cycles.
234 (define_insn_reservation "ir_sr70_idiv_di" 73 |
235 (and (eq_attr "cpu" "sr71000") |
236 (and (eq_attr "type" "idiv") |
237 (eq_attr "mode" "DI"))) |
238 "ri_alux,ipu_alux,(ipu_macc_iter*70)") |
239 | |
240 ;; extra reservations of fpu_fpu are for repeat latency | |
;; SFmode fadd on sr71000: latency 8.  rf_insn (cp1_iss, then fpu_fpu)
;; followed by one further cycle of fpu_fpu.
241 (define_insn_reservation "ir_sr70_fadd_sf" 8 |
242 (and (eq_attr "cpu" "sr71000") |
243 (and (eq_attr "type" "fadd") |
244 (eq_attr "mode" "SF"))) |
245 "rf_insn,fpu_fpu") |
246 | |
;; DFmode fadd on sr71000: latency 10, with the same reservation string
;; as the SFmode add.
247 (define_insn_reservation "ir_sr70_fadd_df" 10 |
248 (and (eq_attr "cpu" "sr71000") |
249 (and (eq_attr "type" "fadd") |
250 (eq_attr "mode" "DF"))) |
251 "rf_insn,fpu_fpu") |
252 | |
253 ;; Latencies for MADD,MSUB, NMADD, NMSUB assume the Multiply is fused | |
254 ;; with the sub or add. | |
;; SFmode fmul and fmadd on sr71000: latency 8.  rf_insn followed by one
;; further cycle of fpu_fpu.
255 (define_insn_reservation "ir_sr70_fmul_sf" 8 |
256 (and (eq_attr "cpu" "sr71000") |
257 (and (eq_attr "type" "fmul,fmadd") |
258 (eq_attr "mode" "SF"))) |
259 "rf_insn,fpu_fpu") |
260 | |
261 ;; tie up the fpu unit to emulate the balance for the "repeat | |
262 ;; rate" of 8 (2 are spent in the iss unit) | |
;; DFmode fmul and fmadd on sr71000: latency 16.  rf_insn followed by six
;; further cycles of fpu_fpu.
263 (define_insn_reservation "ir_sr70_fmul_df" 16 |
264 (and (eq_attr "cpu" "sr71000") |
265 (and (eq_attr "type" "fmul,fmadd") |
266 (eq_attr "mode" "DF"))) |
267 "rf_insn,fpu_fpu*6") |
268 | |
269 | |
270 ;; RECIP insn uses same type attr as div, and for SR3, has same | |
271 ;; timings for double. However, single RECIP has a latency of | |
272 ;; 28 -- only way to fix this is to introduce new insn attrs. | |
273 ;; cycles spent in iter unit are designed to satisfy balance | |
274 ;; of "repeat" latency after insn uses up rf_multi1 reservation | |
;; SFmode fdiv and frdiv on sr71000: latency 60.  rf_multi1 combined via
;; "+" with 51 cycles of the fpu_iter pseudo-unit.
275 (define_insn_reservation "ir_sr70_fdiv_sf" 60 |
276 (and (eq_attr "cpu" "sr71000") |
277 (and (eq_attr "type" "fdiv,frdiv") |
278 (eq_attr "mode" "SF"))) |
279 "rf_multi1+(fpu_iter*51)") |
280 | |
;; DFmode fdiv and frdiv on sr71000: latency 120.  rf_multi1 combined via
;; "+" with 109 cycles of fpu_iter.
281 (define_insn_reservation "ir_sr70_fdiv_df" 120 |
282 (and (eq_attr "cpu" "sr71000") |
283 (and (eq_attr "type" "fdiv,frdiv") |
284 (eq_attr "mode" "DF"))) |
285 "rf_multi1+(fpu_iter*109)") |
286 | |
;; Types fabs, fneg and fmove on sr71000 (no mode test): latency 4.
;; rf_insn followed by one further cycle of fpu_fpu.
287 (define_insn_reservation "ir_sr70_fabs" 4 |
288 (and (eq_attr "cpu" "sr71000") |
289 (eq_attr "type" "fabs,fneg,fmove")) |
290 "rf_insn,fpu_fpu") |
291 | |
;; Type fcmp on sr71000: latency 10.  rf_insn followed by one further
;; cycle of fpu_fpu.
292 (define_insn_reservation "ir_sr70_fcmp" 10 |
293 (and (eq_attr "cpu" "sr71000") |
294 (eq_attr "type" "fcmp")) |
295 "rf_insn,fpu_fpu") |
296 | |
297 ;; "fcvt" type attribute covers a number of diff insns, most have the same | |
298 ;; latency descriptions, a few vary. We use the | |
299 ;; most common timing (which is also worst case). | |
;; Type fcvt on sr71000: latency 12.  rf_insn followed by four further
;; cycles of fpu_fpu.
300 (define_insn_reservation "ir_sr70_fcvt" 12 |
301 (and (eq_attr "cpu" "sr71000") |
302 (eq_attr "type" "fcvt")) |
303 "rf_insn,fpu_fpu*4") |
304 | |
;; SFmode fsqrt on sr71000: latency 62.  rf_multi1 combined via "+" with
;; 53 cycles of fpu_iter.
305 (define_insn_reservation "ir_sr70_fsqrt_sf" 62 |
306 (and (eq_attr "cpu" "sr71000") |
307 (and (eq_attr "type" "fsqrt") |
308 (eq_attr "mode" "SF"))) |
309 "rf_multi1+(fpu_iter*53)") |
310 | |
;; DFmode fsqrt on sr71000: latency 122.  rf_multi1 combined via "+" with
;; 111 cycles of fpu_iter.
311 (define_insn_reservation "ir_sr70_fsqrt_df" 122 |
312 (and (eq_attr "cpu" "sr71000") |
313 (and (eq_attr "type" "fsqrt") |
314 (eq_attr "mode" "DF"))) |
315 "rf_multi1+(fpu_iter*111)") |
316 | |
;; SFmode frsqrt on sr71000: latency 48.  rf_multi1 combined via "+" with
;; 39 cycles of fpu_iter.
317 (define_insn_reservation "ir_sr70_frsqrt_sf" 48 |
318 (and (eq_attr "cpu" "sr71000") |
319 (and (eq_attr "type" "frsqrt") |
320 (eq_attr "mode" "SF"))) |
321 "rf_multi1+(fpu_iter*39)") |
322 | |
;; DFmode frsqrt on sr71000: latency 240.  rf_multi1 combined via "+"
;; with 229 cycles of fpu_iter.
323 (define_insn_reservation "ir_sr70_frsqrt_df" 240 |
324 (and (eq_attr "cpu" "sr71000") |
325 (and (eq_attr "type" "frsqrt") |
326 (eq_attr "mode" "DF"))) |
327 "rf_multi1+(fpu_iter*229)") |
328 | |
;; Type multi on sr71000: latency 1, and the insn takes every issue slot
;; (serial_dispatch), the same treatment as ir_sr70_unknown.
329 (define_insn_reservation "ir_sr70_multi" 1 |
330 (and (eq_attr "cpu" "sr71000") |
331 (eq_attr "type" "multi")) |
332 "serial_dispatch") |
333 | |
;; Type nop on sr71000: latency 1, reserved through ri_insns like the
;; simple arithmetic types.
334 (define_insn_reservation "ir_sr70_nop" 1 |
335 (and (eq_attr "cpu" "sr71000") |
336 (eq_attr "type" "nop")) |
337 "ri_insns") |