annotate gcc/config/sparc/m8.md @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents
children 84e7813d76e9
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
111
kono
parents:
diff changeset
1 ;; Scheduling description for the SPARC M8.
kono
parents:
diff changeset
2 ;; Copyright (C) 2017 Free Software Foundation, Inc.
kono
parents:
diff changeset
3 ;;
kono
parents:
diff changeset
4 ;; This file is part of GCC.
kono
parents:
diff changeset
5 ;;
kono
parents:
diff changeset
6 ;; GCC is free software; you can redistribute it and/or modify
kono
parents:
diff changeset
7 ;; it under the terms of the GNU General Public License as published by
kono
parents:
diff changeset
8 ;; the Free Software Foundation; either version 3, or (at your option)
kono
parents:
diff changeset
9 ;; any later version.
kono
parents:
diff changeset
10 ;;
kono
parents:
diff changeset
11 ;; GCC is distributed in the hope that it will be useful,
kono
parents:
diff changeset
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
kono
parents:
diff changeset
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
kono
parents:
diff changeset
14 ;; GNU General Public License for more details.
kono
parents:
diff changeset
15 ;;
kono
parents:
diff changeset
16 ;; You should have received a copy of the GNU General Public License
kono
parents:
diff changeset
17 ;; along with GCC; see the file COPYING3. If not see
kono
parents:
diff changeset
18 ;; <http://www.gnu.org/licenses/>.
kono
parents:
diff changeset
19
kono
parents:
diff changeset
20 ;; Things to improve:
kono
parents:
diff changeset
21 ;;
kono
parents:
diff changeset
22 ;; - Store instructions are implemented by micro-ops, one of which
kono
parents:
diff changeset
23 ;; generates the store address and is executed in the store address
kono
parents:
diff changeset
24 ;; generation unit in the slot0. We need to model that.
kono
parents:
diff changeset
25 ;;
kono
parents:
diff changeset
26 ;; - There are two V3 pipes connected to different slots. The current
kono
parents:
diff changeset
27 ;; implementation assumes that all the instructions executing in a
kono
parents:
diff changeset
28 ;; V3 pipe are issued to the unit in slot3.
kono
parents:
diff changeset
29 ;;
kono
parents:
diff changeset
30 ;; - Single-issue ALU operations incur an additional cycle of latency to
kono
parents:
diff changeset
31 ;; slot 0 and slot 1 instructions. This is not currently reflected
kono
parents:
diff changeset
32 ;; in the DFA.
kono
parents:
diff changeset
33
kono
parents:
diff changeset
;; Declare the scheduling automaton "m8_0"; the M8 slot units defined in
;; this file are assigned to it.
34 (define_automaton "m8_0")
kono
parents:
diff changeset
35
kono
parents:
diff changeset
36 ;; The S5 core has two dual-issue queues, PQLS and PQEX. Each queue
kono
parents:
diff changeset
37 ;; is divided into two slots: PQLS corresponds to slots 0 and 1, and
kono
parents:
diff changeset
38 ;; PQEX corresponds to slots 2 and 3. The core can issue 4
kono
parents:
diff changeset
39 ;; instructions per-cycle, and up to 4 instructions are committed each
kono
parents:
diff changeset
40 ;; cycle.
kono
parents:
diff changeset
41 ;;
kono
parents:
diff changeset
42 ;;
kono
parents:
diff changeset
43 ;; m8_slot0 - Load Unit.
kono
parents:
diff changeset
44 ;; - Store address gen. Unit.
kono
parents:
diff changeset
45 ;;
kono
parents:
diff changeset
46 ;;
kono
parents:
diff changeset
47 ;; === PQLS ==> m8_slot1 - Store data unit.
kono
parents:
diff changeset
48 ;; - Branch unit.
kono
parents:
diff changeset
49 ;;
kono
parents:
diff changeset
50 ;;
kono
parents:
diff changeset
51 ;; === PQEX ==> m8_slot2 - Integer Unit (EXU2).
kono
parents:
diff changeset
52 ;; - 3-cycles Crypto Unit (SPU2).
kono
parents:
diff changeset
53 ;;
kono
parents:
diff changeset
54 ;; m8_slot3 - Integer Unit (EXU3).
kono
parents:
diff changeset
55 ;; - 3-cycles Crypto Unit (SPU3).
kono
parents:
diff changeset
56 ;; - Floating-point and graphics unit (FPG).
kono
parents:
diff changeset
57 ;; - Long-latency Crypto Unit.
kono
parents:
diff changeset
58 ;; - Oracle Numbers Unit (ONU).
kono
parents:
diff changeset
59
kono
parents:
diff changeset
;; One CPU unit per issue slot (slot0..slot3), all belonging to the
;; automaton "m8_0".
60 (define_cpu_unit "m8_slot0,m8_slot1,m8_slot2,m8_slot3" "m8_0")
kono
parents:
diff changeset
61
kono
parents:
diff changeset
62 ;; Some instructions stall the pipeline and prevent any other
kono
parents:
diff changeset
63 ;; instruction from being issued in the same cycle. We assume the same
kono
parents:
diff changeset
64 ;; for multi-instruction insns.
kono
parents:
diff changeset
65
kono
parents:
diff changeset
;; Named reservation that occupies all four slots in the same cycle
;; ("+" reserves the listed units simultaneously).
66 (define_reservation "m8_single_issue" "m8_slot0 + m8_slot1 + m8_slot2 + m8_slot3")
kono
parents:
diff changeset
67
kono
parents:
diff changeset
;; Latency 1.  Insns of the listed types on the m8 reserve every slot
;; for one cycle through m8_single_issue.
;; NOTE(review): "bmask" is also listed in the m8_integer reservation;
;; confirm which of the two is meant to apply.
68 (define_insn_reservation "m8_single" 1
kono
parents:
diff changeset
69 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
70 (eq_attr "type" "multi,savew,flushw,trap,bmask"))
kono
parents:
diff changeset
71 "m8_single_issue")
kono
parents:
diff changeset
72
kono
parents:
diff changeset
73 ;; Most of the instructions executing in the integer units have a
kono
parents:
diff changeset
74 ;; latency of 1.
kono
parents:
diff changeset
75
kono
parents:
diff changeset
;; Latency 1.  Takes either slot2 or slot3 for a single cycle.
;; NOTE(review): "bmask" is also listed in the m8_single reservation;
;; confirm which of the two is meant to apply.
76 (define_insn_reservation "m8_integer" 1
kono
parents:
diff changeset
77 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
78 (eq_attr "type" "ialu,ialuX,shift,cmove,compare,bmask"))
kono
parents:
diff changeset
79 "(m8_slot2 | m8_slot3)")
kono
parents:
diff changeset
80
kono
parents:
diff changeset
81 ;; Flushing the instruction memory takes 27 cycles.
kono
parents:
diff changeset
82
kono
parents:
diff changeset
83
kono
parents:
diff changeset
;; Latency 27.  Occupies slot2 or slot3 in the first cycle, followed by
;; 26 cycles in which no unit is reserved (1 + 26 = 27).
84 (define_insn_reservation "m8_iflush" 27
kono
parents:
diff changeset
85 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
86 (eq_attr "type" "iflush"))
kono
parents:
diff changeset
87 "(m8_slot2 | m8_slot3), nothing*26")
kono
parents:
diff changeset
88
kono
parents:
diff changeset
89 ;; The integer multiplication instructions have a latency of 10 cycles
kono
parents:
diff changeset
90 ;; and execute in integer units.
kono
parents:
diff changeset
91 ;;
kono
parents:
diff changeset
92 ;; Likewise for array*, edge* and pdistn instructions.
kono
parents:
diff changeset
93 ;;
kono
parents:
diff changeset
94 ;; However, the latency is only 9 cycles if the consumer of the
kono
parents:
diff changeset
95 ;; operation is also capable of 9 cycles latency. We model this with
kono
parents:
diff changeset
96 ;; a bypass.
kono
parents:
diff changeset
97
kono
parents:
diff changeset
;; Latency 10.  Occupies slot2 or slot3 in the first cycle, followed by
;; cycles in which no unit is reserved.
;; NOTE(review): "nothing*12" makes this reservation 13 cycles long,
;; whereas the other reservations in this file use (latency - 1) idle
;; cycles; confirm whether "nothing*9" was intended.
98 (define_insn_reservation "m8_imul" 10
kono
parents:
diff changeset
99 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
100 (eq_attr "type" "imul,array,edge,edgen,pdistn"))
kono
parents:
diff changeset
101 "(m8_slot2 | m8_slot3), nothing*12")
kono
parents:
diff changeset
102
kono
parents:
diff changeset
;; When the result of an m8_imul insn is consumed by another m8_imul
;; insn, use a latency of 9 instead of the default 10.
103 (define_bypass 9 "m8_imul" "m8_imul")
kono
parents:
diff changeset
104
kono
parents:
diff changeset
105 ;; The integer division instructions `sdiv' and `udivx' have a latency
kono
parents:
diff changeset
106 ;; of 30 cycles and execute in integer units.
kono
parents:
diff changeset
107
kono
parents:
diff changeset
;; Latency 30.  Occupies slot2 or slot3 in the first cycle, followed by
;; 29 cycles in which no unit is reserved (1 + 29 = 30).
108 (define_insn_reservation "m8_idiv" 30
kono
parents:
diff changeset
109 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
110 (eq_attr "type" "idiv"))
kono
parents:
diff changeset
111 "(m8_slot2 | m8_slot3), nothing*29")
kono
parents:
diff changeset
112
kono
parents:
diff changeset
113 ;; Both integer and floating-point load instructions have a latency of
kono
parents:
diff changeset
114 ;; only 3 cycles, and execute in slot0.
kono
parents:
diff changeset
115 ;;
kono
parents:
diff changeset
116 ;; Misaligned load instructions feature a latency of 11 cycles.
kono
parents:
diff changeset
117 ;;
kono
parents:
diff changeset
118 ;; The prefetch instruction also executes in the load unit, but its
kono
parents:
diff changeset
119 ;; latency is only 1 cycle.
kono
parents:
diff changeset
120
kono
parents:
diff changeset
;; Latency 3.  Matches types fpload and sload, plus type load when its
;; subtype is "regular".  Occupies slot0 in the first cycle, followed by
;; 2 cycles in which no unit is reserved.
121 (define_insn_reservation "m8_load" 3
kono
parents:
diff changeset
122 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
123 (ior (eq_attr "type" "fpload,sload")
kono
parents:
diff changeset
124 (and (eq_attr "type" "load")
kono
parents:
diff changeset
125 (eq_attr "subtype" "regular"))))
kono
parents:
diff changeset
126 "m8_slot0, nothing*2")
kono
parents:
diff changeset
127
kono
parents:
diff changeset
128 ;; (define_insn_reservation "m8_load_misalign" 11
kono
parents:
diff changeset
129 ;; (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
130 ;; (eq_attr "type" "load_mis,fpload_mis"))
kono
parents:
diff changeset
131 ;; "m8_slot0, nothing*10")
kono
parents:
diff changeset
132
kono
parents:
diff changeset
;; Latency 1.  Matches type load with subtype "prefetch" (all three
;; conditions of the `and' must hold).  Occupies slot0 for one cycle.
133 (define_insn_reservation "m8_prefetch" 1
kono
parents:
diff changeset
134 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
135 (eq_attr "type" "load")
kono
parents:
diff changeset
136 (eq_attr "subtype" "prefetch"))
kono
parents:
diff changeset
137 "m8_slot0")
kono
parents:
diff changeset
138
kono
parents:
diff changeset
139 ;; Both integer and floating-point store instructions have a latency
kono
parents:
diff changeset
140 ;; of 1 cycle, and execute in the store data unit in slot1.
kono
parents:
diff changeset
141 ;;
kono
parents:
diff changeset
142 ;; However, misaligned store instructions feature a latency of 3
kono
parents:
diff changeset
143 ;; cycles.
kono
parents:
diff changeset
144
kono
parents:
diff changeset
;; Latency 1.  Integer and floating-point stores occupy slot1 for one
;; cycle.
145 (define_insn_reservation "m8_store" 1
kono
parents:
diff changeset
146 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
147 (eq_attr "type" "store,fpstore"))
kono
parents:
diff changeset
148 "m8_slot1")
kono
parents:
diff changeset
149
kono
parents:
diff changeset
150 ;; (define_insn_reservation "m8_store_misalign" 3
kono
parents:
diff changeset
151 ;; (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
152 ;; (eq_attr "type" "store_mis,fpstore_mis"))
kono
parents:
diff changeset
153 ;; "m8_slot1, nothing*2")
kono
parents:
diff changeset
154
kono
parents:
diff changeset
155 ;; Control-transfer instructions execute in the Branch Unit in the
kono
parents:
diff changeset
156 ;; slot1.
kono
parents:
diff changeset
157
kono
parents:
diff changeset
;; Latency 1.  Branches, calls and returns of the listed types occupy
;; slot1 for one cycle.
158 (define_insn_reservation "m8_cti" 1
kono
parents:
diff changeset
159 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
160 (eq_attr "type" "cbcond,uncond_cbcond,branch,call,sibcall,call_no_delay_slot,uncond_branch,return"))
kono
parents:
diff changeset
161 "m8_slot1")
kono
parents:
diff changeset
162
kono
parents:
diff changeset
163 ;; Many instructions executing in the Floating-point and Graphics Unit
kono
parents:
diff changeset
164 ;; (FGU) serving slot3 feature a default latency of 9 cycles.
kono
parents:
diff changeset
165
kono
parents:
diff changeset
;; Latency 9.  Matches the listed floating-point types, plus type fga
;; when its subtype is "fpu".  Occupies slot3 in the first cycle,
;; followed by 8 cycles in which no unit is reserved (1 + 8 = 9).
166 (define_insn_reservation "m8_fp" 9
kono
parents:
diff changeset
167 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
168 (ior (eq_attr "type" "fpmove,fpcmove,fpcrmove,fp,fpcmp,fpmul,fgm_pack,fgm_mul,pdist")
kono
parents:
diff changeset
169 (and (eq_attr "type" "fga")
kono
parents:
diff changeset
170 (eq_attr "subtype" "fpu"))))
kono
parents:
diff changeset
171 "m8_slot3, nothing*8")
kono
parents:
diff changeset
172
kono
parents:
diff changeset
173 ;; Floating-point division and floating-point square-root instructions
kono
parents:
diff changeset
174 ;; have high latencies. They execute in the FGU.
kono
parents:
diff changeset
175
kono
parents:
diff changeset
;; Type fpdivs: latency 26.  Occupies slot3 in the first cycle, followed
;; by 25 cycles in which no unit is reserved.
176 (define_insn_reservation "m8_fpdivs" 26
kono
parents:
diff changeset
177 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
178 (eq_attr "type" "fpdivs"))
kono
parents:
diff changeset
179 "m8_slot3, nothing*25")
kono
parents:
diff changeset
180
kono
parents:
diff changeset
;; Type fpsqrts: latency 33.  Occupies slot3 in the first cycle,
;; followed by 32 cycles in which no unit is reserved.
181 (define_insn_reservation "m8_fpsqrts" 33
kono
parents:
diff changeset
182 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
183 (eq_attr "type" "fpsqrts"))
kono
parents:
diff changeset
184 "m8_slot3, nothing*32")
kono
parents:
diff changeset
185
kono
parents:
diff changeset
;; Type fpdivd: latency 30.  Occupies slot3 in the first cycle, followed
;; by 29 cycles in which no unit is reserved.
186 (define_insn_reservation "m8_fpdivd" 30
kono
parents:
diff changeset
187 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
188 (eq_attr "type" "fpdivd"))
kono
parents:
diff changeset
189 "m8_slot3, nothing*29")
kono
parents:
diff changeset
190
kono
parents:
diff changeset
;; Type fpsqrtd: latency 41.  Occupies slot3 in the first cycle,
;; followed by 40 cycles in which no unit is reserved.
191 (define_insn_reservation "m8_fpsqrtd" 41
kono
parents:
diff changeset
192 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
193 (eq_attr "type" "fpsqrtd"))
kono
parents:
diff changeset
194 "m8_slot3, nothing*40")
kono
parents:
diff changeset
195
kono
parents:
diff changeset
196 ;; SIMD VIS instructions executing in the Floating-point and graphics
kono
parents:
diff changeset
197 ;; unit (FPG) in slot3 usually have a latency of 5 cycles.
kono
parents:
diff changeset
198 ;;
kono
parents:
diff changeset
199 ;; However, the latency for many instructions is only 3 cycles if the
kono
parents:
diff changeset
200 ;; consumer can also be executed in 3 cycles. We model this with a
kono
parents:
diff changeset
201 ;; bypass. In these cases the instructions are executed in one of the
kono
parents:
diff changeset
202 ;; two 3-cycle crypto units (SPU, also known as "v3-pipes") in slots 2
kono
parents:
diff changeset
203 ;; and 3.
kono
parents:
diff changeset
204
kono
parents:
diff changeset
;; Latency 5.  Matches types viscmp and lzd; type fga with subtype
;; maxmin, cmask or other; type vismv with subtype single or movstouw;
;; and type visl with subtype single.  Occupies slot3 in the first
;; cycle, followed by 4 cycles in which no unit is reserved.
205 (define_insn_reservation "m8_vis" 5
kono
parents:
diff changeset
206 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
207 (ior (eq_attr "type" "viscmp,lzd")
kono
parents:
diff changeset
208 (and (eq_attr "type" "fga")
kono
parents:
diff changeset
209 (eq_attr "subtype" "maxmin,cmask,other"))
kono
parents:
diff changeset
210 (and (eq_attr "type" "vismv")
kono
parents:
diff changeset
211 (eq_attr "subtype" "single,movstouw"))
kono
parents:
diff changeset
212 (and (eq_attr "type" "visl")
kono
parents:
diff changeset
213 (eq_attr "subtype" "single"))))
kono
parents:
diff changeset
214 "m8_slot3, nothing*4")
kono
parents:
diff changeset
215
kono
parents:
diff changeset
;; When the result of an m8_vis insn is consumed by another m8_vis insn,
;; use a latency of 3 instead of the default 5.
216 (define_bypass 3 "m8_vis" "m8_vis")
kono
parents:
diff changeset
217
kono
parents:
diff changeset
;; Latency 5.  Matches type gsr with subtype "alignaddr".  Occupies
;; slot3 in the first cycle, followed by 4 cycles in which no unit is
;; reserved.
218 (define_insn_reservation "m8_gsr" 5
kono
parents:
diff changeset
219 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
220 (eq_attr "type" "gsr")
kono
parents:
diff changeset
221 (eq_attr "subtype" "alignaddr"))
kono
parents:
diff changeset
222 "m8_slot3, nothing*4")
kono
parents:
diff changeset
223
kono
parents:
diff changeset
224 ;; A few VIS instructions have a latency of 1.
kono
parents:
diff changeset
225
kono
parents:
diff changeset
;; Latency 1.  Matches type vismv with subtype double, movxtod or
;; movdtox; type visl with subtype double; and type fga with subtype
;; addsub64.  Occupies slot3 for one cycle.
226 (define_insn_reservation "m8_vis_1cycle" 1
kono
parents:
diff changeset
227 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
228 (ior (and (eq_attr "type" "vismv")
kono
parents:
diff changeset
229 (eq_attr "subtype" "double,movxtod,movdtox"))
kono
parents:
diff changeset
230 (and (eq_attr "type" "visl")
kono
parents:
diff changeset
231 (eq_attr "subtype" "double"))
kono
parents:
diff changeset
232 (and (eq_attr "type" "fga")
kono
parents:
diff changeset
233 (eq_attr "subtype" "addsub64"))))
kono
parents:
diff changeset
234 "m8_slot3")
kono
parents:
diff changeset
235
kono
parents:
diff changeset
236 ;; Reading and writing to the gsr register takes more than 70 cycles.
kono
parents:
diff changeset
237
kono
parents:
diff changeset
;; Latency 70.  Matches type gsr with subtype "reg".  Occupies slot3 in
;; the first cycle, followed by 69 cycles in which no unit is reserved.
238 (define_insn_reservation "m8_gsr_reg" 70
kono
parents:
diff changeset
239 (and (eq_attr "cpu" "m8")
kono
parents:
diff changeset
240 (eq_attr "type" "gsr")
kono
parents:
diff changeset
241 (eq_attr "subtype" "reg"))
kono
parents:
diff changeset
242 "m8_slot3, nothing*69")