annotate gcc/config/sparc/niagara7.md @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
111
kono
parents:
diff changeset
1 ;; Scheduling description for Niagara-7
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2 ;; Copyright (C) 2016-2018 Free Software Foundation, Inc.
111
kono
parents:
diff changeset
3 ;;
kono
parents:
diff changeset
4 ;; This file is part of GCC.
kono
parents:
diff changeset
5 ;;
kono
parents:
diff changeset
6 ;; GCC is free software; you can redistribute it and/or modify
kono
parents:
diff changeset
7 ;; it under the terms of the GNU General Public License as published by
kono
parents:
diff changeset
8 ;; the Free Software Foundation; either version 3, or (at your option)
kono
parents:
diff changeset
9 ;; any later version.
kono
parents:
diff changeset
10 ;;
kono
parents:
diff changeset
11 ;; GCC is distributed in the hope that it will be useful,
kono
parents:
diff changeset
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
kono
parents:
diff changeset
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
kono
parents:
diff changeset
14 ;; GNU General Public License for more details.
kono
parents:
diff changeset
15 ;;
kono
parents:
diff changeset
16 ;; You should have received a copy of the GNU General Public License
kono
parents:
diff changeset
17 ;; along with GCC; see the file COPYING3. If not see
kono
parents:
diff changeset
18 ;; <http://www.gnu.org/licenses/>.
kono
parents:
diff changeset
19
kono
parents:
diff changeset
20 (define_automaton "niagara7_0")
kono
parents:
diff changeset
21
kono
parents:
diff changeset
22 ;; The S4 core has a dual-issue queue. This queue is divided into two
kono
parents:
diff changeset
23 ;; slots. One instruction can be issued each cycle to each slot, and
kono
parents:
diff changeset
24 ;; up to 2 instructions are committed each cycle. Each slot serves
kono
parents:
diff changeset
25 ;; several execution units, as depicted below:
kono
parents:
diff changeset
26 ;;
kono
parents:
diff changeset
27 ;;
kono
parents:
diff changeset
28 ;; n7_slot0 - Integer unit.
kono
parents:
diff changeset
29 ;; - Load/Store unit.
kono
parents:
diff changeset
30 ;; === QUEUE ==>
kono
parents:
diff changeset
31 ;;
kono
parents:
diff changeset
32 ;; n7_slot1 - Integer unit.
kono
parents:
diff changeset
33 ;; - Branch unit.
kono
parents:
diff changeset
34 ;; - Floating-point and graphics unit.
kono
parents:
diff changeset
35 ;; - 3-cycle crypto unit.
kono
parents:
diff changeset
36
kono
parents:
diff changeset
37 (define_cpu_unit "n7_slot0,n7_slot1" "niagara7_0")
kono
parents:
diff changeset
38
kono
parents:
diff changeset
39 ;; Some instructions stall the pipeline and prevent any other
kono
parents:
diff changeset
40 ;; instruction from being issued in the same cycle. We assume the same for
kono
parents:
diff changeset
41 ;; multi-instruction insns.
kono
parents:
diff changeset
42
kono
parents:
diff changeset
43 (define_reservation "n7_single_issue" "n7_slot0 + n7_slot1")
kono
parents:
diff changeset
44
kono
parents:
diff changeset
45 (define_insn_reservation "n7_single" 1
kono
parents:
diff changeset
46 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
47 (eq_attr "type" "multi,savew,flushw,trap"))
kono
parents:
diff changeset
48 "n7_single_issue")
kono
parents:
diff changeset
49
kono
parents:
diff changeset
50 ;; Most of the instructions executing in the integer unit have a
kono
parents:
diff changeset
51 ;; latency of 1.
kono
parents:
diff changeset
52
kono
parents:
diff changeset
53 (define_insn_reservation "n7_integer" 1
kono
parents:
diff changeset
54 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
55 (eq_attr "type" "ialu,ialuX,shift,cmove,compare"))
kono
parents:
diff changeset
56 "(n7_slot0 | n7_slot1)")
kono
parents:
diff changeset
57
kono
parents:
diff changeset
58 ;; Flushing the instruction memory takes 27 cycles.
kono
parents:
diff changeset
59
kono
parents:
diff changeset
60 (define_insn_reservation "n7_iflush" 27
kono
parents:
diff changeset
61 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
62 (eq_attr "type" "iflush"))
kono
parents:
diff changeset
63 "(n7_slot0 | n7_slot1), nothing*26")
kono
parents:
diff changeset
64
kono
parents:
diff changeset
65 ;; The integer multiplication instructions have a latency of 12 cycles
kono
parents:
diff changeset
66 ;; and execute in the integer unit.
kono
parents:
diff changeset
67 ;;
kono
parents:
diff changeset
68 ;; Likewise for array*, edge* and pdistn instructions.
kono
parents:
diff changeset
69
kono
parents:
diff changeset
70 (define_insn_reservation "n7_imul" 12
kono
parents:
diff changeset
71 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
72 (eq_attr "type" "imul,array,edge,edgen,pdistn"))
kono
parents:
diff changeset
73 "(n7_slot0 | n7_slot1), nothing*11")
kono
parents:
diff changeset
74
kono
parents:
diff changeset
75 ;; The integer division instructions have a latency of 35 cycles and
kono
parents:
diff changeset
76 ;; execute in the integer unit.
kono
parents:
diff changeset
77
kono
parents:
diff changeset
78 (define_insn_reservation "n7_idiv" 35
kono
parents:
diff changeset
79 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
80 (eq_attr "type" "idiv"))
kono
parents:
diff changeset
81 "(n7_slot0 | n7_slot1), nothing*34")
kono
parents:
diff changeset
82
kono
parents:
diff changeset
83 ;; Both integer and floating-point load instructions have a latency of
kono
parents:
diff changeset
84 ;; 5 cycles, and execute in the slot0.
kono
parents:
diff changeset
85 ;;
kono
parents:
diff changeset
86 ;; The prefetch instruction also executes in the load/store unit, but
kono
parents:
diff changeset
87 ;; its latency is only 1 cycle.
kono
parents:
diff changeset
88
kono
parents:
diff changeset
89 (define_insn_reservation "n7_load" 5
kono
parents:
diff changeset
90 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
91 (ior (eq_attr "type" "fpload,sload")
kono
parents:
diff changeset
92 (and (eq_attr "type" "load")
kono
parents:
diff changeset
93 (eq_attr "subtype" "regular"))))
kono
parents:
diff changeset
94 "n7_slot0, nothing*4")
kono
parents:
diff changeset
95
kono
parents:
diff changeset
96 (define_insn_reservation "n7_prefetch" 1
kono
parents:
diff changeset
97 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
98 (eq_attr "type" "load")
kono
parents:
diff changeset
99 (eq_attr "subtype" "prefetch"))
kono
parents:
diff changeset
100 "n7_slot0")
kono
parents:
diff changeset
101
kono
parents:
diff changeset
102 ;; Both integer and floating-point store instructions have a latency
kono
parents:
diff changeset
103 ;; of 1 cycle, and execute in the load/store unit in slot0.
kono
parents:
diff changeset
104
kono
parents:
diff changeset
105 (define_insn_reservation "n7_store" 1
kono
parents:
diff changeset
106 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
107 (eq_attr "type" "store,fpstore"))
kono
parents:
diff changeset
108 "n7_slot0")
kono
parents:
diff changeset
109
kono
parents:
diff changeset
110 ;; Control-transfer instructions execute in the Branch Unit in the
kono
parents:
diff changeset
111 ;; slot1.
kono
parents:
diff changeset
112
kono
parents:
diff changeset
113 (define_insn_reservation "n7_cti" 1
kono
parents:
diff changeset
114 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
115 (eq_attr "type" "cbcond,uncond_cbcond,branch,call,sibcall,call_no_delay_slot,uncond_branch,return"))
kono
parents:
diff changeset
116 "n7_slot1")
kono
parents:
diff changeset
117
kono
parents:
diff changeset
118 ;; Many instructions executing in the Floating-point and Graphics unit
kono
parents:
diff changeset
119 ;; in the slot1 feature a latency of 11 cycles.
kono
parents:
diff changeset
120
kono
parents:
diff changeset
121 (define_insn_reservation "n7_fp" 11
kono
parents:
diff changeset
122 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
123 (ior (eq_attr "type" "fpmove,fpcmove,fpcrmove,fp,fpcmp,fpmul,fgm_pack,fgm_mul,pdist")
kono
parents:
diff changeset
124 (and (eq_attr "type" "fga")
kono
parents:
diff changeset
125 (eq_attr "subtype" "fpu,maxmin"))))
kono
parents:
diff changeset
126 "n7_slot1, nothing*10")
kono
parents:
diff changeset
127
kono
parents:
diff changeset
128 ;; Floating-point division and floating-point square-root instructions
kono
parents:
diff changeset
129 ;; have high latencies. They execute in the floating-point and
kono
parents:
diff changeset
130 ;; graphics unit in the slot1.
kono
parents:
diff changeset
131
kono
parents:
diff changeset
132
kono
parents:
diff changeset
133 (define_insn_reservation "n7_fpdivs" 24
kono
parents:
diff changeset
134 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
135 (eq_attr "type" "fpdivs,fpsqrts"))
kono
parents:
diff changeset
136 "n7_slot1, nothing*23")
kono
parents:
diff changeset
137
kono
parents:
diff changeset
138 (define_insn_reservation "n7_fpdivd" 37
kono
parents:
diff changeset
139 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
140 (eq_attr "type" "fpdivd,fpsqrtd"))
kono
parents:
diff changeset
141 "n7_slot1, nothing*36")
kono
parents:
diff changeset
142
kono
parents:
diff changeset
143 ;; SIMD VIS instructions executing in the Floating-point and graphics
kono
parents:
diff changeset
144 ;; unit (FPG) in slot1 usually have a latency of either 11 or 12
kono
parents:
diff changeset
145 ;; cycles.
kono
parents:
diff changeset
146 ;;
kono
parents:
diff changeset
147 ;; However, the latency for many instructions is only 3 cycles if the
kono
parents:
diff changeset
148 ;; consumer can also be executed in 3 cycles. We model this with a
kono
parents:
diff changeset
149 ;; bypass. In these cases the instructions are executed in the
kono
parents:
diff changeset
150 ;; 3-cycle crypto unit which also serves slot1.
kono
parents:
diff changeset
151
kono
parents:
diff changeset
152 (define_insn_reservation "n7_vis_11cycles" 11
kono
parents:
diff changeset
153 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
154 (ior (and (eq_attr "type" "fga")
kono
parents:
diff changeset
155 (eq_attr "subtype" "addsub64,other"))
kono
parents:
diff changeset
156 (and (eq_attr "type" "vismv")
kono
parents:
diff changeset
157 (eq_attr "subtype" "double,single"))
kono
parents:
diff changeset
158 (and (eq_attr "type" "visl")
kono
parents:
diff changeset
159 (eq_attr "subtype" "double,single"))))
kono
parents:
diff changeset
160 "n7_slot1, nothing*10")
kono
parents:
diff changeset
161
kono
parents:
diff changeset
162 (define_insn_reservation "n7_vis_12cycles" 12
kono
parents:
diff changeset
163 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
164 (ior (eq_attr "type" "bmask,viscmp")
kono
parents:
diff changeset
165 (and (eq_attr "type" "fga")
kono
parents:
diff changeset
166 (eq_attr "subtype" "cmask"))
kono
parents:
diff changeset
167 (and (eq_attr "type" "vismv")
kono
parents:
diff changeset
168 (eq_attr "subtype" "movstouw"))))
kono
parents:
diff changeset
169 "n7_slot1, nothing*11")
kono
parents:
diff changeset
170
kono
parents:
diff changeset
171 (define_bypass 3 "n7_vis_*" "n7_vis_*")
kono
parents:
diff changeset
172
kono
parents:
diff changeset
173 ;; Some other VIS instructions have a latency of 12 cycles, and won't
kono
parents:
diff changeset
174 ;; be executed in the 3-cycle crypto pipe.
kono
parents:
diff changeset
175
kono
parents:
diff changeset
176 (define_insn_reservation "n7_lzd" 12
kono
parents:
diff changeset
177 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
178 (ior (eq_attr "type" "lzd")
kono
parents:
diff changeset
179 (and (eq_attr "type" "gsr")
kono
parents:
diff changeset
180 (eq_attr "subtype" "alignaddr"))))
kono
parents:
diff changeset
181 "n7_slot1, nothing*11")
kono
parents:
diff changeset
182
kono
parents:
diff changeset
183 ;; A couple of VIS instructions feature very low latencies in the M7.
kono
parents:
diff changeset
184
kono
parents:
diff changeset
185 (define_insn_reservation "n7_single_vis" 1
kono
parents:
diff changeset
186 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
187 (eq_attr "type" "vismv")
kono
parents:
diff changeset
188 (eq_attr "subtype" "movxtod"))
kono
parents:
diff changeset
189 "n7_slot1")
kono
parents:
diff changeset
190
kono
parents:
diff changeset
191 (define_insn_reservation "n7_double_vis" 2
kono
parents:
diff changeset
192 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
193 (eq_attr "type" "vismv")
kono
parents:
diff changeset
194 (eq_attr "subtype" "movdtox"))
kono
parents:
diff changeset
195 "n7_slot1, nothing")
kono
parents:
diff changeset
196
kono
parents:
diff changeset
197 ;; Reading and writing to the gsr register takes a high number of
kono
parents:
diff changeset
198 ;; cycles that is not documented in the PRM. Let's use the same value
kono
parents:
diff changeset
199 ;; as the M8.
kono
parents:
diff changeset
200
kono
parents:
diff changeset
201 (define_insn_reservation "n7_gsr_reg" 70
kono
parents:
diff changeset
202 (and (eq_attr "cpu" "niagara7")
kono
parents:
diff changeset
203 (eq_attr "type" "gsr")
kono
parents:
diff changeset
204 (eq_attr "subtype" "reg"))
kono
parents:
diff changeset
205 "n7_slot1, nothing*69")