Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/sparc/niagara7.md @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 ;; Scheduling description for Niagara-7 | |
2 ;; Copyright (C) 2016-2017 Free Software Foundation, Inc. | |
3 ;; | |
4 ;; This file is part of GCC. | |
5 ;; | |
6 ;; GCC is free software; you can redistribute it and/or modify | |
7 ;; it under the terms of the GNU General Public License as published by | |
8 ;; the Free Software Foundation; either version 3, or (at your option) | |
9 ;; any later version. | |
10 ;; | |
11 ;; GCC is distributed in the hope that it will be useful, | |
12 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 ;; GNU General Public License for more details. | |
15 ;; | |
16 ;; You should have received a copy of the GNU General Public License | |
17 ;; along with GCC; see the file COPYING3. If not see | |
18 ;; <http://www.gnu.org/licenses/>. | |
19 | |
20 (define_automaton "niagara7_0") | |
21 | |
22 ;; The S4 core has a dual-issue queue. This queue is divided into two | |
23 ;; slots. One instruction can be issued each cycle to each slot, and | |
24 ;; up to 2 instructions are committed each cycle. Each slot serves | |
25 ;; several execution units, as depicted below: | |
26 ;; | |
27 ;; | |
28 ;; n7_slot0 - Integer unit. | |
29 ;; - Load/Store unit. | |
30 ;; === QUEUE ==> | |
31 ;; | |
32 ;; n7_slot1 - Integer unit. | |
33 ;; - Branch unit. | |
34 ;; - Floating-point and graphics unit. | |
35 ;; - 3-cycle crypto unit. | |
36 | |
37 (define_cpu_unit "n7_slot0,n7_slot1" "niagara7_0") | |
38 | |
39 ;; Some instructions stall the pipeline and prevent any other | |
40 ;; instruction from being issued in the same cycle. We assume the same | |
41 ;; for multi-instruction insns. | |
42 | |
43 (define_reservation "n7_single_issue" "n7_slot0 + n7_slot1") | |
44 | |
45 (define_insn_reservation "n7_single" 1 | |
46 (and (eq_attr "cpu" "niagara7") | |
47 (eq_attr "type" "multi,savew,flushw,trap")) | |
48 "n7_single_issue") | |
49 | |
50 ;; Most of the instructions executing in the integer unit have a | |
51 ;; latency of 1 cycle. They can be issued to either slot. | |
52 | |
53 (define_insn_reservation "n7_integer" 1 | |
54 (and (eq_attr "cpu" "niagara7") | |
55 (eq_attr "type" "ialu,ialuX,shift,cmove,compare")) | |
56 "(n7_slot0 | n7_slot1)") | |
57 | |
58 ;; Flushing the instruction memory takes 27 cycles (issued to either slot). | |
59 | |
60 (define_insn_reservation "n7_iflush" 27 | |
61 (and (eq_attr "cpu" "niagara7") | |
62 (eq_attr "type" "iflush")) | |
63 "(n7_slot0 | n7_slot1), nothing*26") | |
64 | |
65 ;; The integer multiplication instructions have a latency of 12 cycles | |
66 ;; and execute in the integer unit of either slot. | |
67 ;; | |
68 ;; Likewise for array*, edge* and pdistn instructions. | |
69 | |
70 (define_insn_reservation "n7_imul" 12 | |
71 (and (eq_attr "cpu" "niagara7") | |
72 (eq_attr "type" "imul,array,edge,edgen,pdistn")) | |
73 "(n7_slot0 | n7_slot1), nothing*11") | |
74 | |
75 ;; The integer division instructions have a latency of 35 cycles and | |
76 ;; execute in the integer unit of either slot. | |
77 | |
78 (define_insn_reservation "n7_idiv" 35 | |
79 (and (eq_attr "cpu" "niagara7") | |
80 (eq_attr "type" "idiv")) | |
81 "(n7_slot0 | n7_slot1), nothing*34") | |
82 | |
83 ;; Both integer and floating-point load instructions have a latency of | |
84 ;; 5 cycles, and execute in slot0. | |
85 ;; | |
86 ;; The prefetch instruction also executes in the load/store unit, but | |
87 ;; its latency is only 1 cycle. | |
88 | |
89 (define_insn_reservation "n7_load" 5 | |
90 (and (eq_attr "cpu" "niagara7") | |
91 (ior (eq_attr "type" "fpload,sload") | |
92 (and (eq_attr "type" "load") | |
93 (eq_attr "subtype" "regular")))) | |
94 "n7_slot0, nothing*4") | |
95 | |
96 (define_insn_reservation "n7_prefetch" 1 | |
97 (and (eq_attr "cpu" "niagara7") | |
98 (eq_attr "type" "load") | |
99 (eq_attr "subtype" "prefetch")) | |
100 "n7_slot0") | |
101 | |
102 ;; Both integer and floating-point store instructions have a latency | |
103 ;; of 1 cycle, and execute in the load/store unit served by slot0. | |
104 | |
105 (define_insn_reservation "n7_store" 1 | |
106 (and (eq_attr "cpu" "niagara7") | |
107 (eq_attr "type" "store,fpstore")) | |
108 "n7_slot0") | |
109 | |
110 ;; Control-transfer instructions execute in the Branch Unit in | |
111 ;; slot1, with a latency of 1 cycle. | |
112 | |
113 (define_insn_reservation "n7_cti" 1 | |
114 (and (eq_attr "cpu" "niagara7") | |
115 (eq_attr "type" "cbcond,uncond_cbcond,branch,call,sibcall,call_no_delay_slot,uncond_branch,return")) | |
116 "n7_slot1") | |
117 | |
118 ;; Many instructions executing in the Floating-point and Graphics unit | |
119 ;; in slot1 have a latency of 11 cycles. | |
120 | |
121 (define_insn_reservation "n7_fp" 11 | |
122 (and (eq_attr "cpu" "niagara7") | |
123 (ior (eq_attr "type" "fpmove,fpcmove,fpcrmove,fp,fpcmp,fpmul,fgm_pack,fgm_mul,pdist") | |
124 (and (eq_attr "type" "fga") | |
125 (eq_attr "subtype" "fpu,maxmin")))) | |
126 "n7_slot1, nothing*10") | |
127 | |
128 ;; Floating-point division and floating-point square-root instructions | |
129 ;; have high latencies (24 or 37 cycles). They execute in the | |
130 ;; floating-point and graphics unit in slot1. | |
131 | |
132 | |
133 (define_insn_reservation "n7_fpdivs" 24 | |
134 (and (eq_attr "cpu" "niagara7") | |
135 (eq_attr "type" "fpdivs,fpsqrts")) | |
136 "n7_slot1, nothing*23") | |
137 | |
138 (define_insn_reservation "n7_fpdivd" 37 | |
139 (and (eq_attr "cpu" "niagara7") | |
140 (eq_attr "type" "fpdivd,fpsqrtd")) | |
141 "n7_slot1, nothing*36") | |
142 | |
143 ;; SIMD VIS instructions executing in the Floating-point and graphics | |
144 ;; unit (FPG) in slot1 usually have a latency of either 11 or 12 | |
145 ;; cycles. | |
146 ;; | |
147 ;; However, the latency for many instructions is only 3 cycles if the | |
148 ;; consumer can also be executed in 3 cycles. We model this with a | |
149 ;; bypass. In these cases the instructions are executed in the | |
150 ;; 3-cycle crypto unit, which is also served by slot1. | |
151 | |
152 (define_insn_reservation "n7_vis_11cycles" 11 | |
153 (and (eq_attr "cpu" "niagara7") | |
154 (ior (and (eq_attr "type" "fga") | |
155 (eq_attr "subtype" "addsub64,other")) | |
156 (and (eq_attr "type" "vismv") | |
157 (eq_attr "subtype" "double,single")) | |
158 (and (eq_attr "type" "visl") | |
159 (eq_attr "subtype" "double,single")))) | |
160 "n7_slot1, nothing*10") | |
161 | |
162 (define_insn_reservation "n7_vis_12cycles" 12 | |
163 (and (eq_attr "cpu" "niagara7") | |
164 (ior (eq_attr "type" "bmask,viscmp") | |
165 (and (eq_attr "type" "fga") | |
166 (eq_attr "subtype" "cmask")) | |
167 (and (eq_attr "type" "vismv") | |
168 (eq_attr "subtype" "movstouw")))) | |
169 "n7_slot1, nothing*11") | |
170 | |
171 (define_bypass 3 "n7_vis_*" "n7_vis_*") | |
172 | |
173 ;; The lzd and alignaddr VIS instructions have a latency of 12 cycles, | |
174 ;; and won't be executed in the 3-cycle crypto pipe. | |
175 | |
176 (define_insn_reservation "n7_lzd" 12 | |
177 (and (eq_attr "cpu" "niagara7") | |
178 (ior (eq_attr "type" "lzd") | |
179 (and (eq_attr "type" "gsr") | |
180 (eq_attr "subtype" "alignaddr")))) | |
181 "n7_slot1, nothing*11") | |
182 | |
183 ;; The movxtod and movdtox VIS moves feature very low latencies in the M7. | |
184 | |
185 (define_insn_reservation "n7_single_vis" 1 | |
186 (and (eq_attr "cpu" "niagara7") | |
187 (eq_attr "type" "vismv") | |
188 (eq_attr "subtype" "movxtod")) | |
189 "n7_slot1") | |
190 | |
191 (define_insn_reservation "n7_double_vis" 2 | |
192 (and (eq_attr "cpu" "niagara7") | |
193 (eq_attr "type" "vismv") | |
194 (eq_attr "subtype" "movdtox")) | |
195 "n7_slot1, nothing") | |
196 | |
197 ;; Reading and writing to the gsr register takes a high number of | |
198 ;; cycles that is not documented in the PRM. Let's use the same value | |
199 ;; as the M8. | |
200 | |
201 (define_insn_reservation "n7_gsr_reg" 70 | |
202 (and (eq_attr "cpu" "niagara7") | |
203 (eq_attr "type" "gsr") | |
204 (eq_attr "subtype" "reg")) | |
205 "n7_slot1, nothing*69") |