Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/rs6000/power9.md @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 ;; Scheduling description for IBM POWER9 processor. | |
2 ;; Copyright (C) 2016-2017 Free Software Foundation, Inc. | |
3 ;; | |
4 ;; Contributed by Pat Haugen (pthaugen@us.ibm.com). | |
5 | |
6 ;; This file is part of GCC. | |
7 ;; | |
8 ;; GCC is free software; you can redistribute it and/or modify it | |
9 ;; under the terms of the GNU General Public License as published | |
10 ;; by the Free Software Foundation; either version 3, or (at your | |
11 ;; option) any later version. | |
12 ;; | |
13 ;; GCC is distributed in the hope that it will be useful, but WITHOUT | |
14 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
15 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
16 ;; License for more details. | |
17 ;; | |
18 ;; You should have received a copy of the GNU General Public License | |
19 ;; along with GCC; see the file COPYING3. If not see | |
20 ;; <http://www.gnu.org/licenses/>. | |
21 | |
; Four automata: dispatch ports (dsp), load/store units (lsu), vector/scalar | |
; units (vsu) and the remaining units (misc). | |
22 (define_automaton "power9dsp,power9lsu,power9vsu,power9misc") | |
23 | |
; Four load/store units and four vector/scalar units. | |
24 (define_cpu_unit "lsu0_power9,lsu1_power9,lsu2_power9,lsu3_power9" "power9lsu") | |
25 (define_cpu_unit "vsu0_power9,vsu1_power9,vsu2_power9,vsu3_power9" "power9vsu") | |
26 ; Two vector permute units, part of vsu | |
27 (define_cpu_unit "prm0_power9,prm1_power9" "power9vsu") | |
28 ; Two fixed point divide units, not pipelined | |
29 (define_cpu_unit "fx_div0_power9,fx_div1_power9" "power9misc") | |
; Branch unit, crypto unit and DFU (used by the quad-precision and dfp insns). | |
30 (define_cpu_unit "bru_power9,cryptu_power9,dfu_power9" "power9misc") | |
31 | |
; Dispatch units: x0..x3 are the slice dispatch slots, xa0/xa1 and xb0/xb1 | |
; model the 3rd dispatch slot of each superslice, br0/br1 are the branch ports. | |
32 (define_cpu_unit "x0_power9,x1_power9,xa0_power9,xa1_power9, | |
33 x2_power9,x3_power9,xb0_power9,xb1_power9, | |
34 br0_power9,br1_power9" "power9dsp") | |
35 | |
36 | |
37 ; Dispatch port reservations | |
38 ; | |
39 ; Power9 can dispatch a maximum of 6 iops per cycle with the following | |
40 ; general restrictions (other restrictions also apply): | |
41 ; 1) At most 2 iops per execution slice | |
42 ; 2) At most 2 iops to the branch unit | |
43 ; Note that insn position in a dispatch group of 6 insns does not imply which | |
44 ; execution slice the insn is routed to. The units are used to infer the | |
45 ; conflicts that exist (e.g. an 'even' requirement will preclude dispatch | |
46 ; with 2 insns with 'superslice' requirement). | |
47 | |
48 ; The xa0/xa1 units really represent the 3rd dispatch port for a superslice but | |
49 ; are listed as separate units to allow those insns that preclude its use to | |
50 ; still be scheduled two to a superslice while reserving the 3rd slot. The | |
51 ; same applies for xb0/xb1. | |
; Reserving a 3rd slot takes both halves of the pair (xa0+xa1 or xb0+xb1). | |
52 (define_reservation "DU_xa_power9" "xa0_power9+xa1_power9") | |
53 (define_reservation "DU_xb_power9" "xb0_power9+xb1_power9") | |
54 | |
55 ; Any execution slice dispatch | |
; One of the four slice slots x0..x3, or one complete 3rd-slot pair. | |
56 (define_reservation "DU_any_power9" | |
57 "x0_power9|x1_power9|DU_xa_power9|x2_power9|x3_power9| | |
58 DU_xb_power9") | |
59 | |
60 ; Even slice, actually takes even/odd slots | |
; Reserves x0+x1 or x2+x3 together. | |
; NOTE(review): this expansion is identical to DU_super_power9; confirm that | |
; is intended. | |
61 (define_reservation "DU_even_power9" "x0_power9+x1_power9|x2_power9+x3_power9") | |
62 | |
63 ; Slice plus 3rd slot | |
; One slice slot plus a single 3rd-slot unit: xa0/xa1 pair with x0/x1, | |
; xb0/xb1 pair with x2/x3. | |
64 (define_reservation "DU_slice_3_power9" | |
65 "x0_power9+xa0_power9|x1_power9+xa1_power9| | |
66 x2_power9+xb0_power9|x3_power9+xb1_power9") | |
67 | |
68 ; Superslice | |
; Both slice slots of one superslice: x0+x1 or x2+x3. | |
69 (define_reservation "DU_super_power9" | |
70 "x0_power9+x1_power9|x2_power9+x3_power9") | |
71 | |
72 ; 2-way cracked | |
; Reserves two dispatch resources at once; each alternative is one of the | |
; pairs listed below (a DU_xa/DU_xb term takes both units of that 3rd slot). | |
73 (define_reservation "DU_C2_power9" "x0_power9+x1_power9| | |
74 x1_power9+DU_xa_power9| | |
75 x1_power9+x2_power9| | |
76 DU_xa_power9+x2_power9| | |
77 x2_power9+x3_power9| | |
78 x3_power9+DU_xb_power9") | |
79 | |
80 ; 2-way cracked plus 3rd slot | |
; Two slice slots plus one 3rd-slot unit; only xa0 and xb0 appear in the | |
; alternatives. | |
81 (define_reservation "DU_C2_3_power9" "x0_power9+x1_power9+xa0_power9| | |
82 x1_power9+x2_power9+xa0_power9| | |
83 x1_power9+x2_power9+xb0_power9| | |
84 x2_power9+x3_power9+xb0_power9") | |
85 | |
86 ; 3-way cracked (consumes whole decode/dispatch cycle) | |
; Reserves every dispatch unit at once, including both branch ports, so no | |
; other insn can be dispatched in the same cycle. | |
87 (define_reservation "DU_C3_power9" | |
88 "x0_power9+x1_power9+xa0_power9+xa1_power9+x2_power9+ | |
89 x3_power9+xb0_power9+xb1_power9+br0_power9+br1_power9") | |
90 | |
91 ; Branch ports | |
; Either one of the two branch dispatch ports. | |
92 (define_reservation "DU_branch_power9" "br0_power9|br1_power9") | |
93 | |
94 | |
95 ; Execution unit reservations | |
; Any single load/store unit. | |
96 (define_reservation "LSU_power9" | |
97 "lsu0_power9|lsu1_power9|lsu2_power9|lsu3_power9") | |
98 | |
; Two load/store units with neighbouring indices, wrapping around from lsu3 | |
; to lsu0. | |
99 (define_reservation "LSU_pair_power9" | |
100 "lsu0_power9+lsu1_power9|lsu1_power9+lsu2_power9| | |
101 lsu2_power9+lsu3_power9|lsu3_power9+lsu0_power9") | |
102 | |
; Any single vector/scalar unit. | |
103 (define_reservation "VSU_power9" | |
104 "vsu0_power9|vsu1_power9|vsu2_power9|vsu3_power9") | |
105 | |
; Both vector/scalar units of one pair: vsu0+vsu1 or vsu2+vsu3. | |
106 (define_reservation "VSU_super_power9" | |
107 "vsu0_power9+vsu1_power9|vsu2_power9+vsu3_power9") | |
108 | |
; Either vector permute unit. | |
109 (define_reservation "VSU_PRM_power9" "prm0_power9|prm1_power9") | |
110 | |
111 | |
112 ; LS Unit | |
; Reservation strings have the form "dispatch,execute": the dispatch units | |
; are reserved in the first cycle and the execution units in the next one. | |
; Plain integer load (no sign extension, no update): 4 cycle latency. | |
113 (define_insn_reservation "power9-load" 4 | |
114 (and (eq_attr "type" "load") | |
115 (eq_attr "sign_extend" "no") | |
116 (eq_attr "update" "no") | |
117 (eq_attr "cpu" "power9")) | |
118 "DU_any_power9,LSU_power9") | |
119 | |
; Update form: dispatched as 2-way cracked and also reserves a VSU | |
; (presumably for the address update -- confirm). | |
120 (define_insn_reservation "power9-load-update" 4 | |
121 (and (eq_attr "type" "load") | |
122 (eq_attr "sign_extend" "no") | |
123 (eq_attr "update" "yes") | |
124 (eq_attr "cpu" "power9")) | |
125 "DU_C2_power9,LSU_power9+VSU_power9") | |
126 | |
; Sign-extending load: 2-way cracked dispatch, 6 cycle latency. | |
127 (define_insn_reservation "power9-load-ext" 6 | |
128 (and (eq_attr "type" "load") | |
129 (eq_attr "sign_extend" "yes") | |
130 (eq_attr "update" "no") | |
131 (eq_attr "cpu" "power9")) | |
132 "DU_C2_power9,LSU_power9") | |
133 | |
; Sign-extending update form: 3-way cracked, so it takes the whole dispatch | |
; cycle, then reserves an LSU together with a VSU. | |
134 (define_insn_reservation "power9-load-ext-update" 6 | |
135 (and (eq_attr "type" "load") | |
136 (eq_attr "sign_extend" "yes") | |
137 (eq_attr "update" "yes") | |
138 (eq_attr "cpu" "power9")) | |
139 "DU_C3_power9,LSU_power9+VSU_power9") | |
140 | |
; 64-bit FP load: slice plus 3rd slot dispatch, 4 cycle latency. | |
141 (define_insn_reservation "power9-fpload-double" 4 | |
142 (and (eq_attr "type" "fpload") | |
143 (eq_attr "update" "no") | |
144 (eq_attr "size" "64") | |
145 (eq_attr "cpu" "power9")) | |
146 "DU_slice_3_power9,LSU_power9") | |
147 | |
; 64-bit FP load with update: 2-way cracked plus 3rd slot, LSU and VSU. | |
148 (define_insn_reservation "power9-fpload-update-double" 4 | |
149 (and (eq_attr "type" "fpload") | |
150 (eq_attr "update" "yes") | |
151 (eq_attr "size" "64") | |
152 (eq_attr "cpu" "power9")) | |
153 "DU_C2_3_power9,LSU_power9+VSU_power9") | |
154 | |
155 ; SFmode loads are cracked and have additional 2 cycles over DFmode | |
156 (define_insn_reservation "power9-fpload-single" 6 | |
157 (and (eq_attr "type" "fpload") | |
158 (eq_attr "update" "no") | |
159 (eq_attr "size" "32") | |
160 (eq_attr "cpu" "power9")) | |
161 "DU_C2_3_power9,LSU_power9") | |
162 | |
; 32-bit FP load with update: 3-way cracked (whole dispatch cycle). | |
163 (define_insn_reservation "power9-fpload-update-single" 6 | |
164 (and (eq_attr "type" "fpload") | |
165 (eq_attr "update" "yes") | |
166 (eq_attr "size" "32") | |
167 (eq_attr "cpu" "power9")) | |
168 "DU_C3_power9,LSU_power9+VSU_power9") | |
169 | |
; Vector load: any dispatch slot, then a pair of LSUs; 5 cycle latency. | |
170 (define_insn_reservation "power9-vecload" 5 | |
171 (and (eq_attr "type" "vecload") | |
172 (eq_attr "cpu" "power9")) | |
173 "DU_any_power9,LSU_pair_power9") | |
174 | |
175 ; Store data can issue 2 cycles after AGEN issue, 3 cycles for vector store | |
; Non-update stores are given a default latency of 0. | |
176 (define_insn_reservation "power9-store" 0 | |
177 (and (eq_attr "type" "store") | |
178 (eq_attr "update" "no") | |
179 (eq_attr "indexed" "no") | |
180 (eq_attr "cpu" "power9")) | |
181 "DU_slice_3_power9,LSU_power9") | |
182 | |
; Indexed store: same latency and reservation as the non-indexed form. | |
183 (define_insn_reservation "power9-store-indexed" 0 | |
184 (and (eq_attr "type" "store") | |
185 (eq_attr "update" "no") | |
186 (eq_attr "indexed" "yes") | |
187 (eq_attr "cpu" "power9")) | |
188 "DU_slice_3_power9,LSU_power9") | |
189 | |
190 ; Update forms have 2 cycle latency for updated addr reg | |
191 (define_insn_reservation "power9-store-update" 2 | |
192 (and (eq_attr "type" "store") | |
193 (eq_attr "update" "yes") | |
194 (eq_attr "indexed" "no") | |
195 (eq_attr "cpu" "power9")) | |
196 "DU_C2_3_power9,LSU_power9+VSU_power9") | |
197 | |
198 ; Update forms have 2 cycle latency for updated addr reg | |
199 (define_insn_reservation "power9-store-update-indexed" 2 | |
200 (and (eq_attr "type" "store") | |
201 (eq_attr "update" "yes") | |
202 (eq_attr "indexed" "yes") | |
203 (eq_attr "cpu" "power9")) | |
204 "DU_C2_3_power9,LSU_power9+VSU_power9") | |
205 | |
; FP store without update: slice plus 3rd slot dispatch, one LSU. | |
206 (define_insn_reservation "power9-fpstore" 0 | |
207 (and (eq_attr "type" "fpstore") | |
208 (eq_attr "update" "no") | |
209 (eq_attr "cpu" "power9")) | |
210 "DU_slice_3_power9,LSU_power9") | |
211 | |
212 ; Update forms have 2 cycle latency for updated addr reg | |
213 (define_insn_reservation "power9-fpstore-update" 2 | |
214 (and (eq_attr "type" "fpstore") | |
215 (eq_attr "update" "yes") | |
216 (eq_attr "cpu" "power9")) | |
217 "DU_C2_3_power9,LSU_power9+VSU_power9") | |
218 | |
; Vector store: superslice dispatch, then a pair of LSUs. | |
219 (define_insn_reservation "power9-vecstore" 0 | |
220 (and (eq_attr "type" "vecstore") | |
221 (eq_attr "cpu" "power9")) | |
222 "DU_super_power9,LSU_pair_power9") | |
223 | |
; Load-and-reserve (type load_l): same latency and units as a plain load. | |
224 (define_insn_reservation "power9-larx" 4 | |
225 (and (eq_attr "type" "load_l") | |
226 (eq_attr "cpu" "power9")) | |
227 "DU_any_power9,LSU_power9") | |
228 | |
; Store-conditional (type store_c): same latency and units as the update-form | |
; stores. | |
229 (define_insn_reservation "power9-stcx" 2 | |
230 (and (eq_attr "type" "store_c") | |
231 (eq_attr "cpu" "power9")) | |
232 "DU_C2_3_power9,LSU_power9+VSU_power9") | |
233 | |
; sync and isync are modelled as occupying one LSU. | |
234 (define_insn_reservation "power9-sync" 4 | |
235 (and (eq_attr "type" "sync,isync") | |
236 (eq_attr "cpu" "power9")) | |
237 "DU_any_power9,LSU_power9") | |
238 | |
239 | |
240 ; VSU Execution Unit | |
241 | |
242 ; Fixed point ops | |
243 | |
244 ; Most ALU insns are simple 2 cycle, including record form | |
; insert/shift are matched here only when they are not record form | |
; (dot = no); the record forms are handled by power9-cracked-alu. | |
245 (define_insn_reservation "power9-alu" 2 | |
246 (and (ior (eq_attr "type" "add,exts,integer,logical,isel") | |
247 (and (eq_attr "type" "insert,shift") | |
248 (eq_attr "dot" "no"))) | |
249 (eq_attr "cpu" "power9")) | |
250 "DU_any_power9,VSU_power9") | |
251 ; 5 cycle CR latency | |
; The bypass overrides the default latency when the consumer is one of the | |
; CR-reading reservations listed. | |
252 (define_bypass 5 "power9-alu" | |
253 "power9-crlogical,power9-mfcr,power9-mfcrf") | |
254 | |
255 ; Record form rotate/shift are cracked | |
256 (define_insn_reservation "power9-cracked-alu" 2 | |
257 (and (eq_attr "type" "insert,shift") | |
258 (eq_attr "dot" "yes") | |
259 (eq_attr "cpu" "power9")) | |
260 "DU_C2_power9,VSU_power9") | |
261 ; 7 cycle CR latency | |
262 (define_bypass 7 "power9-cracked-alu" | |
263 "power9-crlogical,power9-mfcr,power9-mfcrf") | |
264 | |
; cntlz, popcnt and trap: 3 cycle latency, otherwise like the simple ALU ops. | |
265 (define_insn_reservation "power9-alu2" 3 | |
266 (and (eq_attr "type" "cntlz,popcnt,trap") | |
267 (eq_attr "cpu" "power9")) | |
268 "DU_any_power9,VSU_power9") | |
269 ; 6 cycle CR latency | |
270 (define_bypass 6 "power9-alu2" | |
271 "power9-crlogical,power9-mfcr,power9-mfcrf") | |
272 | |
; Compare: 2 cycle latency, no separate CR bypass is defined for it. | |
273 (define_insn_reservation "power9-cmp" 2 | |
274 (and (eq_attr "type" "cmp") | |
275 (eq_attr "cpu" "power9")) | |
276 "DU_any_power9,VSU_power9") | |
277 | |
278 | |
279 ; Treat 'two' and 'three' types as 2 or 3 way cracked | |
; 'two' uses the 2-way cracked dispatch with a 4 cycle latency. | |
280 (define_insn_reservation "power9-two" 4 | |
281 (and (eq_attr "type" "two") | |
282 (eq_attr "cpu" "power9")) | |
283 "DU_C2_power9,VSU_power9") | |
284 | |
; 'three' takes the whole dispatch cycle and has a 6 cycle latency. | |
285 (define_insn_reservation "power9-three" 6 | |
286 (and (eq_attr "type" "three") | |
287 (eq_attr "cpu" "power9")) | |
288 "DU_C3_power9,VSU_power9") | |
289 | |
; Non-record-form multiply: 5 cycle latency on one VSU. | |
290 (define_insn_reservation "power9-mul" 5 | |
291 (and (eq_attr "type" "mul") | |
292 (eq_attr "dot" "no") | |
293 (eq_attr "cpu" "power9")) | |
294 "DU_any_power9,VSU_power9") | |
295 | |
; Record-form multiply: dispatched as 2-way cracked. | |
296 (define_insn_reservation "power9-mul-compare" 5 | |
297 (and (eq_attr "type" "mul") | |
298 (eq_attr "dot" "yes") | |
299 (eq_attr "cpu" "power9")) | |
300 "DU_C2_power9,VSU_power9") | |
301 ; 10 cycle CR latency | |
302 (define_bypass 10 "power9-mul-compare" | |
303 "power9-crlogical,power9-mfcr,power9-mfcrf") | |
304 | |
305 ; Fixed point divides reserve the divide units for a minimum of 8 cycles | |
; "*8" repeats the unit reservation for 8 consecutive cycles; either divide | |
; unit may be chosen. 32-bit divide: 16 cycle latency. | |
306 (define_insn_reservation "power9-idiv" 16 | |
307 (and (eq_attr "type" "div") | |
308 (eq_attr "size" "32") | |
309 (eq_attr "cpu" "power9")) | |
310 "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8") | |
311 | |
; 64-bit divide: 24 cycle latency, same unit occupancy as the 32-bit form. | |
312 (define_insn_reservation "power9-ldiv" 24 | |
313 (and (eq_attr "type" "div") | |
314 (eq_attr "size" "64") | |
315 (eq_attr "cpu" "power9")) | |
316 "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8") | |
317 | |
; CR logical ops (types cr_logical and delayed_cr): 2 cycle latency. This | |
; reservation is also named as a consumer in the CR-latency bypasses. | |
318 (define_insn_reservation "power9-crlogical" 2 | |
319 (and (eq_attr "type" "cr_logical,delayed_cr") | |
320 (eq_attr "cpu" "power9")) | |
321 "DU_any_power9,VSU_power9") | |
322 | |
; mfcrf (type mfcrf): 2 cycle latency. | |
323 (define_insn_reservation "power9-mfcrf" 2 | |
324 (and (eq_attr "type" "mfcrf") | |
325 (eq_attr "cpu" "power9")) | |
326 "DU_any_power9,VSU_power9") | |
327 | |
; mfcr (type mfcr): takes the whole dispatch cycle (DU_C3), 6 cycle latency. | |
328 (define_insn_reservation "power9-mfcr" 6 | |
329 (and (eq_attr "type" "mfcr") | |
330 (eq_attr "cpu" "power9")) | |
331 "DU_C3_power9,VSU_power9") | |
332 | |
333 ; Should differentiate between 1 cr field and > 1 since target of > 1 cr | |
334 ; is cracked | |
335 (define_insn_reservation "power9-mtcr" 2 | |
336 (and (eq_attr "type" "mtcr") | |
337 (eq_attr "cpu" "power9")) | |
338 "DU_any_power9,VSU_power9") | |
339 | |
340 ; Move to LR/CTR are executed in VSU | |
341 (define_insn_reservation "power9-mtjmpr" 5 | |
342 (and (eq_attr "type" "mtjmpr") | |
343 (eq_attr "cpu" "power9")) | |
344 "DU_any_power9,VSU_power9") | |
345 | |
346 ; Floating point/Vector ops | |
; Scalar FP insns dispatch with "slice plus 3rd slot" and execute on one VSU. | |
; fpsimple: 2 cycle latency. | |
347 (define_insn_reservation "power9-fpsimple" 2 | |
348 (and (eq_attr "type" "fpsimple") | |
349 (eq_attr "cpu" "power9")) | |
350 "DU_slice_3_power9,VSU_power9") | |
351 | |
; fp and dmul: 7 cycle latency. | |
352 (define_insn_reservation "power9-fp" 7 | |
353 (and (eq_attr "type" "fp,dmul") | |
354 (eq_attr "cpu" "power9")) | |
355 "DU_slice_3_power9,VSU_power9") | |
356 | |
; fpcompare: 3 cycle latency. | |
357 (define_insn_reservation "power9-fpcompare" 3 | |
358 (and (eq_attr "type" "fpcompare") | |
359 (eq_attr "cpu" "power9")) | |
360 "DU_slice_3_power9,VSU_power9") | |
361 | |
362 ; FP div/sqrt are executed in the VSU slices. They are not pipelined wrt other | |
363 ; divide insns, but for the most part do not block pipelined ops. | |
; Accordingly each reservation holds a VSU for a single cycle only; the long | |
; operation time is expressed through the latency value alone. | |
364 (define_insn_reservation "power9-sdiv" 22 | |
365 (and (eq_attr "type" "sdiv") | |
366 (eq_attr "cpu" "power9")) | |
367 "DU_slice_3_power9,VSU_power9") | |
368 | |
; ddiv: 33 cycle latency. | |
369 (define_insn_reservation "power9-ddiv" 33 | |
370 (and (eq_attr "type" "ddiv") | |
371 (eq_attr "cpu" "power9")) | |
372 "DU_slice_3_power9,VSU_power9") | |
373 | |
; ssqrt: 26 cycle latency. | |
374 (define_insn_reservation "power9-sqrt" 26 | |
375 (and (eq_attr "type" "ssqrt") | |
376 (eq_attr "cpu" "power9")) | |
377 "DU_slice_3_power9,VSU_power9") | |
378 | |
; dsqrt: 36 cycle latency. | |
379 (define_insn_reservation "power9-dsqrt" 36 | |
380 (and (eq_attr "type" "dsqrt") | |
381 (eq_attr "cpu" "power9")) | |
382 "DU_slice_3_power9,VSU_power9") | |
383 | |
; Vector insns dispatch to a superslice and, unless noted, reserve a VSU pair. | |
; Vector move/logical/extend/fixed-point compare: 2 cycle latency. | |
384 (define_insn_reservation "power9-vec-2cyc" 2 | |
385 (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx") | |
386 (eq_attr "cpu" "power9")) | |
387 "DU_super_power9,VSU_super_power9") | |
388 | |
; veccmp: 3 cycle latency. | |
389 (define_insn_reservation "power9-veccmp" 3 | |
390 (and (eq_attr "type" "veccmp") | |
391 (eq_attr "cpu" "power9")) | |
392 "DU_super_power9,VSU_super_power9") | |
393 | |
; vecsimple: 3 cycle latency. | |
394 (define_insn_reservation "power9-vecsimple" 3 | |
395 (and (eq_attr "type" "vecsimple") | |
396 (eq_attr "cpu" "power9")) | |
397 "DU_super_power9,VSU_super_power9") | |
398 | |
; vecfloat/vecdouble of any size other than 128: 7 cycle latency. | |
399 (define_insn_reservation "power9-vecnormal" 7 | |
400 (and (eq_attr "type" "vecfloat,vecdouble") | |
401 (eq_attr "size" "!128") | |
402 (eq_attr "cpu" "power9")) | |
403 "DU_super_power9,VSU_super_power9") | |
404 | |
405 ; Quad-precision FP ops, execute in DFU | |
; Selected by size 128; reserves dfu_power9 instead of a VSU pair. | |
406 (define_insn_reservation "power9-qp" 12 | |
407 (and (eq_attr "type" "vecfloat,vecdouble") | |
408 (eq_attr "size" "128") | |
409 (eq_attr "cpu" "power9")) | |
410 "DU_super_power9,dfu_power9") | |
411 | |
; Permutes use one of the two permute units rather than a VSU pair. | |
412 (define_insn_reservation "power9-vecperm" 3 | |
413 (and (eq_attr "type" "vecperm") | |
414 (eq_attr "cpu" "power9")) | |
415 "DU_super_power9,VSU_PRM_power9") | |
416 | |
; veccomplex: 7 cycle latency. | |
417 (define_insn_reservation "power9-veccomplex" 7 | |
418 (and (eq_attr "type" "veccomplex") | |
419 (eq_attr "cpu" "power9")) | |
420 "DU_super_power9,VSU_super_power9") | |
421 | |
; vecfdiv: 28 cycle latency; the VSU pair is reserved for one cycle only. | |
422 (define_insn_reservation "power9-vecfdiv" 28 | |
423 (and (eq_attr "type" "vecfdiv") | |
424 (eq_attr "cpu" "power9")) | |
425 "DU_super_power9,VSU_super_power9") | |
426 | |
; vecdiv of any size other than 128: 32 cycle latency. | |
427 (define_insn_reservation "power9-vecdiv" 32 | |
428 (and (eq_attr "type" "vecdiv") | |
429 (eq_attr "size" "!128") | |
430 (eq_attr "cpu" "power9")) | |
431 "DU_super_power9,VSU_super_power9") | |
432 | |
; 128-bit vecdiv: 56 cycle latency, reserves dfu_power9 instead of a VSU pair. | |
433 (define_insn_reservation "power9-qpdiv" 56 | |
434 (and (eq_attr "type" "vecdiv") | |
435 (eq_attr "size" "128") | |
436 (eq_attr "cpu" "power9")) | |
437 "DU_super_power9,dfu_power9") | |
438 | |
; Insns of type mffgpr: 2 cycle latency on one VSU. | |
439 (define_insn_reservation "power9-mffgpr" 2 | |
440 (and (eq_attr "type" "mffgpr") | |
441 (eq_attr "cpu" "power9")) | |
442 "DU_slice_3_power9,VSU_power9") | |
443 | |
; Insns of type mftgpr: same latency and reservation as mffgpr. | |
444 (define_insn_reservation "power9-mftgpr" 2 | |
445 (and (eq_attr "type" "mftgpr") | |
446 (eq_attr "cpu" "power9")) | |
447 "DU_slice_3_power9,VSU_power9") | |
448 | |
449 | |
450 ; Branch Unit | |
451 ; Move from LR/CTR are executed in BRU but consume a writeback port from an | |
452 ; execution slice. | |
; The writeback port is modelled by reserving a VSU together with the BRU. | |
453 (define_insn_reservation "power9-mfjmpr" 6 | |
454 (and (eq_attr "type" "mfjmpr") | |
455 (eq_attr "cpu" "power9")) | |
456 "DU_branch_power9,bru_power9+VSU_power9") | |
457 | |
458 ; Branch is 2 cycles | |
; Covers types jmpreg and branch; uses a branch dispatch port, then the BRU. | |
459 (define_insn_reservation "power9-branch" 2 | |
460 (and (eq_attr "type" "jmpreg,branch") | |
461 (eq_attr "cpu" "power9")) | |
462 "DU_branch_power9,bru_power9") | |
463 | |
464 | |
465 ; Crypto Unit | |
; Superslice dispatch, then the single crypto unit; 6 cycle latency. | |
466 (define_insn_reservation "power9-crypto" 6 | |
467 (and (eq_attr "type" "crypto") | |
468 (eq_attr "cpu" "power9")) | |
469 "DU_super_power9,cryptu_power9") | |
470 | |
471 | |
472 ; HTM Unit | |
; No dedicated HTM unit is declared: htm insns are 2-way cracked and reserve | |
; an LSU, 4 cycle latency. | |
473 (define_insn_reservation "power9-htm" 4 | |
474 (and (eq_attr "type" "htm") | |
475 (eq_attr "cpu" "power9")) | |
476 "DU_C2_power9,LSU_power9") | |
477 | |
; htmsimple insns reserve a VSU, 2 cycle latency. | |
478 (define_insn_reservation "power9-htm-simple" 2 | |
479 (and (eq_attr "type" "htmsimple") | |
480 (eq_attr "cpu" "power9")) | |
481 "DU_any_power9,VSU_power9") | |
482 | |
483 | |
484 ; DFP Unit | |
; Insns of type dfp reserve dfu_power9 (the unit the 128-bit vector FP | |
; reservations also use); even-slice dispatch, 12 cycle latency. | |
485 (define_insn_reservation "power9-dfp" 12 | |
486 (and (eq_attr "type" "dfp") | |
487 (eq_attr "cpu" "power9")) | |
488 "DU_even_power9,dfu_power9") | |
489 |