comparison gcc/config/arm/arm1020e.md @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents a06113de4d67
children 84e7813d76e9
comparison
equal deleted inserted replaced
68:561a7518be6b 111:04ced10e8804
1 ;; ARM 1020E & ARM 1022E Pipeline Description 1 ;; ARM 1020E & ARM 1022E Pipeline Description
2 ;; Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc. 2 ;; Copyright (C) 2005-2017 Free Software Foundation, Inc.
3 ;; Contributed by Richard Earnshaw (richard.earnshaw@arm.com) 3 ;; Contributed by Richard Earnshaw (richard.earnshaw@arm.com)
4 ;; 4 ;;
5 ;; This file is part of GCC. 5 ;; This file is part of GCC.
6 ;; 6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it 7 ;; GCC is free software; you can redistribute it and/or modify it
56 ;; ALU Instructions 56 ;; ALU Instructions
57 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 57 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
58 58
59 ;; ALU instructions require three cycles to execute, and use the ALU 59 ;; ALU instructions require three cycles to execute, and use the ALU
60 ;; pipeline in each of the three stages. The results are available 60 ;; pipeline in each of the three stages. The results are available
61 ;; after the execute stage stage has finished. 61 ;; after the execute stage has finished.
62 ;; 62 ;;
63 ;; If the destination register is the PC, the pipelines are stalled 63 ;; If the destination register is the PC, the pipelines are stalled
64 ;; for several cycles. That case is not modeled here. 64 ;; for several cycles. That case is not modeled here.
65 65
66 ;; ALU operations with no shifted operand 66 ;; ALU operations with no shifted operand
67 (define_insn_reservation "1020alu_op" 1 67 (define_insn_reservation "1020alu_op" 1
68 (and (eq_attr "tune" "arm1020e,arm1022e") 68 (and (eq_attr "tune" "arm1020e,arm1022e")
69 (eq_attr "type" "alu")) 69 (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
70 alu_sreg,alus_sreg,logic_reg,logics_reg,\
71 adc_imm,adcs_imm,adc_reg,adcs_reg,\
72 adr,bfm,rev,\
73 shift_imm,shift_reg,\
74 mov_imm,mov_reg,mvn_imm,mvn_reg,\
75 multiple,no_insn"))
70 "1020a_e,1020a_m,1020a_w") 76 "1020a_e,1020a_m,1020a_w")
71 77
72 ;; ALU operations with a shift-by-constant operand 78 ;; ALU operations with a shift-by-constant operand
73 (define_insn_reservation "1020alu_shift_op" 1 79 (define_insn_reservation "1020alu_shift_op" 1
74 (and (eq_attr "tune" "arm1020e,arm1022e") 80 (and (eq_attr "tune" "arm1020e,arm1022e")
75 (eq_attr "type" "alu_shift")) 81 (eq_attr "type" "alu_shift_imm,alus_shift_imm,\
82 logic_shift_imm,logics_shift_imm,\
83 extend,mov_shift,mvn_shift"))
76 "1020a_e,1020a_m,1020a_w") 84 "1020a_e,1020a_m,1020a_w")
77 85
78 ;; ALU operations with a shift-by-register operand 86 ;; ALU operations with a shift-by-register operand
79 ;; These really stall in the decoder, in order to read 87 ;; These really stall in the decoder, in order to read
80 ;; the shift value in a second cycle. Pretend we take two cycles in 88 ;; the shift value in a second cycle. Pretend we take two cycles in
81 ;; the execute stage. 89 ;; the execute stage.
82 (define_insn_reservation "1020alu_shift_reg_op" 2 90 (define_insn_reservation "1020alu_shift_reg_op" 2
83 (and (eq_attr "tune" "arm1020e,arm1022e") 91 (and (eq_attr "tune" "arm1020e,arm1022e")
84 (eq_attr "type" "alu_shift_reg")) 92 (eq_attr "type" "alu_shift_reg,alus_shift_reg,\
93 logic_shift_reg,logics_shift_reg,\
94 mov_shift_reg,mvn_shift_reg"))
85 "1020a_e*2,1020a_m,1020a_w") 95 "1020a_e*2,1020a_m,1020a_w")
86 96
87 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 97 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
88 ;; Multiplication Instructions 98 ;; Multiplication Instructions
89 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 99 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
94 104
95 ;; The result of the "smul" and "smulw" instructions is not available 105 ;; The result of the "smul" and "smulw" instructions is not available
96 ;; until after the memory stage. 106 ;; until after the memory stage.
97 (define_insn_reservation "1020mult1" 2 107 (define_insn_reservation "1020mult1" 2
98 (and (eq_attr "tune" "arm1020e,arm1022e") 108 (and (eq_attr "tune" "arm1020e,arm1022e")
99 (eq_attr "insn" "smulxy,smulwy")) 109 (eq_attr "type" "smulxy,smulwy"))
100 "1020a_e,1020a_m,1020a_w") 110 "1020a_e,1020a_m,1020a_w")
101 111
102 ;; The "smlaxy" and "smlawx" instructions require two iterations through 112 ;; The "smlaxy" and "smlawx" instructions require two iterations through
103 ;; the execute stage; the result is available immediately following 113 ;; the execute stage; the result is available immediately following
104 ;; the execute stage. 114 ;; the execute stage.
105 (define_insn_reservation "1020mult2" 2 115 (define_insn_reservation "1020mult2" 2
106 (and (eq_attr "tune" "arm1020e,arm1022e") 116 (and (eq_attr "tune" "arm1020e,arm1022e")
107 (eq_attr "insn" "smlaxy,smlalxy,smlawx")) 117 (eq_attr "type" "smlaxy,smlalxy,smlawx"))
108 "1020a_e*2,1020a_m,1020a_w") 118 "1020a_e*2,1020a_m,1020a_w")
109 119
110 ;; The "smlalxy", "mul", and "mla" instructions require two iterations 120 ;; The "smlalxy", "mul", and "mla" instructions require two iterations
111 ;; through the execute stage; the result is not available until after 121 ;; through the execute stage; the result is not available until after
112 ;; the memory stage. 122 ;; the memory stage.
113 (define_insn_reservation "1020mult3" 3 123 (define_insn_reservation "1020mult3" 3
114 (and (eq_attr "tune" "arm1020e,arm1022e") 124 (and (eq_attr "tune" "arm1020e,arm1022e")
115 (eq_attr "insn" "smlalxy,mul,mla")) 125 (eq_attr "type" "smlalxy,mul,mla"))
116 "1020a_e*2,1020a_m,1020a_w") 126 "1020a_e*2,1020a_m,1020a_w")
117 127
118 ;; The "muls" and "mlas" instructions loop in the execute stage for 128 ;; The "muls" and "mlas" instructions loop in the execute stage for
119 ;; four iterations in order to set the flags. The value result is 129 ;; four iterations in order to set the flags. The value result is
120 ;; available after three iterations. 130 ;; available after three iterations.
121 (define_insn_reservation "1020mult4" 3 131 (define_insn_reservation "1020mult4" 3
122 (and (eq_attr "tune" "arm1020e,arm1022e") 132 (and (eq_attr "tune" "arm1020e,arm1022e")
123 (eq_attr "insn" "muls,mlas")) 133 (eq_attr "type" "muls,mlas"))
124 "1020a_e*4,1020a_m,1020a_w") 134 "1020a_e*4,1020a_m,1020a_w")
125 135
126 ;; Long multiply instructions that produce two registers of 136 ;; Long multiply instructions that produce two registers of
127 ;; output (such as umull) make their results available in two cycles; 137 ;; output (such as umull) make their results available in two cycles;
128 ;; the least significant word is available before the most significant 138 ;; the least significant word is available before the most significant
133 ;; The "umull", "umlal", "smull", and "smlal" instructions all take 143 ;; The "umull", "umlal", "smull", and "smlal" instructions all take
134 ;; three iterations through the execute cycle, and make their results 144 ;; three iterations through the execute cycle, and make their results
135 ;; available after the memory cycle. 145 ;; available after the memory cycle.
136 (define_insn_reservation "1020mult5" 4 146 (define_insn_reservation "1020mult5" 4
137 (and (eq_attr "tune" "arm1020e,arm1022e") 147 (and (eq_attr "tune" "arm1020e,arm1022e")
138 (eq_attr "insn" "umull,umlal,smull,smlal")) 148 (eq_attr "type" "umull,umlal,smull,smlal"))
139 "1020a_e*3,1020a_m,1020a_w") 149 "1020a_e*3,1020a_m,1020a_w")
140 150
141 ;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in 151 ;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in
142 ;; the execute stage for five iterations in order to set the flags. 152 ;; the execute stage for five iterations in order to set the flags.
143 ;; The value result is available after four iterations. 153 ;; The value result is available after four iterations.
144 (define_insn_reservation "1020mult6" 4 154 (define_insn_reservation "1020mult6" 4
145 (and (eq_attr "tune" "arm1020e,arm1022e") 155 (and (eq_attr "tune" "arm1020e,arm1022e")
146 (eq_attr "insn" "umulls,umlals,smulls,smlals")) 156 (eq_attr "type" "umulls,umlals,smulls,smlals"))
147 "1020a_e*5,1020a_m,1020a_w") 157 "1020a_e*5,1020a_m,1020a_w")
148 158
149 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 159 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
150 ;; Load/Store Instructions 160 ;; Load/Store Instructions
151 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 161 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
164 ;; with LSL of zero. The remainder take 1 cycle to execute. 174 ;; with LSL of zero. The remainder take 1 cycle to execute.
165 ;; For 4byte loads there is a bypass from the load stage 175 ;; For 4byte loads there is a bypass from the load stage
166 176
167 (define_insn_reservation "1020load1_op" 2 177 (define_insn_reservation "1020load1_op" 2
168 (and (eq_attr "tune" "arm1020e,arm1022e") 178 (and (eq_attr "tune" "arm1020e,arm1022e")
169 (eq_attr "type" "load_byte,load1")) 179 (eq_attr "type" "load_byte,load_4"))
170 "1020a_e+1020l_e,1020l_m,1020l_w") 180 "1020a_e+1020l_e,1020l_m,1020l_w")
171 181
172 (define_insn_reservation "1020store1_op" 0 182 (define_insn_reservation "1020store1_op" 0
173 (and (eq_attr "tune" "arm1020e,arm1022e") 183 (and (eq_attr "tune" "arm1020e,arm1022e")
174 (eq_attr "type" "store1")) 184 (eq_attr "type" "store_4"))
175 "1020a_e+1020l_e,1020l_m,1020l_w") 185 "1020a_e+1020l_e,1020l_m,1020l_w")
176 186
177 ;; A load's result can be stored by an immediately following store 187 ;; A load's result can be stored by an immediately following store
178 (define_bypass 1 "1020load1_op" "1020store1_op" "arm_no_early_store_addr_dep") 188 (define_bypass 1 "1020load1_op" "1020store1_op" "arm_no_early_store_addr_dep")
179 189
199 ;; As with ALU operations, if one of the destination registers is the 209 ;; As with ALU operations, if one of the destination registers is the
200 ;; PC, there are additional stalls; that is not modeled. 210 ;; PC, there are additional stalls; that is not modeled.
201 211
202 (define_insn_reservation "1020load2_op" 2 212 (define_insn_reservation "1020load2_op" 2
203 (and (eq_attr "tune" "arm1020e,arm1022e") 213 (and (eq_attr "tune" "arm1020e,arm1022e")
204 (eq_attr "type" "load2")) 214 (eq_attr "type" "load_8"))
205 "1020a_e+1020l_e,1020l_m,1020l_w") 215 "1020a_e+1020l_e,1020l_m,1020l_w")
206 216
207 (define_insn_reservation "1020store2_op" 0 217 (define_insn_reservation "1020store2_op" 0
208 (and (eq_attr "tune" "arm1020e,arm1022e") 218 (and (eq_attr "tune" "arm1020e,arm1022e")
209 (eq_attr "type" "store2")) 219 (eq_attr "type" "store_8"))
210 "1020a_e+1020l_e,1020l_m,1020l_w") 220 "1020a_e+1020l_e,1020l_m,1020l_w")
211 221
212 (define_insn_reservation "1020load34_op" 3 222 (define_insn_reservation "1020load34_op" 3
213 (and (eq_attr "tune" "arm1020e,arm1022e") 223 (and (eq_attr "tune" "arm1020e,arm1022e")
214 (eq_attr "type" "load3,load4")) 224 (eq_attr "type" "load_12,load_16"))
215 "1020a_e+1020l_e,1020l_e+1020l_m,1020l_m,1020l_w") 225 "1020a_e+1020l_e,1020l_e+1020l_m,1020l_m,1020l_w")
216 226
217 (define_insn_reservation "1020store34_op" 0 227 (define_insn_reservation "1020store34_op" 0
218 (and (eq_attr "tune" "arm1020e,arm1022e") 228 (and (eq_attr "tune" "arm1020e,arm1022e")
219 (eq_attr "type" "store3,store4")) 229 (eq_attr "type" "store_12,store_16"))
220 "1020a_e+1020l_e,1020l_e+1020l_m,1020l_m,1020l_w") 230 "1020a_e+1020l_e,1020l_e+1020l_m,1020l_m,1020l_w")
221 231
222 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 232 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
223 ;; Branch and Call Instructions 233 ;; Branch and Call Instructions
224 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 234 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
234 (define_insn_reservation "1020branch_op" 0 244 (define_insn_reservation "1020branch_op" 0
235 (and (eq_attr "tune" "arm1020e,arm1022e") 245 (and (eq_attr "tune" "arm1020e,arm1022e")
236 (eq_attr "type" "branch")) 246 (eq_attr "type" "branch"))
237 "1020a_e") 247 "1020a_e")
238 248
239 ;; The latency for a call is not predictable. Therefore, we use 32 as 249 ;; The latency for a call is not predictable. Therefore, we model as blocking
240 ;; roughly equivalent to positive infinity. 250 ;; execution for a number of cycles but we can't do anything more accurate
251 ;; than that.
241 252
242 (define_insn_reservation "1020call_op" 32 253 (define_insn_reservation "1020call_op" 32
243 (and (eq_attr "tune" "arm1020e,arm1022e") 254 (and (eq_attr "tune" "arm1020e,arm1022e")
244 (eq_attr "type" "call")) 255 (eq_attr "type" "call"))
245 "1020a_e*32") 256 "1020a_e*4")
246 257
247 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 258 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
248 ;; VFP 259 ;; VFP
249 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 260 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
250 261
267 278
268 ;; Note, no instruction can issue to the VFP if the core is stalled in the 279 ;; Note, no instruction can issue to the VFP if the core is stalled in the
269 ;; first execute state. We model this by using 1020a_e in the first cycle. 280 ;; first execute state. We model this by using 1020a_e in the first cycle.
270 (define_insn_reservation "v10_ffarith" 5 281 (define_insn_reservation "v10_ffarith" 5
271 (and (eq_attr "vfp10" "yes") 282 (and (eq_attr "vfp10" "yes")
272 (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd")) 283 (eq_attr "type" "fmov,ffariths,ffarithd,fcmps,fcmpd"))
273 "1020a_e+v10_fmac") 284 "1020a_e+v10_fmac")
274 285
275 (define_insn_reservation "v10_farith" 5 286 (define_insn_reservation "v10_farith" 5
276 (and (eq_attr "vfp10" "yes") 287 (and (eq_attr "vfp10" "yes")
277 (eq_attr "type" "faddd,fadds")) 288 (eq_attr "type" "faddd,fadds"))
278 "1020a_e+v10_fmac") 289 "1020a_e+v10_fmac")
279 290
280 (define_insn_reservation "v10_cvt" 5 291 (define_insn_reservation "v10_cvt" 5
281 (and (eq_attr "vfp10" "yes") 292 (and (eq_attr "vfp10" "yes")
282 (eq_attr "type" "f_cvt")) 293 (eq_attr "type" "f_cvt,f_cvti2f,f_cvtf2i"))
283 "1020a_e+v10_fmac") 294 "1020a_e+v10_fmac")
284 295
285 (define_insn_reservation "v10_fmul" 6 296 (define_insn_reservation "v10_fmul" 6
286 (and (eq_attr "vfp10" "yes") 297 (and (eq_attr "vfp10" "yes")
287 (eq_attr "type" "fmuls,fmacs,fmuld,fmacd")) 298 (eq_attr "type" "fmuls,fmacs,ffmas,fmuld,fmacd,ffmad"))
288 "1020a_e+v10_fmac*2") 299 "1020a_e+v10_fmac*2")
289 300
290 (define_insn_reservation "v10_fdivs" 18 301 (define_insn_reservation "v10_fdivs" 18
291 (and (eq_attr "vfp10" "yes") 302 (and (eq_attr "vfp10" "yes")
292 (eq_attr "type" "fdivs")) 303 (eq_attr "type" "fdivs, fsqrts"))
293 "1020a_e+v10_ds*14") 304 "1020a_e+v10_ds*4")
294 305
295 (define_insn_reservation "v10_fdivd" 32 306 (define_insn_reservation "v10_fdivd" 32
296 (and (eq_attr "vfp10" "yes") 307 (and (eq_attr "vfp10" "yes")
297 (eq_attr "type" "fdivd")) 308 (eq_attr "type" "fdivd, fsqrtd"))
298 "1020a_e+v10_fmac+v10_ds*28") 309 "1020a_e+v10_fmac+v10_ds*4")
299 310
300 (define_insn_reservation "v10_floads" 4 311 (define_insn_reservation "v10_floads" 4
301 (and (eq_attr "vfp10" "yes") 312 (and (eq_attr "vfp10" "yes")
302 (eq_attr "type" "f_loads")) 313 (eq_attr "type" "f_loads"))
303 "1020a_e+1020l_e+v10_ls1,v10_ls2") 314 "1020a_e+1020l_e+v10_ls1,v10_ls2")
313 324
314 ;; Moves to/from arm regs also use the load/store pipeline. 325 ;; Moves to/from arm regs also use the load/store pipeline.
315 326
316 (define_insn_reservation "v10_c2v" 4 327 (define_insn_reservation "v10_c2v" 4
317 (and (eq_attr "vfp10" "yes") 328 (and (eq_attr "vfp10" "yes")
318 (eq_attr "type" "r_2_f")) 329 (eq_attr "type" "f_mcr,f_mcrr"))
319 "1020a_e+1020l_e+v10_ls1,v10_ls2") 330 "1020a_e+1020l_e+v10_ls1,v10_ls2")
320 331
321 (define_insn_reservation "v10_fstores" 1 332 (define_insn_reservation "v10_fstores" 1
322 (and (eq_attr "vfp10" "yes") 333 (and (eq_attr "vfp10" "yes")
323 (eq_attr "type" "f_stores")) 334 (eq_attr "type" "f_stores"))
328 (eq_attr "type" "f_stored")) 339 (eq_attr "type" "f_stored"))
329 "1020a_e+1020l_e+v10_ls1+v10_ls2+v10_ls3,v10_ls2+v10_ls3,v10_ls3") 340 "1020a_e+1020l_e+v10_ls1+v10_ls2+v10_ls3,v10_ls2+v10_ls3,v10_ls3")
330 341
331 (define_insn_reservation "v10_v2c" 1 342 (define_insn_reservation "v10_v2c" 1
332 (and (eq_attr "vfp10" "yes") 343 (and (eq_attr "vfp10" "yes")
333 (eq_attr "type" "f_2_r")) 344 (eq_attr "type" "f_mrc,f_mrrc"))
334 "1020a_e+1020l_e,1020l_m,1020l_w") 345 "1020a_e+1020l_e,1020l_m,1020l_w")
335 346
336 (define_insn_reservation "v10_to_cpsr" 2 347 (define_insn_reservation "v10_to_cpsr" 2
337 (and (eq_attr "vfp10" "yes") 348 (and (eq_attr "vfp10" "yes")
338 (eq_attr "type" "f_flag")) 349 (eq_attr "type" "f_flag"))