0
|
1 ;; ARM 926EJ-S Pipeline Description
|
|
2 ;; Copyright (C) 2003, 2007 Free Software Foundation, Inc.
|
|
3 ;; Written by CodeSourcery, LLC.
|
|
4 ;;
|
|
5 ;; This file is part of GCC.
|
|
6 ;;
|
|
7 ;; GCC is free software; you can redistribute it and/or modify it
|
|
8 ;; under the terms of the GNU General Public License as published by
|
|
9 ;; the Free Software Foundation; either version 3, or (at your option)
|
|
10 ;; any later version.
|
|
11 ;;
|
|
12 ;; GCC is distributed in the hope that it will be useful, but
|
|
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
15 ;; General Public License for more details.
|
|
16 ;;
|
|
17 ;; You should have received a copy of the GNU General Public License
|
|
18 ;; along with GCC; see the file COPYING3. If not see
|
|
;; <http://www.gnu.org/licenses/>.
|
|
20
|
|
21 ;; These descriptions are based on the information contained in the
|
|
22 ;; ARM926EJ-S Technical Reference Manual, Copyright (c) 2002 ARM
|
|
23 ;; Limited.
|
|
24 ;;
|
|
25
|
|
26 ;; This automaton provides a pipeline description for the ARM
|
|
27 ;; 926EJ-S core.
|
|
28 ;;
|
|
29 ;; The model given here assumes that the condition for all conditional
|
|
30 ;; instructions is "true", i.e., that all of the instructions are
|
|
31 ;; actually executed.
|
|
32
|
|
;; Declare the automaton; the CPU units defined in this file are
;; attached to it by name.
(define_automaton "arm926ejs")
|
|
34
|
|
35 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
36 ;; Pipelines
|
|
37 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
38
|
|
39 ;; There is a single pipeline
|
|
40 ;;
|
|
41 ;; The ALU pipeline has fetch, decode, execute, memory, and
|
|
42 ;; write stages. We only need to model the execute, memory and write
|
|
43 ;; stages.
|
|
44
|
|
;; One unit per modeled stage: e = execute, m = memory, w = write.
(define_cpu_unit "e,m,w" "arm926ejs")
|
|
46
|
|
47 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
48 ;; ALU Instructions
|
|
49 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
50
|
|
51 ;; ALU instructions require three cycles to execute, and use the ALU
|
|
52 ;; pipeline in each of the three stages. The results are available
|
|
;; after the execute stage has finished.
|
|
54 ;;
|
|
55 ;; If the destination register is the PC, the pipelines are stalled
|
|
56 ;; for several cycles. That case is not modeled here.
|
|
57
|
|
;; ALU operations of type "alu" and "alu_shift".  The shift-by-register
;; form ("alu_shift_reg") is not matched here; it has its own reservation.
;; NOTE(review): "alu_shift" presumably means an operand shifted by a
;; constant -- confirm against the definition of the "type" attribute.
;; Default latency 1; one cycle each in e, m and w.
(define_insn_reservation "9_alu_op" 1
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "alu,alu_shift"))
  "e,m,w")
|
|
63
|
|
;; ALU operations whose operand is shifted by a register.
;; On the real core these stall in the decoder so that the shift value
;; can be read in a second cycle.  The decode stage is not modeled, so
;; the stall is approximated as a second cycle in the execute stage.
;; Default latency 2.
(define_insn_reservation "9_alu_shift_reg_op" 2
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "alu_shift_reg"))
  "e*2,m,w")
|
|
72
|
|
73 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
74 ;; Multiplication Instructions
|
|
75 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
76
|
|
77 ;; Multiplication instructions loop in the execute stage until the
|
|
78 ;; instruction has been passed through the multiplier array enough
|
|
79 ;; times. Multiply operations occur in both the execute and memory
|
|
;; stages of the pipeline.
|
|
81
|
|
;; "mul" and "mla": two cycles in the execute stage, then one cycle
;; each in memory and write.  Default latency 3.
;;
;; "smlalxy" is intentionally not listed here.  It has a dedicated
;; reservation, "9_mult6", with the same latency and the same unit
;; usage; listing it in both places made one of the two entries
;; redundant.
;; NOTE(review): if a define_bypass elsewhere names "9_mult1" and was
;; relied on for smlalxy, it must name "9_mult6" as well -- confirm.
(define_insn_reservation "9_mult1" 3
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "insn" "mul,mla"))
  "e*2,m,w")
|
|
86
|
|
;; "muls" and "mlas": three cycles in the execute stage, then one cycle
;; each in memory and write.  Default latency 4.
(define_insn_reservation "9_mult2" 4
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "insn" "muls,mlas"))
  "e*3,m,w")
|
|
91
|
|
;; "umull", "umlal", "smull" and "smlal": three cycles in the execute
;; stage, then one cycle each in memory and write.  Default latency 4.
(define_insn_reservation "9_mult3" 4
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "insn" "umull,umlal,smull,smlal"))
  "e*3,m,w")
|
|
96
|
|
;; "umulls", "umlals", "smulls" and "smlals": four cycles in the execute
;; stage, then one cycle each in memory and write.  Default latency 5.
(define_insn_reservation "9_mult4" 5
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "insn" "umulls,umlals,smulls,smlals"))
  "e*4,m,w")
|
|
101
|
|
;; "smulxy", "smlaxy" and "smlawx": a single cycle in each of the
;; execute, memory and write stages.  Default latency 2.
(define_insn_reservation "9_mult5" 2
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "insn" "smulxy,smlaxy,smlawx"))
  "e,m,w")
|
|
106
|
|
;; "smlalxy": two cycles in the execute stage, then one cycle each in
;; memory and write.  Default latency 3.
(define_insn_reservation "9_mult6" 3
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "insn" "smlalxy"))
  "e*2,m,w")
|
|
111
|
|
112 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
113 ;; Load/Store Instructions
|
|
114 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
115
|
|
;; The models for load/store instructions do not distinguish operations
;; with a base register writeback (such as "ldm!") from those without.
;; These models also assume that all memory references hit in dcache.
|
|
120
|
|
;; Loads of type "load1" and "load_byte".
;; A load with a shifted offset takes 3 cycles.  Every load matched here
;; is modeled that way because (a) that form is probably the most common
;; and (b) the pessimistic assumption will lead to fewer stalls.
(define_insn_reservation "9_load1_op" 3
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "load1,load_byte"))
  "e*2,m,w")
|
|
127
|
|
;; Stores of type "store1": one cycle each in execute, memory and
;; write.  Default latency 0.
(define_insn_reservation "9_store1_op" 0
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "store1"))
  "e,m,w")
|
|
132
|
|
;; Multiple word loads and stores.

;; "load2": one execute cycle, two memory cycles, one write cycle.
;; Default latency 3.
(define_insn_reservation "9_load2_op" 3
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "load2"))
  "e,m*2,w")
|
|
138
|
|
;; "load3": one execute cycle, three memory cycles, one write cycle.
;; Default latency 4.
(define_insn_reservation "9_load3_op" 4
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "load3"))
  "e,m*3,w")
|
|
143
|
|
;; "load4": one execute cycle, four memory cycles, one write cycle.
;; Default latency 5.
(define_insn_reservation "9_load4_op" 5
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "load4"))
  "e,m*4,w")
|
|
148
|
|
;; "store2": one execute cycle, two memory cycles, one write cycle.
;; Default latency 0.
(define_insn_reservation "9_store2_op" 0
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "store2"))
  "e,m*2,w")
|
|
153
|
|
;; "store3": one execute cycle, three memory cycles, one write cycle.
;; Default latency 0.
(define_insn_reservation "9_store3_op" 0
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "store3"))
  "e,m*3,w")
|
|
158
|
|
;; "store4": one execute cycle, four memory cycles, one write cycle.
;; Default latency 0.
(define_insn_reservation "9_store4_op" 0
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "store4"))
  "e,m*4,w")
|
|
163
|
|
164 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
165 ;; Branch and Call Instructions
|
|
166 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
167
|
|
168 ;; Branch instructions are difficult to model accurately. The ARM
|
|
169 ;; core can predict most branches. If the branch is predicted
|
|
170 ;; correctly, and predicted early enough, the branch can be completely
|
|
171 ;; eliminated from the instruction stream. Some branches can
|
|
172 ;; therefore appear to require zero cycles to execute. We assume that
|
|
173 ;; all branches are predicted correctly, and that the latency is
|
|
174 ;; therefore the minimum value.
|
|
175
|
|
;; Branches: default latency 0 and no unit reserved ("nothing"), in
;; line with the assumption that every branch is predicted correctly.
(define_insn_reservation "9_branch_op" 0
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "branch"))
  "nothing")
|
|
180
|
|
;; Call latency cannot be predicted, so 32 stands in for "roughly
;; positive infinity".  No unit is reserved ("nothing").

(define_insn_reservation "9_call_op" 32
  (and (eq_attr "tune" "arm926ejs")
       (eq_attr "type" "call"))
  "nothing")
|