Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/arm/arm926ejs.md @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 ;; ARM 926EJ-S Pipeline Description | |
2 ;; Copyright (C) 2003, 2007 Free Software Foundation, Inc. | |
3 ;; Written by CodeSourcery, LLC. | |
4 ;; | |
5 ;; This file is part of GCC. | |
6 ;; | |
7 ;; GCC is free software; you can redistribute it and/or modify it | |
8 ;; under the terms of the GNU General Public License as published by | |
9 ;; the Free Software Foundation; either version 3, or (at your option) | |
10 ;; any later version. | |
11 ;; | |
12 ;; GCC is distributed in the hope that it will be useful, but | |
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 ;; General Public License for more details. | |
16 ;; | |
17 ;; You should have received a copy of the GNU General Public License | |
18 ;; along with GCC; see the file COPYING3. If not see | |
19 ;; <http://www.gnu.org/licenses/>. | |
20 | |
21 ;; These descriptions are based on the information contained in the | |
22 ;; ARM926EJ-S Technical Reference Manual, Copyright (c) 2002 ARM | |
23 ;; Limited. | |
24 ;; | |
25 | |
26 ;; This automaton provides a pipeline description for the ARM | |
27 ;; 926EJ-S core. | |
28 ;; | |
29 ;; The model given here assumes that the condition for all conditional | |
30 ;; instructions is "true", i.e., that all of the instructions are | |
31 ;; actually executed. | |
32 | |
;; Declare the automaton; the "e,m,w" CPU units are later defined as part of it. | |
33 (define_automaton "arm926ejs") | |
34 | |
35 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
36 ;; Pipelines | |
37 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
38 | |
39 ;; There is a single pipeline. | |
40 ;; | |
41 ;; The ALU pipeline has fetch, decode, execute, memory, and | |
42 ;; write stages. We only need to model the execute, memory and write | |
43 ;; stages, as the units "e", "m" and "w" respectively. | |
44 | |
45 (define_cpu_unit "e,m,w" "arm926ejs") | |
46 | |
47 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
48 ;; ALU Instructions | |
49 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
50 | |
51 ;; ALU instructions require three cycles to execute, and use the ALU | |
52 ;; pipeline in each of the three stages. The results are available | |
53 ;; after the execute stage has finished. | |
54 ;; | |
55 ;; If the destination register is the PC, the pipelines are stalled | |
56 ;; for several cycles. That case is not modeled here. | |
57 | |
58 ;; ALU operations of type alu or alu_shift (alu_shift_reg is handled separately) | |
59 (define_insn_reservation "9_alu_op" 1 | |
60 (and (eq_attr "tune" "arm926ejs") | |
61 (eq_attr "type" "alu,alu_shift")) | |
62 "e,m,w") | |
63 | |
64 ;; ALU operations with a shift-by-register operand. | |
65 ;; These really stall in the decoder, in order to read | |
66 ;; the shift value in a second cycle. We model that stall as two | |
67 ;; cycles in the execute stage ("e*2"), with a latency of 2. | |
68 (define_insn_reservation "9_alu_shift_reg_op" 2 | |
69 (and (eq_attr "tune" "arm926ejs") | |
70 (eq_attr "type" "alu_shift_reg")) | |
71 "e*2,m,w") | |
72 | |
73 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
74 ;; Multiplication Instructions | |
75 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
76 | |
77 ;; Multiplication instructions loop in the execute stage until the | |
78 ;; instruction has been passed through the multiplier array enough | |
79 ;; times. Multiply operations occur in both the execute and memory | |
80 ;; stages of the pipeline. | |
81 | |
;; smlalxy, mul, mla: latency 3; two cycles in e, then one each in m and w. | |
;; NOTE(review): "smlalxy" is also listed by 9_mult6, with the same latency | |
;; and reservation -- confirm which of the two is meant to match it. | |
82 (define_insn_reservation "9_mult1" 3 | |
83 (and (eq_attr "tune" "arm926ejs") | |
84 (eq_attr "insn" "smlalxy,mul,mla")) | |
85 "e*2,m,w") | |
86 | |
;; muls, mlas: latency 4; three cycles in e, then one each in m and w. | |
87 (define_insn_reservation "9_mult2" 4 | |
88 (and (eq_attr "tune" "arm926ejs") | |
89 (eq_attr "insn" "muls,mlas")) | |
90 "e*3,m,w") | |
91 | |
;; umull, umlal, smull, smlal: latency 4; three cycles in e, then one each | |
;; in m and w. | |
92 (define_insn_reservation "9_mult3" 4 | |
93 (and (eq_attr "tune" "arm926ejs") | |
94 (eq_attr "insn" "umull,umlal,smull,smlal")) | |
95 "e*3,m,w") | |
96 | |
;; umulls, umlals, smulls, smlals: latency 5; four cycles in e, then one | |
;; each in m and w. | |
97 (define_insn_reservation "9_mult4" 5 | |
98 (and (eq_attr "tune" "arm926ejs") | |
99 (eq_attr "insn" "umulls,umlals,smulls,smlals")) | |
100 "e*4,m,w") | |
101 | |
;; smulxy, smlaxy, smlawx: latency 2; one cycle in each of e, m and w. | |
102 (define_insn_reservation "9_mult5" 2 | |
103 (and (eq_attr "tune" "arm926ejs") | |
104 (eq_attr "insn" "smulxy,smlaxy,smlawx")) | |
105 "e,m,w") | |
106 | |
;; smlalxy: latency 3; two cycles in e, then one each in m and w. | |
;; NOTE(review): 9_mult1 lists "smlalxy" too -- presumably only one of the | |
;; two reservations can take effect for it; verify. | |
107 (define_insn_reservation "9_mult6" 3 | |
108 (and (eq_attr "tune" "arm926ejs") | |
109 (eq_attr "insn" "smlalxy")) | |
110 "e*2,m,w") | |
111 | |
112 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
113 ;; Load/Store Instructions | |
114 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
115 | |
116 ;; The models for load/store instructions do not accurately describe | |
117 ;; the difference between operations with and without a base register | |
118 ;; writeback (such as "ldm!"). These models assume that all memory | |
119 ;; references hit in dcache. | |
120 | |
121 ;; Loads with a shifted offset take 3 cycles. We assume that case for every | |
122 ;; load matched here: it is (a) probably the most common and (b) pessimistic, | |
;; and the pessimistic assumption will lead to fewer stalls. | |
123 (define_insn_reservation "9_load1_op" 3 | |
124 (and (eq_attr "tune" "arm926ejs") | |
125 (eq_attr "type" "load1,load_byte")) | |
126 "e*2,m,w") | |
127 | |
;; Stores of type store1: latency 0; one cycle in each of e, m and w. | |
128 (define_insn_reservation "9_store1_op" 0 | |
129 (and (eq_attr "tune" "arm926ejs") | |
130 (eq_attr "type" "store1")) | |
131 "e,m,w") | |
132 | |
133 ;; Multiple-word loads and stores. Each one reserves the m unit for as many | |
;; cycles as the number in its type name (load2 -> "m*2", store4 -> "m*4"). | |
134 (define_insn_reservation "9_load2_op" 3 | |
135 (and (eq_attr "tune" "arm926ejs") | |
136 (eq_attr "type" "load2")) | |
137 "e,m*2,w") | |
138 | |
;; load3: latency 4; one cycle in e, three in m, one in w. | |
139 (define_insn_reservation "9_load3_op" 4 | |
140 (and (eq_attr "tune" "arm926ejs") | |
141 (eq_attr "type" "load3")) | |
142 "e,m*3,w") | |
143 | |
;; load4: latency 5; one cycle in e, four in m, one in w. | |
144 (define_insn_reservation "9_load4_op" 5 | |
145 (and (eq_attr "tune" "arm926ejs") | |
146 (eq_attr "type" "load4")) | |
147 "e,m*4,w") | |
148 | |
;; store2: latency 0; one cycle in e, two in m, one in w. | |
149 (define_insn_reservation "9_store2_op" 0 | |
150 (and (eq_attr "tune" "arm926ejs") | |
151 (eq_attr "type" "store2")) | |
152 "e,m*2,w") | |
153 | |
;; store3: latency 0; one cycle in e, three in m, one in w. | |
154 (define_insn_reservation "9_store3_op" 0 | |
155 (and (eq_attr "tune" "arm926ejs") | |
156 (eq_attr "type" "store3")) | |
157 "e,m*3,w") | |
158 | |
;; store4: latency 0; one cycle in e, four in m, one in w. | |
159 (define_insn_reservation "9_store4_op" 0 | |
160 (and (eq_attr "tune" "arm926ejs") | |
161 (eq_attr "type" "store4")) | |
162 "e,m*4,w") | |
163 | |
164 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
165 ;; Branch and Call Instructions | |
166 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
167 | |
168 ;; Branch instructions are difficult to model accurately. The ARM | |
169 ;; core can predict most branches. If the branch is predicted | |
170 ;; correctly, and predicted early enough, the branch can be completely | |
171 ;; eliminated from the instruction stream. Some branches can | |
172 ;; therefore appear to require zero cycles to execute. We assume that | |
173 ;; all branches are predicted correctly, and that the latency is | |
174 ;; therefore the minimum value. | |
175 | |
;; Latency 0 with the "nothing" reservation: none of e, m or w is named. | |
176 (define_insn_reservation "9_branch_op" 0 | |
177 (and (eq_attr "tune" "arm926ejs") | |
178 (eq_attr "type" "branch")) | |
179 "nothing") | |
180 | |
181 ;; The latency for a call is not predictable. Therefore, we use 32 as | |
182 ;; roughly equivalent to positive infinity. | |
183 | |
;; Latency 32 with the "nothing" reservation, the same reservation string | |
;; that branches use: none of e, m or w is named. | |
184 (define_insn_reservation "9_call_op" 32 | |
185 (and (eq_attr "tune" "arm926ejs") | |
186 (eq_attr "type" "call")) | |
187 "nothing") | |