Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/score/mul-div.S @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Copyright (C) 2005, 2007 Free Software Foundation, Inc. | |
2 Contributed by Sunnorth | |
3 | |
4 This file is part of GCC. | |
5 | |
6 GCC is free software; you can redistribute it and/or modify it | |
7 under the terms of the GNU General Public License as published | |
8 by the Free Software Foundation; either version 3, or (at your | |
9 option) any later version. | |
10 | |
11 GCC is distributed in the hope that it will be useful, but WITHOUT | |
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
14 License for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with GCC; see the file COPYING3. If not see | |
18 <http://www.gnu.org/licenses/>. */ | |
19 | |
20 #define ra r3 /* link register: the routines below return with br ra */ | |
21 #define a0 r4 /* first argument; results are also delivered in r4 */ | |
22 #define a1 r5 /* second argument; __udivsi3 leaves the remainder here */ | |
23 #define a2 r6 /* not referenced by the code shown */ | |
24 #define a3 r7 /* not referenced by the code shown */ | |
25 #define v0 r23 /* named in the header comments only; no instruction shown uses it */ | |
26 | |
27 #define t0 r8 /* scratch */ | |
28 #define t1 r9 /* scratch */ | |
29 #define t2 r10 /* scratch; sign information in __divsi3 and __modsi3 */ | |
30 #define t3 r11 /* scratch; saved return address in the div/mod wrappers */ | |
31 #define t4 r22 /* scratch; quotient accumulator in __udivsi3 */ | |
32 | |
33 #ifndef __pic__ | |
34 #if !defined(L_mulsi3) && !defined(L_divsi3) | |
35 .text | |
36 .global _flush_cache | |
37 #ifdef __score3__ | |
38 _flush_cache: | |
39 br r3 # score3 build: the routine returns immediately | |
40 #else | |
41 _flush_cache: | |
42 srli r9, r5, 4 # r9 = r5 / 16, the number of 16-byte steps | |
43 mv r8, r4 # r8 walks the range, starting at r4 | |
44 mtsr r9, sr0 # step count into sr0 (presumably the counter bcnz tests) | |
45 1: | |
46 cache 0xe, [r8, 0] # write back invalid dcache | |
47 addi r8, 16 | |
48 bcnz 1b | |
49 mfcr r8, cr4 | |
50 ldi r10, 0 # clear r10 before either test: both cache ops below use it as base, and the LIM one used to see a stale r10 when LDM was off | |
51 bittst! r8, 0x3 # if LDM is enabled, write back LDM | |
52 beq! 6f | |
53 cache 0xc, [r10, 0] | |
54 6: | |
55 bittst! r8, 0x2 # if LIM is enabled, refill it | |
56 beq! 7f | |
57 cache 0x4, [r10, 0] | |
58 7: | |
59 #nop! | |
60 #nop! | |
61 #nop! | |
62 #nop! | |
63 #nop! | |
64 mv r8, r4 # second pass over the same range | |
65 mtsr r9, sr0 # same step count as the first pass | |
66 2: | |
67 cache 0x2, [r8, 0] # invalid unlock icache | |
68 #nop! | |
69 #nop! | |
70 #nop! | |
71 #nop! | |
72 #nop! | |
73 addi r8, 16 | |
74 bcnz 2b | |
75 br r3 | |
76 #endif | |
77 #endif | |
78 | |
79 /* FUNCTION | |
80 (U) INT32 v0 = __mulsi3 ((U) INT32 a0, (U) INT32 a1); the code delivers the result in r4 | |
81 REGISTERS: | |
82 use t0, t1 (t1 accumulates the product) | |
83 modify a0 | |
84 a1 -> becomes 0 | |
85 NOTE: | |
86 a plain shift-and-add loop seems to give better performance here. */ | |
87 | |
88 #ifdef L_mulsi3 | |
89 .text | |
90 .global __mulsi3 | |
91 .global __umulsi3 | |
92 /* 32x32 multiply; one body shared by the signed and unsigned entry points */ | |
93 .ent __mulsi3 | |
94 __umulsi3: | |
95 __mulsi3: | |
96 li t1, 0 # t1 = running product | |
97 __mulsi3_next_bit: | |
98 andri.c t0, a1, 1 # test the lowest multiplier bit | |
99 srli a1, a1, 1 # and drop it from the multiplier | |
100 beq __mulsi3_shift # bit was clear: nothing to add | |
101 add t1, t1, a0 # bit was set: accumulate the multiplicand | |
102 __mulsi3_shift: | |
103 slli a0, a0, 1 # multiplicand *= 2 | |
104 cmpi.c a1, 0 | |
105 bne __mulsi3_next_bit # repeat while multiplier bits remain | |
106 mv a0, t1 # product goes back in r4 | |
107 br ra | |
108 .end __mulsi3 | |
109 #endif /* L_mulsi3 */ | |
110 | |
111 /* FUNCTION | |
112 UINT32 (v0) = __udivsi3 (UINT32 (a0), UINT32 (a1)); | |
113 INT32 (v0) = __divsi3 (INT32 (a0), INT32 (a1)); | |
114 UINT32 (v0) = __umodsi3 (UINT32 (a0), UINT32 (a1)); | |
115 INT32 (v0) = __modsi3 (INT32 (a0), INT32 (a1)); | |
116 DESCRIPTION | |
117 performs 32-bit division/modulo. | |
118 REGISTERS | |
119 used t0 bit-index, t4 quotient | |
120 t2 sign, t3 saved return address (wrappers only) | |
121 modify a0 holds the running remainder; __udivsi3 returns quotient in r4, remainder in a1 */ | |
122 #ifdef L_divsi3 | |
123 .text | |
124 .global __udivsi3 | |
125 .global __umodsi3 | |
126 .global __divsi3 | |
127 .global __modsi3 | |
128 | |
129 /* unsigned division: quotient returned in r4, remainder left in a1 */ | |
130 .ent __udivsi3 | |
131 __udivsi3: | |
132 li t4, 0 # t4 = quotient so far | |
133 cmpi.c a1, 0 | |
134 beq __uds_done # zero divisor: quotient 0, dividend passed through as remainder | |
135 li t0, 1 # t0 = quotient bit for the current divisor position | |
136 blt __uds_aligned # flags from the cmpi.c above: top bit already set, no shifting | |
137 __uds_align: | |
138 cmp.c a0, a1 | |
139 bcc __uds_aligned # stop shifting once the divisor exceeds the dividend | |
140 slli t0, t0, 1 | |
141 slli a1, a1, 1 | |
142 cmpi.c a1, 0 | |
143 bge __uds_align # keep shifting while the divisor top bit is clear | |
144 __uds_aligned: | |
145 __uds_step: | |
146 cmp.c a0, a1 | |
147 bcc __uds_next # divisor does not fit at this position | |
148 or t4, t4, t0 # record the quotient bit | |
149 sub a0, a0, a1 # and take the divisor out of the remainder | |
150 __uds_next: | |
151 srli a1, a1, 1 | |
152 srli t0, t0, 1 | |
153 cmpi.c t0, 0 | |
154 bne __uds_step # loop until the bit mask has shifted out | |
155 __uds_done: | |
156 mv a1, a0 # remainder -> a1, before r4 is overwritten | |
157 mv a0, t4 # quotient -> r4 | |
158 br ra | |
159 .end __udivsi3 | |
160 | |
161 /* unsigned modulus: run __udivsi3 and return the remainder it leaves in a1 */ | |
162 .ent __umodsi3 | |
163 __umodsi3: | |
164 mv t3, ra # keep our own return address across the call | |
165 jl __udivsi3 | |
166 mv a0, a1 # result = remainder | |
167 br t3 # return through the saved address | |
168 .end __umodsi3 | |
169 | |
170 /* negate any negative operand, then continue in __udivsi3 */ | |
171 .ent __orgsi3 | |
172 __orgsi3: | |
173 cmpi.c a0, 0 | |
174 bge __orgsi3_divisor # dividend is already non-negative | |
175 neg a0, a0 | |
176 __orgsi3_divisor: | |
177 cmpi.c a1, 0 | |
178 bge __udivsi3 # divisor is already non-negative | |
179 neg a1, a1 | |
180 b __udivsi3 # plain branch: __udivsi3 returns straight to our caller | |
181 .end __orgsi3 | |
182 | |
183 /* signed division: divide via __orgsi3, negate when the operand signs differ */ | |
184 .ent __divsi3 | |
185 __divsi3: | |
186 xor t2, a0, a1 # t2 is negative exactly when the operand signs differ | |
187 mv t3, ra # return address is kept in t3 across the call | |
188 jl __orgsi3 | |
189 __divsi3_adjust: | |
190 cmpi.c t2, 0 # __modsi3 also enters here, with t2 = its dividend | |
191 bge __divsi3_return | |
192 neg a0, a0 | |
193 __divsi3_return: | |
194 br t3 | |
195 .end __divsi3 | |
196 | |
197 /* signed modulus: remainder via __orgsi3, negated when the dividend was negative */ | |
198 .ent __modsi3 | |
199 __modsi3: | |
200 mv t2, a0 # t2 = original dividend; its sign decides the result sign | |
201 mv t3, ra # return address is kept in t3 across the call | |
202 jl __orgsi3 | |
203 mv a0, a1 # take the remainder that __udivsi3 left in a1 | |
204 b __divsi3_adjust # shared tail: negate if t2 is negative, return via t3 | |
205 .end __modsi3 | |
206 | |
207 #endif /* L_divsi3 */ | |
208 #else /* -fPIC */ | |
209 #if !defined(L_mulsi3) && !defined(L_divsi3) | |
210 .set pic | |
211 .text | |
212 .global _flush_cache | |
213 #ifdef __score3__ | |
214 _flush_cache: | |
215 br r3 # score3 build: the routine returns immediately | |
216 #else | |
217 _flush_cache: | |
218 addi r0, -8 # pic used | |
219 .cpload r29 # pic used | |
220 srli r9, r5, 4 # r9 = r5 / 16, the number of 16-byte steps | |
221 mv r8, r4 # r8 walks the range, starting at r4 | |
222 mtsr r9, sr0 # step count into sr0 (presumably the counter bcnz tests) | |
223 1: | |
224 cache 0xe, [r8, 0] # write back invalid dcache | |
225 addi r8, 16 | |
226 bcnz 1b | |
227 mfcr r8, cr4 | |
228 ldi r10, 0 # clear r10 before either test: both cache ops below use it as base, and the LIM one used to see a stale r10 when LDM was off | |
229 bittst! r8, 0x3 # if LDM is enabled, write back LDM | |
230 beq! 6f | |
231 cache 0xc, [r10, 0] | |
232 6: | |
233 bittst! r8, 0x2 # if LIM is enabled, refill it | |
234 beq! 7f | |
235 cache 0x4, [r10, 0] | |
236 7: | |
237 #nop! | |
238 #nop! | |
239 #nop! | |
240 #nop! | |
241 #nop! | |
242 mv r8, r4 # second pass over the same range | |
243 mtsr r9, sr0 # same step count as the first pass | |
244 2: | |
245 cache 0x2, [r8, 0] # invalid unlock icache | |
246 #nop! | |
247 #nop! | |
248 #nop! | |
249 #nop! | |
250 #nop! | |
251 addi r8, 16 | |
252 bcnz 2b | |
253 .cprestore r0, 12 # pic used | |
254 addi r0, 8 # pic used | |
255 br r3 | |
256 #endif | |
257 #endif | |
258 | |
259 /* FUNCTION | |
260 (U) INT32 v0 = __mulsi3 ((U) INT32 a0, (U) INT32 a1); the code delivers the result in r4 | |
261 REGISTERS: | |
262 use t0, t1 (t1 accumulates the product) | |
263 modify a0 | |
264 a1 -> becomes 0 | |
265 NOTE: | |
266 a plain shift-and-add loop seems to give better performance here. */ | |
267 | |
268 #ifdef L_mulsi3 | |
269 .set pic | |
270 .text | |
271 .global __mulsi3 | |
272 .global __umulsi3 | |
273 /* 32x32 multiply; one body shared by the signed and unsigned entry points */ | |
274 .ent __mulsi3 | |
275 __umulsi3: | |
276 __mulsi3: | |
277 addi r0, -8 # pic used | |
278 .cpload r29 # pic used | |
279 li t1, 0 # t1 = running product | |
280 __mulsi3_next_bit: | |
281 andri.c t0, a1, 1 # test the lowest multiplier bit | |
282 srli a1, a1, 1 # and drop it from the multiplier | |
283 beq __mulsi3_shift # bit was clear: nothing to add | |
284 add t1, t1, a0 # bit was set: accumulate the multiplicand | |
285 __mulsi3_shift: | |
286 slli a0, a0, 1 # multiplicand *= 2 | |
287 cmpi.c a1, 0 | |
288 bne __mulsi3_next_bit # repeat while multiplier bits remain | |
289 mv a0, t1 # product goes back in r4 | |
290 .cprestore r0, 12 # pic used | |
291 addi r0, 8 # pic used | |
292 br ra | |
293 .end __mulsi3 | |
294 #endif /* L_mulsi3 */ | |
295 | |
296 /* FUNCTION | |
297 UINT32 (v0) = __udivsi3 (UINT32 (a0), UINT32 (a1)); | |
298 INT32 (v0) = __divsi3 (INT32 (a0), INT32 (a1)); | |
299 UINT32 (v0) = __umodsi3 (UINT32 (a0), UINT32 (a1)); | |
300 INT32 (v0) = __modsi3 (INT32 (a0), INT32 (a1)); | |
301 DESCRIPTION | |
302 performs 32-bit division/modulo. | |
303 REGISTERS | |
304 used t0 bit-index, t4 quotient | |
305 t1, t2 sign, t3 saved return address (wrappers only) | |
306 modify a0 holds the running remainder; __udivsi3 returns quotient in r4, remainder in a1 */ | |
307 #ifdef L_divsi3 | |
308 .set pic | |
309 .text | |
310 .global __udivsi3 | |
311 .global __umodsi3 | |
312 .global __divsi3 | |
313 .global __modsi3 | |
314 | |
315 /* unsigned division: quotient returned in r4, remainder left in a1 */ | |
316 .ent __udivsi3 | |
317 __udivsi3: | |
318 addi r0, -8 # pic used | |
319 .cpload r29 # pic used | |
320 li t4, 0 # t4 = quotient so far | |
321 cmpi.c a1, 0 | |
322 beq __uds_done # zero divisor: quotient 0, dividend passed through as remainder | |
323 li t0, 1 # t0 = quotient bit for the current divisor position | |
324 blt __uds_aligned # flags from the cmpi.c above: top bit already set, no shifting | |
325 __uds_align: | |
326 cmp.c a0, a1 | |
327 bcc __uds_aligned # stop shifting once the divisor exceeds the dividend | |
328 slli t0, t0, 1 | |
329 slli a1, a1, 1 | |
330 cmpi.c a1, 0 | |
331 bge __uds_align # keep shifting while the divisor top bit is clear | |
332 __uds_aligned: | |
333 __uds_step: | |
334 cmp.c a0, a1 | |
335 bcc __uds_next # divisor does not fit at this position | |
336 or t4, t4, t0 # record the quotient bit | |
337 sub a0, a0, a1 # and take the divisor out of the remainder | |
338 __uds_next: | |
339 srli a1, a1, 1 | |
340 srli t0, t0, 1 | |
341 cmpi.c t0, 0 | |
342 bne __uds_step # loop until the bit mask has shifted out | |
343 __uds_done: | |
344 mv a1, a0 # remainder -> a1, before r4 is overwritten | |
345 mv a0, t4 # quotient -> r4 | |
346 .cprestore r0, 12 # pic used | |
347 addi r0, 8 # pic used | |
348 br ra | |
349 .end __udivsi3 | |
350 | |
351 /* unsigned modulus: run __udivsi3 and return the remainder it leaves in a1 */ | |
352 .ent __umodsi3 | |
353 __umodsi3: | |
354 addi r0, -8 # pic used | |
355 .cpload r29 # pic used | |
357 mv t3, ra # keep our own return address across the call | |
358 la r29, __udivsi3 # call goes through r29 in the pic build | |
359 brl r29 | |
360 mv a0, a1 # result = remainder | |
361 .cprestore r0, 12 # pic used | |
362 addi r0, 8 # pic used | |
363 br t3 # return through the saved address | |
364 .end __umodsi3 | |
365 | |
366 /* negate any negative operand, then continue in __udivsi3 */ | |
367 .ent __orgsi3 | |
368 __orgsi3: | |
369 cmpi.c a0, 0 | |
370 bge __orgsi3_divisor # dividend is already non-negative | |
371 neg a0, a0 | |
372 __orgsi3_divisor: | |
373 cmpi.c a1, 0 | |
374 bge __udivsi3 # divisor is already non-negative | |
375 neg a1, a1 | |
376 b __udivsi3 # plain branch: __udivsi3 returns straight to our caller | |
377 .end __orgsi3 | |
378 | |
379 /* signed division: divide via __orgsi3, negate when the operand signs differ */ | |
380 .ent __divsi3 | |
381 __divsi3: | |
382 addi r0, -8 # pic used | |
383 .cpload r29 # pic used | |
384 xor t2, a0, a1 # t2 is negative exactly when the operand signs differ | |
385 mv t3, ra # return address is kept in t3 across the call | |
386 la r29, __orgsi3 # call goes through r29 in the pic build | |
387 brl r29 | |
388 __divsi3_adjust: | |
389 cmpi.c t2, 0 # __modsi3 also enters here, with t2 = its dividend | |
390 bge __divsi3_return | |
391 neg a0, a0 | |
392 __divsi3_return: | |
393 .cprestore r0, 12 # pic used | |
394 addi r0, 8 # pic used; also undoes the addi r0, -8 done by __modsi3 | |
395 br t3 | |
396 .end __divsi3 | |
397 | |
398 /* signed modulus: remainder via __orgsi3, negated when the dividend was negative */ | |
399 .ent __modsi3 | |
400 __modsi3: | |
401 addi r0, -8 # pic used; undone by the shared tail in __divsi3 | |
402 .cpload r29 # pic used | |
403 mv t2, a0 # t2 = original dividend; its sign decides the result sign | |
404 mv t3, ra # return address is kept in t3 across the call | |
405 la r29, __orgsi3 # call goes through r29 in the pic build | |
406 brl r29 | |
407 mv a0, a1 # take the remainder that __udivsi3 left in a1 | |
408 b __divsi3_adjust # shared tail: negate if t2 is negative, return via t3 | |
409 .end __modsi3 | |
410 | |
411 #endif /*L_divsi3 */ | |
412 #endif |