Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/sh/lib1funcs.asm @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 77e2b8dfacca |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, | |
2 2004, 2005, 2006, 2009 | |
3 Free Software Foundation, Inc. | |
4 | |
5 This file is free software; you can redistribute it and/or modify it | |
6 under the terms of the GNU General Public License as published by the | |
7 Free Software Foundation; either version 3, or (at your option) any | |
8 later version. | |
9 | |
10 This file is distributed in the hope that it will be useful, but | |
11 WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 General Public License for more details. | |
14 | |
15 Under Section 7 of GPL version 3, you are granted additional | |
16 permissions described in the GCC Runtime Library Exception, version | |
17 3.1, as published by the Free Software Foundation. | |
18 | |
19 You should have received a copy of the GNU General Public License and | |
20 a copy of the GCC Runtime Library Exception along with this program; | |
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 <http://www.gnu.org/licenses/>. */ | |
23 | |
24 | |
25 !! libgcc routines for the Renesas / SuperH SH CPUs. | |
26 !! Contributed by Steve Chamberlain. | |
27 !! sac@cygnus.com | |
28 | |
29 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines | |
30 !! recoded in assembly by Toshiyasu Morita | |
31 !! tm@netcom.com | |
32 | |
33 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and | |
34 ELF local label prefixes by J"orn Rennecke | |
35 amylaar@cygnus.com */ | |
36 | |
37 #include "lib1funcs.h" | |
38 | |
39 /* t-vxworks needs to build both PIC and non-PIC versions of libgcc, | |
40 so it is more convenient to define NO_FPSCR_VALUES here than to | |
41 define it on the command line. */ | |
42 #if defined __vxworks && defined __PIC__ | |
43 #define NO_FPSCR_VALUES | |
44 #endif | |
45 | |
46 #if ! __SH5__ | |
47 #ifdef L_ashiftrt | |
48 .global GLOBAL(ashiftrt_r4_0) | |
49 .global GLOBAL(ashiftrt_r4_1) | |
50 .global GLOBAL(ashiftrt_r4_2) | |
51 .global GLOBAL(ashiftrt_r4_3) | |
52 .global GLOBAL(ashiftrt_r4_4) | |
53 .global GLOBAL(ashiftrt_r4_5) | |
54 .global GLOBAL(ashiftrt_r4_6) | |
55 .global GLOBAL(ashiftrt_r4_7) | |
56 .global GLOBAL(ashiftrt_r4_8) | |
57 .global GLOBAL(ashiftrt_r4_9) | |
58 .global GLOBAL(ashiftrt_r4_10) | |
59 .global GLOBAL(ashiftrt_r4_11) | |
60 .global GLOBAL(ashiftrt_r4_12) | |
61 .global GLOBAL(ashiftrt_r4_13) | |
62 .global GLOBAL(ashiftrt_r4_14) | |
63 .global GLOBAL(ashiftrt_r4_15) | |
64 .global GLOBAL(ashiftrt_r4_16) | |
65 .global GLOBAL(ashiftrt_r4_17) | |
66 .global GLOBAL(ashiftrt_r4_18) | |
67 .global GLOBAL(ashiftrt_r4_19) | |
68 .global GLOBAL(ashiftrt_r4_20) | |
69 .global GLOBAL(ashiftrt_r4_21) | |
70 .global GLOBAL(ashiftrt_r4_22) | |
71 .global GLOBAL(ashiftrt_r4_23) | |
72 .global GLOBAL(ashiftrt_r4_24) | |
73 .global GLOBAL(ashiftrt_r4_25) | |
74 .global GLOBAL(ashiftrt_r4_26) | |
75 .global GLOBAL(ashiftrt_r4_27) | |
76 .global GLOBAL(ashiftrt_r4_28) | |
77 .global GLOBAL(ashiftrt_r4_29) | |
78 .global GLOBAL(ashiftrt_r4_30) | |
79 .global GLOBAL(ashiftrt_r4_31) | |
80 .global GLOBAL(ashiftrt_r4_32) | |
81 | |
82 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0)) | |
83 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1)) | |
84 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2)) | |
85 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3)) | |
86 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4)) | |
87 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5)) | |
88 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6)) | |
89 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7)) | |
90 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8)) | |
91 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9)) | |
92 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10)) | |
93 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11)) | |
94 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12)) | |
95 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13)) | |
96 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14)) | |
97 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15)) | |
98 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16)) | |
99 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17)) | |
100 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18)) | |
101 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19)) | |
102 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20)) | |
103 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21)) | |
104 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22)) | |
105 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23)) | |
106 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24)) | |
107 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25)) | |
108 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26)) | |
109 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27)) | |
110 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28)) | |
111 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29)) | |
112 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30)) | |
113 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31)) | |
114 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32)) | |
115 | |
116 .align 1 | |
117 GLOBAL(ashiftrt_r4_32): /* ashiftrt_r4_N: arithmetic right shift of r4 by N, result left in r4; 32 shares the code for 31 */ | |
118 GLOBAL(ashiftrt_r4_31): | |
119 rotcl r4 /* T = sign bit of r4 */ | |
120 rts | |
121 subc r4,r4 /* delay slot: r4 = 0 - T, i.e. 0 or -1 */ | |
122 | |
123 GLOBAL(ashiftrt_r4_30): /* 30..25: one shar per entry, then fall into the shift-by-24 tail */ | |
124 shar r4 | |
125 GLOBAL(ashiftrt_r4_29): | |
126 shar r4 | |
127 GLOBAL(ashiftrt_r4_28): | |
128 shar r4 | |
129 GLOBAL(ashiftrt_r4_27): | |
130 shar r4 | |
131 GLOBAL(ashiftrt_r4_26): | |
132 shar r4 | |
133 GLOBAL(ashiftrt_r4_25): | |
134 shar r4 | |
135 GLOBAL(ashiftrt_r4_24): | |
136 shlr16 r4 | |
137 shlr8 r4 /* logical shift by 24 leaves the top byte in bits 7..0 */ | |
138 rts | |
139 exts.b r4,r4 /* delay slot: sign-extend that byte */ | |
140 | |
141 GLOBAL(ashiftrt_r4_23): /* 23..17: one shar per entry, then fall into the shift-by-16 tail */ | |
142 shar r4 | |
143 GLOBAL(ashiftrt_r4_22): | |
144 shar r4 | |
145 GLOBAL(ashiftrt_r4_21): | |
146 shar r4 | |
147 GLOBAL(ashiftrt_r4_20): | |
148 shar r4 | |
149 GLOBAL(ashiftrt_r4_19): | |
150 shar r4 | |
151 GLOBAL(ashiftrt_r4_18): | |
152 shar r4 | |
153 GLOBAL(ashiftrt_r4_17): | |
154 shar r4 | |
155 GLOBAL(ashiftrt_r4_16): | |
156 shlr16 r4 /* logical shift by 16 leaves the top half in bits 15..0 */ | |
157 rts | |
158 exts.w r4,r4 /* delay slot: sign-extend that half word */ | |
159 | |
160 GLOBAL(ashiftrt_r4_15): /* 15..1: entry N executes N single-bit shar steps, the last in the rts delay slot */ | |
161 shar r4 | |
162 GLOBAL(ashiftrt_r4_14): | |
163 shar r4 | |
164 GLOBAL(ashiftrt_r4_13): | |
165 shar r4 | |
166 GLOBAL(ashiftrt_r4_12): | |
167 shar r4 | |
168 GLOBAL(ashiftrt_r4_11): | |
169 shar r4 | |
170 GLOBAL(ashiftrt_r4_10): | |
171 shar r4 | |
172 GLOBAL(ashiftrt_r4_9): | |
173 shar r4 | |
174 GLOBAL(ashiftrt_r4_8): | |
175 shar r4 | |
176 GLOBAL(ashiftrt_r4_7): | |
177 shar r4 | |
178 GLOBAL(ashiftrt_r4_6): | |
179 shar r4 | |
180 GLOBAL(ashiftrt_r4_5): | |
181 shar r4 | |
182 GLOBAL(ashiftrt_r4_4): | |
183 shar r4 | |
184 GLOBAL(ashiftrt_r4_3): | |
185 shar r4 | |
186 GLOBAL(ashiftrt_r4_2): | |
187 shar r4 | |
188 GLOBAL(ashiftrt_r4_1): | |
189 rts | |
190 shar r4 /* delay slot: final single-bit shift */ | |
191 | |
192 GLOBAL(ashiftrt_r4_0): /* shift by 0: r4 is returned unchanged */ | |
193 rts | |
194 nop | |
195 | |
196 ENDFUNC(GLOBAL(ashiftrt_r4_0)) | |
197 ENDFUNC(GLOBAL(ashiftrt_r4_1)) | |
198 ENDFUNC(GLOBAL(ashiftrt_r4_2)) | |
199 ENDFUNC(GLOBAL(ashiftrt_r4_3)) | |
200 ENDFUNC(GLOBAL(ashiftrt_r4_4)) | |
201 ENDFUNC(GLOBAL(ashiftrt_r4_5)) | |
202 ENDFUNC(GLOBAL(ashiftrt_r4_6)) | |
203 ENDFUNC(GLOBAL(ashiftrt_r4_7)) | |
204 ENDFUNC(GLOBAL(ashiftrt_r4_8)) | |
205 ENDFUNC(GLOBAL(ashiftrt_r4_9)) | |
206 ENDFUNC(GLOBAL(ashiftrt_r4_10)) | |
207 ENDFUNC(GLOBAL(ashiftrt_r4_11)) | |
208 ENDFUNC(GLOBAL(ashiftrt_r4_12)) | |
209 ENDFUNC(GLOBAL(ashiftrt_r4_13)) | |
210 ENDFUNC(GLOBAL(ashiftrt_r4_14)) | |
211 ENDFUNC(GLOBAL(ashiftrt_r4_15)) | |
212 ENDFUNC(GLOBAL(ashiftrt_r4_16)) | |
213 ENDFUNC(GLOBAL(ashiftrt_r4_17)) | |
214 ENDFUNC(GLOBAL(ashiftrt_r4_18)) | |
215 ENDFUNC(GLOBAL(ashiftrt_r4_19)) | |
216 ENDFUNC(GLOBAL(ashiftrt_r4_20)) | |
217 ENDFUNC(GLOBAL(ashiftrt_r4_21)) | |
218 ENDFUNC(GLOBAL(ashiftrt_r4_22)) | |
219 ENDFUNC(GLOBAL(ashiftrt_r4_23)) | |
220 ENDFUNC(GLOBAL(ashiftrt_r4_24)) | |
221 ENDFUNC(GLOBAL(ashiftrt_r4_25)) | |
222 ENDFUNC(GLOBAL(ashiftrt_r4_26)) | |
223 ENDFUNC(GLOBAL(ashiftrt_r4_27)) | |
224 ENDFUNC(GLOBAL(ashiftrt_r4_28)) | |
225 ENDFUNC(GLOBAL(ashiftrt_r4_29)) | |
226 ENDFUNC(GLOBAL(ashiftrt_r4_30)) | |
227 ENDFUNC(GLOBAL(ashiftrt_r4_31)) | |
228 ENDFUNC(GLOBAL(ashiftrt_r4_32)) | |
229 #endif | |
230 | |
231 #ifdef L_ashiftrt_n | |
232 | |
233 ! | |
234 ! GLOBAL(ashrsi3) | |
235 ! | |
236 ! Entry: | |
237 ! | |
238 ! r4: Value to shift | |
239 ! r5: Shifts | |
240 ! | |
241 ! Exit: | |
242 ! | |
243 ! r0: Result | |
244 ! | |
245 ! Destroys: | |
246 ! | |
247 ! r5 (masked to 0..31, then reused for the jump-table offset); T bit may change | |
248 ! | |
249 | |
250 .global GLOBAL(ashrsi3) | |
251 HIDDEN_FUNC(GLOBAL(ashrsi3)) | |
252 .align 2 | |
253 GLOBAL(ashrsi3): /* r0 = r4 arithmetically shifted right by (r5 & 31); dispatches through a byte-offset jump table */ | |
254 mov #31,r0 | |
255 and r0,r5 /* r5 = shift count & 31 */ | |
256 mova LOCAL(ashrsi3_table),r0 /* r0 = address of the offset table */ | |
257 mov.b @(r0,r5),r5 /* r5 = sign-extended table byte: handler address minus table address */ | |
258 #ifdef __sh1__ | |
259 add r5,r0 /* r0 = absolute handler address */ | |
260 jmp @r0 /* the delay slot below overwrites r0 only after the target has been read */ | |
261 #else | |
262 braf r5 /* target is PC+4+r5, so the table has to start right after the delay slot */ | |
263 #endif | |
264 mov r4,r0 /* delay slot: r0 = value to shift */ | |
265 | |
266 .align 2 | |
267 LOCAL(ashrsi3_table): /* one byte per shift count 0..31 */ | |
268 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table) | |
269 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table) | |
270 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table) | |
271 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table) | |
272 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table) | |
273 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table) | |
274 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table) | |
275 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table) | |
276 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table) | |
277 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table) | |
278 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table) | |
279 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table) | |
280 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table) | |
281 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table) | |
282 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table) | |
283 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table) | |
284 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table) | |
285 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table) | |
286 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table) | |
287 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table) | |
288 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table) | |
289 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table) | |
290 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table) | |
291 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table) | |
292 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table) | |
293 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table) | |
294 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table) | |
295 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table) | |
296 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table) | |
297 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table) | |
298 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table) | |
299 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table) | |
300 | |
301 LOCAL(ashrsi3_31): /* shift by 31: result is 0 or -1 */ | |
302 rotcl r0 /* T = sign bit of r0 */ | |
303 rts | |
304 subc r0,r0 /* delay slot: r0 = 0 - T */ | |
305 | |
306 LOCAL(ashrsi3_30): /* 30..25: one shar per entry, then fall into the shift-by-24 tail */ | |
307 shar r0 | |
308 LOCAL(ashrsi3_29): | |
309 shar r0 | |
310 LOCAL(ashrsi3_28): | |
311 shar r0 | |
312 LOCAL(ashrsi3_27): | |
313 shar r0 | |
314 LOCAL(ashrsi3_26): | |
315 shar r0 | |
316 LOCAL(ashrsi3_25): | |
317 shar r0 | |
318 LOCAL(ashrsi3_24): | |
319 shlr16 r0 | |
320 shlr8 r0 /* logical shift by 24 leaves the top byte in bits 7..0 */ | |
321 rts | |
322 exts.b r0,r0 /* delay slot: sign-extend that byte */ | |
323 | |
324 LOCAL(ashrsi3_23): /* 23..17: one shar per entry, then fall into the shift-by-16 tail */ | |
325 shar r0 | |
326 LOCAL(ashrsi3_22): | |
327 shar r0 | |
328 LOCAL(ashrsi3_21): | |
329 shar r0 | |
330 LOCAL(ashrsi3_20): | |
331 shar r0 | |
332 LOCAL(ashrsi3_19): | |
333 shar r0 | |
334 LOCAL(ashrsi3_18): | |
335 shar r0 | |
336 LOCAL(ashrsi3_17): | |
337 shar r0 | |
338 LOCAL(ashrsi3_16): | |
339 shlr16 r0 /* logical shift by 16 leaves the top half in bits 15..0 */ | |
340 rts | |
341 exts.w r0,r0 /* delay slot: sign-extend that half word */ | |
342 | |
343 LOCAL(ashrsi3_15): /* 15..1: entry N executes N single-bit shar steps, the last in the rts delay slot */ | |
344 shar r0 | |
345 LOCAL(ashrsi3_14): | |
346 shar r0 | |
347 LOCAL(ashrsi3_13): | |
348 shar r0 | |
349 LOCAL(ashrsi3_12): | |
350 shar r0 | |
351 LOCAL(ashrsi3_11): | |
352 shar r0 | |
353 LOCAL(ashrsi3_10): | |
354 shar r0 | |
355 LOCAL(ashrsi3_9): | |
356 shar r0 | |
357 LOCAL(ashrsi3_8): | |
358 shar r0 | |
359 LOCAL(ashrsi3_7): | |
360 shar r0 | |
361 LOCAL(ashrsi3_6): | |
362 shar r0 | |
363 LOCAL(ashrsi3_5): | |
364 shar r0 | |
365 LOCAL(ashrsi3_4): | |
366 shar r0 | |
367 LOCAL(ashrsi3_3): | |
368 shar r0 | |
369 LOCAL(ashrsi3_2): | |
370 shar r0 | |
371 LOCAL(ashrsi3_1): | |
372 rts | |
373 shar r0 /* delay slot: final single-bit shift */ | |
374 | |
375 LOCAL(ashrsi3_0): /* shift by 0: r0 already holds the copy of r4 */ | |
376 rts | |
377 nop | |
378 | |
379 ENDFUNC(GLOBAL(ashrsi3)) | |
380 #endif | |
381 | |
382 #ifdef L_ashiftlt | |
383 | |
384 ! | |
385 ! GLOBAL(ashlsi3) | |
386 ! | |
387 ! Entry: | |
388 ! | |
389 ! r4: Value to shift | |
390 ! r5: Shifts | |
391 ! | |
392 ! Exit: | |
393 ! | |
394 ! r0: Result | |
395 ! | |
396 ! Destroys: | |
397 ! | |
398 ! r5 (masked to 0..31, then reused for the jump-table offset); T bit may change | |
399 ! | |
400 .global GLOBAL(ashlsi3) | |
401 HIDDEN_FUNC(GLOBAL(ashlsi3)) | |
402 .align 2 | |
403 GLOBAL(ashlsi3): /* r0 = r4 shifted left by (r5 & 31); dispatches through a byte-offset jump table */ | |
404 mov #31,r0 | |
405 and r0,r5 /* r5 = shift count & 31 */ | |
406 mova LOCAL(ashlsi3_table),r0 /* r0 = address of the offset table */ | |
407 mov.b @(r0,r5),r5 /* r5 = sign-extended table byte: handler address minus table address */ | |
408 #ifdef __sh1__ | |
409 add r5,r0 /* r0 = absolute handler address */ | |
410 jmp @r0 /* the delay slot below overwrites r0 only after the target has been read */ | |
411 #else | |
412 braf r5 /* target is PC+4+r5, so the table has to start right after the delay slot */ | |
413 #endif | |
414 mov r4,r0 /* delay slot: r0 = value to shift */ | |
415 | |
416 .align 2 | |
417 LOCAL(ashlsi3_table): /* one byte per shift count 0..31 */ | |
418 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table) | |
419 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table) | |
420 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table) | |
421 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table) | |
422 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table) | |
423 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table) | |
424 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table) | |
425 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table) | |
426 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table) | |
427 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table) | |
428 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table) | |
429 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table) | |
430 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table) | |
431 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table) | |
432 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table) | |
433 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table) | |
434 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table) | |
435 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table) | |
436 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table) | |
437 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table) | |
438 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table) | |
439 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table) | |
440 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table) | |
441 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table) | |
442 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table) | |
443 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table) | |
444 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table) | |
445 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table) | |
446 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table) | |
447 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table) | |
448 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table) | |
449 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table) | |
450 | |
451 LOCAL(ashlsi3_6): /* counts 6,4,2: stacked shll2 steps, the last one in the rts delay slot */ | |
452 shll2 r0 | |
453 LOCAL(ashlsi3_4): | |
454 shll2 r0 | |
455 LOCAL(ashlsi3_2): | |
456 rts | |
457 shll2 r0 | |
458 | |
459 LOCAL(ashlsi3_7): /* counts 7,5,3,1: shll2 steps, then a single shll in the delay slot */ | |
460 shll2 r0 | |
461 LOCAL(ashlsi3_5): | |
462 shll2 r0 | |
463 LOCAL(ashlsi3_3): | |
464 shll2 r0 | |
465 LOCAL(ashlsi3_1): | |
466 rts | |
467 shll r0 | |
468 | |
469 LOCAL(ashlsi3_14): /* counts 14,12,10,8: shll2 steps, then shll8 in the delay slot */ | |
470 shll2 r0 | |
471 LOCAL(ashlsi3_12): | |
472 shll2 r0 | |
473 LOCAL(ashlsi3_10): | |
474 shll2 r0 | |
475 LOCAL(ashlsi3_8): | |
476 rts | |
477 shll8 r0 | |
478 | |
479 LOCAL(ashlsi3_15): /* counts 15,13,11,9: shll2 steps, shll8, then shll in the delay slot */ | |
480 shll2 r0 | |
481 LOCAL(ashlsi3_13): | |
482 shll2 r0 | |
483 LOCAL(ashlsi3_11): | |
484 shll2 r0 | |
485 LOCAL(ashlsi3_9): | |
486 shll8 r0 | |
487 rts | |
488 shll r0 | |
489 | |
490 LOCAL(ashlsi3_22): /* counts 22,20,18,16: shll2 steps, then shll16 in the delay slot */ | |
491 shll2 r0 | |
492 LOCAL(ashlsi3_20): | |
493 shll2 r0 | |
494 LOCAL(ashlsi3_18): | |
495 shll2 r0 | |
496 LOCAL(ashlsi3_16): | |
497 rts | |
498 shll16 r0 | |
499 | |
500 LOCAL(ashlsi3_23): /* counts 23,21,19,17: shll2 steps, shll16, then shll in the delay slot */ | |
501 shll2 r0 | |
502 LOCAL(ashlsi3_21): | |
503 shll2 r0 | |
504 LOCAL(ashlsi3_19): | |
505 shll2 r0 | |
506 LOCAL(ashlsi3_17): | |
507 shll16 r0 | |
508 rts | |
509 shll r0 | |
510 | |
511 LOCAL(ashlsi3_30): /* counts 30,28,26,24: shll2 steps, shll16, then shll8 in the delay slot */ | |
512 shll2 r0 | |
513 LOCAL(ashlsi3_28): | |
514 shll2 r0 | |
515 LOCAL(ashlsi3_26): | |
516 shll2 r0 | |
517 LOCAL(ashlsi3_24): | |
518 shll16 r0 | |
519 rts | |
520 shll8 r0 | |
521 | |
522 LOCAL(ashlsi3_31): /* counts 31,29,27,25: shll2 steps, shll16, shll8, then shll in the delay slot */ | |
523 shll2 r0 | |
524 LOCAL(ashlsi3_29): | |
525 shll2 r0 | |
526 LOCAL(ashlsi3_27): | |
527 shll2 r0 | |
528 LOCAL(ashlsi3_25): | |
529 shll16 r0 | |
530 shll8 r0 | |
531 rts | |
532 shll r0 | |
533 | |
534 LOCAL(ashlsi3_0): /* shift by 0: r0 already holds the copy of r4 */ | |
535 rts | |
536 nop | |
537 | |
538 ENDFUNC(GLOBAL(ashlsi3)) | |
539 #endif | |
540 | |
541 #ifdef L_lshiftrt | |
542 | |
543 ! | |
544 ! GLOBAL(lshrsi3) | |
545 ! | |
546 ! Entry: | |
547 ! | |
548 ! r4: Value to shift | |
549 ! r5: Shifts | |
550 ! | |
551 ! Exit: | |
552 ! | |
553 ! r0: Result | |
554 ! | |
555 ! Destroys: | |
556 ! | |
557 ! r5 (masked to 0..31, then reused for the jump-table offset); T bit may change | |
558 ! | |
559 .global GLOBAL(lshrsi3) | |
560 HIDDEN_FUNC(GLOBAL(lshrsi3)) | |
561 .align 2 | |
562 GLOBAL(lshrsi3): /* r0 = r4 logically shifted right by (r5 & 31); dispatches through a byte-offset jump table */ | |
563 mov #31,r0 | |
564 and r0,r5 /* r5 = shift count & 31 */ | |
565 mova LOCAL(lshrsi3_table),r0 /* r0 = address of the offset table */ | |
566 mov.b @(r0,r5),r5 /* r5 = sign-extended table byte: handler address minus table address */ | |
567 #ifdef __sh1__ | |
568 add r5,r0 /* r0 = absolute handler address */ | |
569 jmp @r0 /* the delay slot below overwrites r0 only after the target has been read */ | |
570 #else | |
571 braf r5 /* target is PC+4+r5, so the table has to start right after the delay slot */ | |
572 #endif | |
573 mov r4,r0 /* delay slot: r0 = value to shift */ | |
574 | |
575 .align 2 | |
576 LOCAL(lshrsi3_table): /* one byte per shift count 0..31 */ | |
577 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table) | |
578 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table) | |
579 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table) | |
580 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table) | |
581 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table) | |
582 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table) | |
583 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table) | |
584 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table) | |
585 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table) | |
586 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table) | |
587 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table) | |
588 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table) | |
589 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table) | |
590 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table) | |
591 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table) | |
592 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table) | |
593 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table) | |
594 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table) | |
595 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table) | |
596 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table) | |
597 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table) | |
598 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table) | |
599 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table) | |
600 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table) | |
601 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table) | |
602 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table) | |
603 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table) | |
604 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table) | |
605 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table) | |
606 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table) | |
607 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table) | |
608 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table) | |
609 | |
610 LOCAL(lshrsi3_6): /* counts 6,4,2: stacked shlr2 steps, the last one in the rts delay slot */ | |
611 shlr2 r0 | |
612 LOCAL(lshrsi3_4): | |
613 shlr2 r0 | |
614 LOCAL(lshrsi3_2): | |
615 rts | |
616 shlr2 r0 | |
617 | |
618 LOCAL(lshrsi3_7): /* counts 7,5,3,1: shlr2 steps, then a single shlr in the delay slot */ | |
619 shlr2 r0 | |
620 LOCAL(lshrsi3_5): | |
621 shlr2 r0 | |
622 LOCAL(lshrsi3_3): | |
623 shlr2 r0 | |
624 LOCAL(lshrsi3_1): | |
625 rts | |
626 shlr r0 | |
627 | |
628 LOCAL(lshrsi3_14): /* counts 14,12,10,8: shlr2 steps, then shlr8 in the delay slot */ | |
629 shlr2 r0 | |
630 LOCAL(lshrsi3_12): | |
631 shlr2 r0 | |
632 LOCAL(lshrsi3_10): | |
633 shlr2 r0 | |
634 LOCAL(lshrsi3_8): | |
635 rts | |
636 shlr8 r0 | |
637 | |
638 LOCAL(lshrsi3_15): /* counts 15,13,11,9: shlr2 steps, shlr8, then shlr in the delay slot */ | |
639 shlr2 r0 | |
640 LOCAL(lshrsi3_13): | |
641 shlr2 r0 | |
642 LOCAL(lshrsi3_11): | |
643 shlr2 r0 | |
644 LOCAL(lshrsi3_9): | |
645 shlr8 r0 | |
646 rts | |
647 shlr r0 | |
648 | |
649 LOCAL(lshrsi3_22): /* counts 22,20,18,16: shlr2 steps, then shlr16 in the delay slot */ | |
650 shlr2 r0 | |
651 LOCAL(lshrsi3_20): | |
652 shlr2 r0 | |
653 LOCAL(lshrsi3_18): | |
654 shlr2 r0 | |
655 LOCAL(lshrsi3_16): | |
656 rts | |
657 shlr16 r0 | |
658 | |
659 LOCAL(lshrsi3_23): /* counts 23,21,19,17: shlr2 steps, shlr16, then shlr in the delay slot */ | |
660 shlr2 r0 | |
661 LOCAL(lshrsi3_21): | |
662 shlr2 r0 | |
663 LOCAL(lshrsi3_19): | |
664 shlr2 r0 | |
665 LOCAL(lshrsi3_17): | |
666 shlr16 r0 | |
667 rts | |
668 shlr r0 | |
669 | |
670 LOCAL(lshrsi3_30): /* counts 30,28,26,24: shlr2 steps, shlr16, then shlr8 in the delay slot */ | |
671 shlr2 r0 | |
672 LOCAL(lshrsi3_28): | |
673 shlr2 r0 | |
674 LOCAL(lshrsi3_26): | |
675 shlr2 r0 | |
676 LOCAL(lshrsi3_24): | |
677 shlr16 r0 | |
678 rts | |
679 shlr8 r0 | |
680 | |
681 LOCAL(lshrsi3_31): /* counts 31,29,27,25: shlr2 steps, shlr16, shlr8, then shlr in the delay slot */ | |
682 shlr2 r0 | |
683 LOCAL(lshrsi3_29): | |
684 shlr2 r0 | |
685 LOCAL(lshrsi3_27): | |
686 shlr2 r0 | |
687 LOCAL(lshrsi3_25): | |
688 shlr16 r0 | |
689 shlr8 r0 | |
690 rts | |
691 shlr r0 | |
692 | |
693 LOCAL(lshrsi3_0): /* shift by 0: r0 already holds the copy of r4 */ | |
694 rts | |
695 nop | |
696 | |
697 ENDFUNC(GLOBAL(lshrsi3)) | |
698 #endif | |
699 | |
700 #ifdef L_movmem | |
701 .text | |
702 .balign 4 | |
703 .global GLOBAL(movmem) | |
704 HIDDEN_FUNC(GLOBAL(movmem)) | |
705 HIDDEN_ALIAS(movstr,movmem) | |
706 /* This would be a lot simpler if r6 contained the byte count | |
707 minus 64, and we wouldn't be called here for a byte count of 64. */ | |
708 GLOBAL(movmem): /* word copy from @r5 to @r4 in 64-byte groups; r6 is the size control. NOTE(review): the exact r6 encoding is set by the caller and is not visible here */ | |
709 sts.l pr,@-r15 /* save the caller return address; PR is reused below to re-enter the loop */ | |
710 shll2 r6 /* scale r6 by 4, the code size of one word copy in the movmemSI chain */ | |
711 bsr GLOBAL(movmemSI52+2) /* enter the chain just past its first load; its rts comes back to movmem_loop */ | |
712 mov.l @(48,r5),r0 /* delay slot: performs the load that was skipped */ | |
713 .balign 4 | |
714 LOCAL(movmem_loop): /* Reached with rts */ | |
715 mov.l @(60,r5),r0 /* words at 60, 56 and 52 are copied here; the chain entry covers 48..0 */ | |
716 add #-64,r6 /* account for one 64-byte group */ | |
717 mov.l r0,@(60,r4) | |
718 tst r6,r6 /* T = (r6 == 0) */ | |
719 mov.l @(56,r5),r0 | |
720 bt LOCAL(movmem_done) /* r6 == 0: this was the last group */ | |
721 mov.l r0,@(56,r4) | |
722 cmp/pl r6 /* T = (r6 > 0) */ | |
723 mov.l @(52,r5),r0 | |
724 add #64,r5 /* advance source to the next group */ | |
725 mov.l r0,@(52,r4) | |
726 add #64,r4 /* advance destination to the next group */ | |
727 bt GLOBAL(movmemSI52) /* r6 > 0: copy words 48..0 of the next group; PR still points at movmem_loop */ | |
728 ! done all the large groups, do the remainder | |
729 ! jump to movmem+ | |
730 mova GLOBAL(movmemSI4)+4,r0 | |
731 add r6,r0 /* r6 is negative here: step back 4 code bytes per remaining word */ | |
732 jmp @r0 /* delay slot is the lds.l below, so the chain rts returns to the original caller */ | |
733 LOCAL(movmem_done): ! share slot insn, works out aligned. | |
734 lds.l @r15+,pr /* restore the caller return address */ | |
735 mov.l r0,@(56,r4) /* finish the store skipped by the bt above */ | |
736 mov.l @(52,r5),r0 | |
737 rts | |
738 mov.l r0,@(52,r4) /* delay slot: last word of the final group handled here */ | |
739 .balign 4 | |
740 ! ??? We need aliases movstr* for movmem* for the older libraries. These | |
741 ! aliases will be removed at some point in the future. | |
742 .global GLOBAL(movmemSI64) | |
743 HIDDEN_FUNC(GLOBAL(movmemSI64)) | |
744 HIDDEN_ALIAS(movstrSI64,movmemSI64) | |
745 GLOBAL(movmemSI64): /* movmemSI<N> copies N bytes from @r5 to @r4 through r0, highest word first, falling through the smaller entry points; r4 and r5 are not modified */ | |
746 mov.l @(60,r5),r0 | |
747 mov.l r0,@(60,r4) | |
748 .global GLOBAL(movmemSI60) | |
749 HIDDEN_FUNC(GLOBAL(movmemSI60)) | |
750 HIDDEN_ALIAS(movstrSI60,movmemSI60) | |
751 GLOBAL(movmemSI60): | |
752 mov.l @(56,r5),r0 | |
753 mov.l r0,@(56,r4) | |
754 .global GLOBAL(movmemSI56) | |
755 HIDDEN_FUNC(GLOBAL(movmemSI56)) | |
756 HIDDEN_ALIAS(movstrSI56,movmemSI56) | |
757 GLOBAL(movmemSI56): | |
758 mov.l @(52,r5),r0 | |
759 mov.l r0,@(52,r4) | |
760 .global GLOBAL(movmemSI52) | |
761 HIDDEN_FUNC(GLOBAL(movmemSI52)) | |
762 HIDDEN_ALIAS(movstrSI52,movmemSI52) | |
763 GLOBAL(movmemSI52): /* movmem also branches here, and calls movmemSI52+2 with this load done in its delay slot */ | |
764 mov.l @(48,r5),r0 | |
765 mov.l r0,@(48,r4) | |
766 .global GLOBAL(movmemSI48) | |
767 HIDDEN_FUNC(GLOBAL(movmemSI48)) | |
768 HIDDEN_ALIAS(movstrSI48,movmemSI48) | |
769 GLOBAL(movmemSI48): | |
770 mov.l @(44,r5),r0 | |
771 mov.l r0,@(44,r4) | |
772 .global GLOBAL(movmemSI44) | |
773 HIDDEN_FUNC(GLOBAL(movmemSI44)) | |
774 HIDDEN_ALIAS(movstrSI44,movmemSI44) | |
775 GLOBAL(movmemSI44): | |
776 mov.l @(40,r5),r0 | |
777 mov.l r0,@(40,r4) | |
778 .global GLOBAL(movmemSI40) | |
779 HIDDEN_FUNC(GLOBAL(movmemSI40)) | |
780 HIDDEN_ALIAS(movstrSI40,movmemSI40) | |
781 GLOBAL(movmemSI40): | |
782 mov.l @(36,r5),r0 | |
783 mov.l r0,@(36,r4) | |
784 .global GLOBAL(movmemSI36) | |
785 HIDDEN_FUNC(GLOBAL(movmemSI36)) | |
786 HIDDEN_ALIAS(movstrSI36,movmemSI36) | |
787 GLOBAL(movmemSI36): | |
788 mov.l @(32,r5),r0 | |
789 mov.l r0,@(32,r4) | |
790 .global GLOBAL(movmemSI32) | |
791 HIDDEN_FUNC(GLOBAL(movmemSI32)) | |
792 HIDDEN_ALIAS(movstrSI32,movmemSI32) | |
793 GLOBAL(movmemSI32): | |
794 mov.l @(28,r5),r0 | |
795 mov.l r0,@(28,r4) | |
796 .global GLOBAL(movmemSI28) | |
797 HIDDEN_FUNC(GLOBAL(movmemSI28)) | |
798 HIDDEN_ALIAS(movstrSI28,movmemSI28) | |
799 GLOBAL(movmemSI28): | |
800 mov.l @(24,r5),r0 | |
801 mov.l r0,@(24,r4) | |
802 .global GLOBAL(movmemSI24) | |
803 HIDDEN_FUNC(GLOBAL(movmemSI24)) | |
804 HIDDEN_ALIAS(movstrSI24,movmemSI24) | |
805 GLOBAL(movmemSI24): | |
806 mov.l @(20,r5),r0 | |
807 mov.l r0,@(20,r4) | |
808 .global GLOBAL(movmemSI20) | |
809 HIDDEN_FUNC(GLOBAL(movmemSI20)) | |
810 HIDDEN_ALIAS(movstrSI20,movmemSI20) | |
811 GLOBAL(movmemSI20): | |
812 mov.l @(16,r5),r0 | |
813 mov.l r0,@(16,r4) | |
814 .global GLOBAL(movmemSI16) | |
815 HIDDEN_FUNC(GLOBAL(movmemSI16)) | |
816 HIDDEN_ALIAS(movstrSI16,movmemSI16) | |
817 GLOBAL(movmemSI16): | |
818 mov.l @(12,r5),r0 | |
819 mov.l r0,@(12,r4) | |
820 .global GLOBAL(movmemSI12) | |
821 HIDDEN_FUNC(GLOBAL(movmemSI12)) | |
822 HIDDEN_ALIAS(movstrSI12,movmemSI12) | |
823 GLOBAL(movmemSI12): | |
824 mov.l @(8,r5),r0 | |
825 mov.l r0,@(8,r4) | |
826 .global GLOBAL(movmemSI8) | |
827 HIDDEN_FUNC(GLOBAL(movmemSI8)) | |
828 HIDDEN_ALIAS(movstrSI8,movmemSI8) | |
829 GLOBAL(movmemSI8): | |
830 mov.l @(4,r5),r0 | |
831 mov.l r0,@(4,r4) | |
832 .global GLOBAL(movmemSI4) | |
833 HIDDEN_FUNC(GLOBAL(movmemSI4)) | |
834 HIDDEN_ALIAS(movstrSI4,movmemSI4) | |
835 GLOBAL(movmemSI4): /* movmem computes a jump relative to movmemSI4+4, so every entry above must stay exactly two instructions long */ | |
836 mov.l @(0,r5),r0 | |
837 rts | |
838 mov.l r0,@(0,r4) /* delay slot: store of the lowest word */ | |
839 | |
840 ENDFUNC(GLOBAL(movmemSI64)) | |
841 ENDFUNC(GLOBAL(movmemSI60)) | |
842 ENDFUNC(GLOBAL(movmemSI56)) | |
843 ENDFUNC(GLOBAL(movmemSI52)) | |
844 ENDFUNC(GLOBAL(movmemSI48)) | |
845 ENDFUNC(GLOBAL(movmemSI44)) | |
846 ENDFUNC(GLOBAL(movmemSI40)) | |
847 ENDFUNC(GLOBAL(movmemSI36)) | |
848 ENDFUNC(GLOBAL(movmemSI32)) | |
849 ENDFUNC(GLOBAL(movmemSI28)) | |
850 ENDFUNC(GLOBAL(movmemSI24)) | |
851 ENDFUNC(GLOBAL(movmemSI20)) | |
852 ENDFUNC(GLOBAL(movmemSI16)) | |
853 ENDFUNC(GLOBAL(movmemSI12)) | |
854 ENDFUNC(GLOBAL(movmemSI8)) | |
855 ENDFUNC(GLOBAL(movmemSI4)) | |
856 ENDFUNC(GLOBAL(movmem)) | |
857 #endif | |
858 | |
859 #ifdef L_movmem_i4 | |
860 .text | |
861 .global GLOBAL(movmem_i4_even) | |
862 .global GLOBAL(movmem_i4_odd) | |
863 .global GLOBAL(movmemSI12_i4) | |
864 | |
865 HIDDEN_FUNC(GLOBAL(movmem_i4_even)) | |
866 HIDDEN_FUNC(GLOBAL(movmem_i4_odd)) | |
867 HIDDEN_FUNC(GLOBAL(movmemSI12_i4)) | |
868 | |
869 HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even) | |
870 HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd) | |
871 HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4) | |
872 | |
873 .p2align 5 | |
874 L_movmem_2mod4_end: /* exit taken from L_movmem_loop: store the two words already loaded into r0 and r1 */ | |
875 mov.l r0,@(16,r4) | |
876 rts | |
877 mov.l r1,@(20,r4) /* delay slot */ | |
878 | |
879 .p2align 2 | |
880 | |
881 GLOBAL(movmem_i4_even): /* word copy from @r5 to @r4 with r6 as the dt counter; uses r0-r3 and advances r4 and r5. Preloads two words and joins the loop at L_movmem_start_even */ | |
882 mov.l @r5+,r0 | |
883 bra L_movmem_start_even | |
884 mov.l @r5+,r1 /* delay slot of bra */ | |
885 | |
886 GLOBAL(movmem_i4_odd): /* same copy loop, entered after storing a leading group of three words */ | |
887 mov.l @r5+,r1 | |
888 add #-4,r4 /* bias r4 so the stores at 4, 8 and 12 land at offsets 0, 4 and 8 of the original destination */ | |
889 mov.l @r5+,r2 | |
890 mov.l @r5+,r3 | |
891 mov.l r1,@(4,r4) | |
892 mov.l r2,@(8,r4) | |
893 | |
894 L_movmem_loop: /* each pass moves four words; dt r6 runs once per two words loaded */ | |
895 mov.l r3,@(12,r4) | |
896 dt r6 /* r6 -= 1, T = (r6 == 0) */ | |
897 mov.l @r5+,r0 | |
898 bt/s L_movmem_2mod4_end /* r6 reached zero: finish with the two words loaded around this branch */ | |
899 mov.l @r5+,r1 /* delay slot: executed on both paths */ | |
900 add #16,r4 | |
901 L_movmem_start_even: | |
902 mov.l @r5+,r2 | |
903 mov.l @r5+,r3 | |
904 mov.l r0,@r4 | |
905 dt r6 /* r6 -= 1, T = (r6 == 0) */ | |
906 mov.l r1,@(4,r4) | |
907 bf/s L_movmem_loop /* r6 not yet zero: another pass */ | |
908 mov.l r2,@(8,r4) /* delay slot: executed on both paths */ | |
909 rts | |
910 mov.l r3,@(12,r4) /* delay slot of rts: last word */ | |
911 | |
912 ENDFUNC(GLOBAL(movmem_i4_even)) | |
913 ENDFUNC(GLOBAL(movmem_i4_odd)) | |
914 | |
915 .p2align 4 | |
916 GLOBAL(movmemSI12_i4): /* copy exactly three words (12 bytes) from @r5 to @r4 using r0-r2; r4 and r5 are not modified */ | |
917 mov.l @r5,r0 | |
918 mov.l @(4,r5),r1 | |
919 mov.l @(8,r5),r2 | |
920 mov.l r0,@r4 | |
921 mov.l r1,@(4,r4) | |
922 rts | |
923 mov.l r2,@(8,r4) /* delay slot */ | |
924 | |
925 ENDFUNC(GLOBAL(movmemSI12_i4)) | |
926 #endif | |
927 | |
928 #ifdef L_mulsi3 | |
929 | |
930 | |
931 .global GLOBAL(mulsi3) | |
932 HIDDEN_FUNC(GLOBAL(mulsi3)) | |
933 | |
934 ! r4 = aabb | |
935 ! r5 = ccdd | |
936 ! r0 = aabb*ccdd via partial products | |
937 ! | |
938 ! if aa == 0 and cc == 0 | |
939 ! r0 = bb*dd | |
940 ! | |
941 ! else | |
942 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536) | |
943 ! | |
944 | |
945 GLOBAL(mulsi3): /* r0 = low 32 bits of r4 * r5, built from 16x16 mulu.w products; r1-r3, macl and T are used as scratch */ | |
946 mulu.w r4,r5 ! multiply the lsws macl=bb*dd | |
947 mov r5,r3 ! r3 = ccdd | |
948 swap.w r4,r2 ! r2 = bbaa | |
949 xtrct r2,r3 ! r3 = aacc | |
950 tst r3,r3 ! msws zero ? | |
951 bf hiset /* T clear: at least one upper half is nonzero, take the three-product path */ | |
952 rts ! yes - then we have the answer | |
953 sts macl,r0 /* delay slot: r0 = bb*dd */ | |
954 | |
955 hiset: sts macl,r0 ! r0 = bb*dd | |
956 mulu.w r2,r5 ! brewing macl = aa*dd | |
957 sts macl,r1 /* r1 = aa*dd */ | |
958 mulu.w r3,r4 ! brewing macl = cc*bb | |
959 sts macl,r2 /* r2 = cc*bb */ | |
960 add r1,r2 | |
961 shll16 r2 /* r2 = (aa*dd + cc*bb) << 16 */ | |
962 rts | |
963 add r2,r0 /* delay slot: add the cross terms to bb*dd */ | |
964 | |
965 ENDFUNC(GLOBAL(mulsi3)) | |
966 #endif | |
967 #endif /* ! __SH5__ */ | |
968 #ifdef L_sdivsi3_i4 | |
969 .title "SH DIVIDE" | |
970 !! 4 byte integer Divide code for the Renesas SH | |
971 #ifdef __SH4__ | |
972 !! args in r4 and r5, result in fpul, clobber dr0, dr2 | |
973 | |
974 .global GLOBAL(sdivsi3_i4) | |
975 HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) | |
976 GLOBAL(sdivsi3_i4): /* fpul = r4 / r5 via an FP divide on dr0/dr2. NOTE(review): FPSCR is not touched here, so double-precision mode is presumably already selected by the caller - confirm */ | |
977 lds r4,fpul | |
978 float fpul,dr0 /* dr0 = dividend converted from integer */ | |
979 lds r5,fpul | |
980 float fpul,dr2 /* dr2 = divisor converted from integer */ | |
981 fdiv dr2,dr0 /* dr0 = dr0 / dr2 */ | |
982 rts | |
983 ftrc dr0,fpul /* delay slot: fpul = quotient converted back to integer by truncation */ | |
984 | |
985 ENDFUNC(GLOBAL(sdivsi3_i4)) | |
986 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__) | |
987 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2 | |
988 | |
989 #if ! __SH5__ || __SH5__ == 32 | |
990 #if __SH5__ | |
991 .mode SHcompact | |
992 #endif | |
993 .global GLOBAL(sdivsi3_i4) | |
994 HIDDEN_FUNC(GLOBAL(sdivsi3_i4)) | |
995 GLOBAL(sdivsi3_i4): /* fpul = r4 / r5 via an FP divide on dr0/dr2, with FPSCR saved, replaced and restored around it */ | |
996 sts.l fpscr,@-r15 /* save the caller FPSCR on the stack */ | |
997 mov #8,r2 | |
998 swap.w r2,r2 /* r2 = 0x00080000 */ | |
999 lds r2,fpscr /* NOTE(review): bit 19 looks like the PR (double precision) bit - confirm against the SH-4 manual */ | |
1000 lds r4,fpul | |
1001 float fpul,dr0 /* dr0 = dividend converted from integer */ | |
1002 lds r5,fpul | |
1003 float fpul,dr2 /* dr2 = divisor converted from integer */ | |
1004 fdiv dr2,dr0 /* dr0 = dr0 / dr2 */ | |
1005 ftrc dr0,fpul /* fpul = quotient converted back to integer by truncation */ | |
1006 rts | |
1007 lds.l @r15+,fpscr /* delay slot: restore the caller FPSCR */ | |
1008 | |
1009 ENDFUNC(GLOBAL(sdivsi3_i4)) | |
1010 #endif /* ! __SH5__ || __SH5__ == 32 */ | |
1011 #endif /* ! __SH4__ */ | |
1012 #endif | |
1013 | |
1014 #ifdef L_sdivsi3 | |
1015 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with | |
1016 sh2e/sh3e code. */ | |
1017 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) | |
1018 !! | |
1019 !! Steve Chamberlain | |
1020 !! sac@cygnus.com | |
1021 !! | |
1022 !! | |
1023 | |
1024 !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit | |
1025 | |
1026 .global GLOBAL(sdivsi3) | |
1027 #if __SHMEDIA__ | |
1028 #if __SH5__ == 32 | |
1029 .section .text..SHmedia32,"ax" | |
1030 #else | |
1031 .text | |
1032 #endif | |
1033 .align 2 | |
1034 #if 0 | |
1035 /* The assembly code that follows is a hand-optimized version of the C | |
1036 code that follows. Note that the registers that are modified are | |
1037 exactly those listed as clobbered in the patterns divsi3_i1 and | |
1038 divsi3_i1_media. | |
1039 | |
1040 int __sdivsi3 (i, j) | |
1041 int i, j; | |
1042 { | |
1043 register unsigned long long r18 asm ("r18"); | |
1044 register unsigned long long r19 asm ("r19"); | |
1045 register unsigned long long r0 asm ("r0") = 0; | |
1046 register unsigned long long r1 asm ("r1") = 1; | |
1047 register int r2 asm ("r2") = i >> 31; | |
1048 register int r3 asm ("r3") = j >> 31; | |
1049 | |
1050 r2 = r2 ? r2 : r1; | |
1051 r3 = r3 ? r3 : r1; | |
1052 r18 = i * r2; | |
1053 r19 = j * r3; | |
1054 r2 *= r3; | |
1055 | |
1056 r19 <<= 31; | |
1057 r1 <<= 31; | |
1058 do | |
1059 if (r18 >= r19) | |
1060 r0 |= r1, r18 -= r19; | |
1061 while (r19 >>= 1, r1 >>= 1); | |
1062 | |
1063 return r2 * (int)r0; | |
1064 } | |
1065 */ | |
1066 GLOBAL(sdivsi3): /* disabled by the enclosing #if 0; shift-and-subtract loop following the C reference in the preceding comment */ | |
1067 pt/l LOCAL(sdivsi3_dontadd), tr2 | |
1068 pt/l LOCAL(sdivsi3_loop), tr1 | |
1069 ptabs/l r18, tr0 /* tr0 = return target taken from r18; r18 is reused as scratch below */ | |
1070 movi 0, r0 /* r0 = 0: quotient accumulator */ | |
1071 movi 1, r1 /* r1 = 1 */ | |
1072 shari.l r4, 31, r2 /* r2 = i >> 31 */ | |
1073 shari.l r5, 31, r3 /* r3 = j >> 31 */ | |
1074 cmveq r2, r1, r2 /* r2 = r2 ? r2 : r1 */ | |
1075 cmveq r3, r1, r3 /* r3 = r3 ? r3 : r1 */ | |
1076 muls.l r4, r2, r18 /* r18 = i * r2 */ | |
1077 muls.l r5, r3, r19 /* r19 = j * r3 */ | |
1078 muls.l r2, r3, r2 /* r2 *= r3 */ | |
1079 shlli r19, 31, r19 /* r19 <<= 31 */ | |
1080 shlli r1, 31, r1 /* r1 <<= 31 */ | |
1081 LOCAL(sdivsi3_loop): | |
1082 bgtu r19, r18, tr2 /* skip the update unless r18 >= r19 */ | |
1083 or r0, r1, r0 /* r0 |= r1 */ | |
1084 sub r18, r19, r18 /* r18 -= r19 */ | |
1085 LOCAL(sdivsi3_dontadd): | |
1086 shlri r1, 1, r1 /* r1 >>= 1 */ | |
1087 shlri r19, 1, r19 /* r19 >>= 1 */ | |
1088 bnei r1, 0, tr1 /* loop until the bit in r1 has been shifted out */ | |
1089 muls.l r0, r2, r0 /* r0 = r2 * (int) r0 */ | |
1090 add.l r0, r63, r0 /* NOTE(review): adds r63 (presumably the zero register) with a 32-bit add - looks like result canonicalization, confirm */ | |
1091 blink tr0, r63 /* return through tr0 */ | |
1092 #elif 0 /* ! 0 */ | |
1093 // inputs: r4,r5 | |
1094 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0 | |
1095 // result in r0 | |
1096 GLOBAL(sdivsi3): /* disabled by the enclosing #elif 0. NOTE(review): appears to be a reciprocal-approximation divide; the individual refinement steps are not documented here */ | |
1097 // can create absolute value without extra latency, | |
1098 // but dependent on proper sign extension of inputs: | |
1099 // shari.l r5,31,r2 | |
1100 // xor r5,r2,r20 | |
1101 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended. | |
1102 shari.l r5,31,r2 /* r2 = 0 or -1 from the sign of r5 */ | |
1103 ori r2,1,r2 /* r2 = +1 or -1 */ | |
1104 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended. | |
1105 movi 0xffffffffffffbb0c,r19 // shift count equiv 76 | |
1106 shari.l r4,31,r3 /* r3 = 0 or -1 from the sign of r4 */ | |
1107 nsb r20,r0 | |
1108 shlld r20,r0,r25 | |
1109 shlri r25,48,r25 | |
1110 sub r19,r25,r1 | |
1111 mmulfx.w r1,r1,r2 | |
1112 mshflo.w r1,r63,r1 | |
1113 // If r4 was to be used in-place instead of r21, could use this sequence | |
1114 // to compute absolute: | |
1115 // sub r63,r4,r19 // compute absolute value of r4 | |
1116 // shlri r4,32,r3 // into lower 32 bit of r4, keeping | |
1117 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact. | |
1118 ori r3,1,r3 /* r3 = +1 or -1 */ | |
1119 mmulfx.w r25,r2,r2 | |
1120 sub r19,r0,r0 | |
1121 muls.l r4,r3,r21 /* r21 = r4 * r3, the magnitude of r4 */ | |
1122 msub.w r1,r2,r2 | |
1123 addi r2,-2,r1 | |
1124 mulu.l r21,r1,r19 | |
1125 mmulfx.w r2,r2,r2 | |
1126 shlli r1,15,r1 | |
1127 shlrd r19,r0,r19 | |
1128 mulu.l r19,r20,r3 | |
1129 mmacnfx.wl r25,r2,r1 | |
1130 ptabs r18,tr0 /* tr0 = return target taken from r18; r18 is reused as scratch below */ | |
1131 sub r21,r3,r25 | |
1132 | |
1133 mulu.l r25,r1,r2 | |
1134 addi r0,14,r0 | |
1135 xor r4,r5,r18 | |
1136 shlrd r2,r0,r2 | |
1137 mulu.l r2,r20,r3 | |
1138 add r19,r2,r19 | |
1139 shari.l r18,31,r18 /* r18 = 0 or -1 from the sign of r4 ^ r5 */ | |
1140 sub r25,r3,r25 | |
1141 | |
1142 mulu.l r25,r1,r2 | |
1143 sub r25,r20,r25 | |
1144 add r19,r18,r19 | |
1145 shlrd r2,r0,r2 | |
1146 mulu.l r2,r20,r3 | |
1147 addi r25,1,r25 | |
1148 add r19,r2,r19 | |
1149 | |
1150 cmpgt r25,r3,r25 | |
1151 add.l r19,r25,r0 | |
1152 xor r0,r18,r0 /* NOTE(review): together with the earlier add of r18 this looks like a conditional negate of the quotient - confirm */ | |
1153 blink tr0,r63 /* return through tr0 */ | |
1154 #else /* ! 0 && ! 0 */ | |
1155 | |
/* Table-driven signed 32-bit divide (SHmedia).  Three entry points are
   defined: sdivsi3 and sdivsi3_1 (non-PIC builds only) first load the
   address of div_table_internal into r20 and then fall through into
   sdivsi3_2, which expects that address in r20 already.  */
1156 // inputs: r4,r5 | |
1157 // clobbered: r1,r18,r19,r20,r21,r25,tr0 | |
1158 // result in r0 | |
1159 HIDDEN_FUNC(GLOBAL(sdivsi3_2)) | |
1160 #ifndef __pic__ | |
1161 FUNC(GLOBAL(sdivsi3)) | |
1162 GLOBAL(sdivsi3): /* this is the shcompact entry point */ | |
1163 // The special SHmedia entry point sdivsi3_1 prevents accidental linking | |
1164 // with the SHcompact implementation, which clobbers tr1 / tr2. | |
1165 .global GLOBAL(sdivsi3_1) | |
1166 GLOBAL(sdivsi3_1): | |
1167 .global GLOBAL(div_table_internal) | |
/* Build the 32-bit table address in r20 from its two 16-bit halves.  */
1168 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20 | |
1169 shori GLOBAL(div_table_internal) & 65535, r20 | |
1170 #endif | |
1171 .global GLOBAL(sdivsi3_2) | |
1172 // div_table in r20 | |
1173 // clobbered: r1,r18,r19,r21,r25,tr0 | |
1174 GLOBAL(sdivsi3_2): | |
1175 nsb r5, r1 | |
1176 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 | |
1177 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) | |
/* Byte table lookup at r20 + r21; the index is doubled below for the
   16-bit table lookup.  */
1178 ldx.ub r20, r21, r19 // u0.8 | |
1179 shari r25, 32, r25 // normalize to s2.30 | |
1180 shlli r21, 1, r21 | |
1181 muls.l r25, r19, r19 // s2.38 | |
1182 ldx.w r20, r21, r21 // s2.14 | |
/* Capture the return target now; r18 is used as a temporary below.  */
1183 ptabs r18, tr0 | |
1184 shari r19, 24, r19 // truncate to s2.14 | |
1185 sub r21, r19, r19 // some 11 bit inverse in s1.14 | |
1186 muls.l r19, r19, r21 // u0.28 | |
/* r1 = 92 - r1: the final shift count used by the shard below.  */
1187 sub r63, r1, r1 | |
1188 addi r1, 92, r1 | |
1189 muls.l r25, r21, r18 // s2.58 | |
1190 shlli r19, 45, r19 // multiply by two and convert to s2.58 | |
1191 /* bubble */ | |
1192 sub r19, r18, r18 | |
1193 shari r18, 28, r18 // some 22 bit inverse in s1.30 | |
1194 muls.l r18, r25, r0 // s2.60 | |
1195 muls.l r18, r4, r25 // s32.30 | |
1196 /* bubble */ | |
1197 shari r0, 16, r19 // s-16.44 | |
1198 muls.l r19, r18, r19 // s-16.74 | |
/* r0 = 0 or -1 from the sign of the product in r25.  */
1199 shari r25, 63, r0 | |
1200 shari r4, 14, r18 // s19.-14 | |
1201 shari r19, 30, r19 // s-16.44 | |
1202 muls.l r19, r18, r19 // s15.30 | |
1203 xor r21, r0, r21 // You could also use the constant 1 << 27. | |
1204 add r21, r25, r21 | |
1205 sub r21, r19, r21 | |
1206 shard r21, r1, r21 | |
1207 sub r21, r0, r0 | |
1208 blink tr0, r63 | |
1209 #ifndef __pic__ | |
1210 ENDFUNC(GLOBAL(sdivsi3)) | |
1211 #endif | |
1212 ENDFUNC(GLOBAL(sdivsi3_2)) | |
1213 #endif | |
1214 #elif defined __SHMEDIA__ | |
1215 /* m5compact-nofpu */ | |
1216 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2 | |
/* Signed 32-bit divide as a 32-iteration shift/subtract loop.
   In:  r4 = dividend, r5 = divisor, r18 = return address.
   Out: r0 = quotient.  */
1217 .mode SHmedia | |
1218 .section .text..SHmedia32,"ax" | |
1219 .align 2 | |
1220 FUNC(GLOBAL(sdivsi3)) | |
1221 GLOBAL(sdivsi3): | |
1222 pt/l LOCAL(sdivsi3_dontsub), tr0 | |
1223 pt/l LOCAL(sdivsi3_loop), tr1 | |
1224 ptabs/l r18,tr2 | |
/* r18 / r19 = 0 or -1 sign masks of dividend / divisor.  */
1225 shari.l r4,31,r18 | |
1226 shari.l r5,31,r19 | |
/* (x ^ mask) - mask: r20 / r21 = magnitudes of dividend / divisor.  */
1227 xor r4,r18,r20 | |
1228 xor r5,r19,r21 | |
1229 sub.l r20,r18,r20 | |
1230 sub.l r21,r19,r21 | |
/* r19 = sign mask of the quotient (set when the operand signs differ).  */
1231 xor r18,r19,r19 | |
/* r25 = divisor magnitude << 32; its low word (0 here) doubles as the
   loop counter.  r21 = r25 - 1, so subtracting r21 takes the divisor off
   the upper half and adds 1 (a quotient bit) to the lower half.  */
1232 shlli r21,32,r25 | |
1233 addi r25,-1,r21 | |
1234 addz.l r20,r63,r20 | |
1235 LOCAL(sdivsi3_loop): | |
1236 shlli r20,1,r20 | |
/* Skip the subtract while r21 >= r20 (unsigned).  */
1237 bgeu/u r21,r20,tr0 | |
1238 sub r20,r21,r20 | |
1239 LOCAL(sdivsi3_dontsub): | |
/* 32-bit decrement of the counter; loop until it reaches -32.  */
1240 addi.l r25,-1,r25 | |
1241 bnei r25,-32,tr1 | |
/* (q ^ mask) - mask applies the quotient sign; sub.l yields a 32-bit result.  */
1242 xor r20,r19,r20 | |
1243 sub.l r20,r19,r0 | |
1244 blink tr2,r63 | |
1245 ENDFUNC(GLOBAL(sdivsi3)) | |
1246 #else /* ! __SHMEDIA__ */ | |
/* Signed 32-bit division for SHcompact code, built from the div0s / div1
   step instructions.
   In:  r4 = dividend, r5 = divisor.
   Out: r0 = quotient; a zero divisor returns 0.
   Writes r0-r3 and the T bit.  */
1247 FUNC(GLOBAL(sdivsi3)) | |
1248 GLOBAL(sdivsi3): | |
1249 mov r4,r1 | |
1250 mov r5,r0 | |
1251 | |
/* A zero divisor is sent to div0 at the end of the function.  */
1252 tst r0,r0 | |
1253 bt div0 | |
/* With r2 = 0, div0s leaves the dividend's sign in T.  The two subc then
   make r3 0 or -1 (the sign extension) and subtract that sign bit from r1.  */
1254 mov #0,r2 | |
1255 div0s r2,r1 | |
1256 subc r3,r3 | |
1257 subc r2,r1 | |
/* Prime the divider flags from the signs of the divisor (r0) and r3.  */
1258 div0s r0,r3 | |
/* 32 div1 steps, each preceded by a rotcl that shifts T into r1; one
   more rotcl follows the last step.  */
1259 rotcl r1 | |
1260 div1 r0,r3 | |
1261 rotcl r1 | |
1262 div1 r0,r3 | |
1263 rotcl r1 | |
1264 div1 r0,r3 | |
1265 rotcl r1 | |
1266 div1 r0,r3 | |
1267 rotcl r1 | |
1268 div1 r0,r3 | |
1269 rotcl r1 | |
1270 div1 r0,r3 | |
1271 rotcl r1 | |
1272 div1 r0,r3 | |
1273 rotcl r1 | |
1274 div1 r0,r3 | |
1275 rotcl r1 | |
1276 div1 r0,r3 | |
1277 rotcl r1 | |
1278 div1 r0,r3 | |
1279 rotcl r1 | |
1280 div1 r0,r3 | |
1281 rotcl r1 | |
1282 div1 r0,r3 | |
1283 rotcl r1 | |
1284 div1 r0,r3 | |
1285 rotcl r1 | |
1286 div1 r0,r3 | |
1287 rotcl r1 | |
1288 div1 r0,r3 | |
1289 rotcl r1 | |
1290 div1 r0,r3 | |
1291 rotcl r1 | |
1292 div1 r0,r3 | |
1293 rotcl r1 | |
1294 div1 r0,r3 | |
1295 rotcl r1 | |
1296 div1 r0,r3 | |
1297 rotcl r1 | |
1298 div1 r0,r3 | |
1299 rotcl r1 | |
1300 div1 r0,r3 | |
1301 rotcl r1 | |
1302 div1 r0,r3 | |
1303 rotcl r1 | |
1304 div1 r0,r3 | |
1305 rotcl r1 | |
1306 div1 r0,r3 | |
1307 rotcl r1 | |
1308 div1 r0,r3 | |
1309 rotcl r1 | |
1310 div1 r0,r3 | |
1311 rotcl r1 | |
1312 div1 r0,r3 | |
1313 rotcl r1 | |
1314 div1 r0,r3 | |
1315 rotcl r1 | |
1316 div1 r0,r3 | |
1317 rotcl r1 | |
1318 div1 r0,r3 | |
1319 rotcl r1 | |
1320 div1 r0,r3 | |
1321 rotcl r1 | |
1322 div1 r0,r3 | |
1323 rotcl r1 | |
/* r2 is still 0, so addc only adds the T bit left by the final rotcl.  */
1324 addc r2,r1 | |
1325 rts | |
/* Delay slot: move the quotient into the return register.  */
1326 mov r1,r0 | |
1327 | |
1328 | |
/* Zero divisor: return 0 (loaded in the rts delay slot).  */
1329 div0: rts | |
1330 mov #0,r0 | |
1331 | |
1332 ENDFUNC(GLOBAL(sdivsi3)) | |
1333 #endif /* ! __SHMEDIA__ */ | |
1334 #endif /* ! __SH4__ */ | |
1335 #endif | |
1336 #ifdef L_udivsi3_i4 | |
1337 | |
1338 .title "SH DIVIDE" | |
1339 !! 4 byte integer Divide code for the Renesas SH | |
1340 #ifdef __SH4__ | |
1341 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4, | |
1342 !! and t bit | |
/* Unsigned 32-bit divide done in double-precision floating point.  Each
   operand has its top bit flipped (xor with 0x80000000), is converted as
   a signed integer, and then has 2^31 (the constant at L1) added back.  */
1343 | |
1344 .global GLOBAL(udivsi3_i4) | |
1345 HIDDEN_FUNC(GLOBAL(udivsi3_i4)) | |
1346 GLOBAL(udivsi3_i4): | |
/* Divisors 0 and 1 take the `trivial' path, which returns the dividend.  */
1347 mov #1,r1 | |
1348 cmp/hi r1,r5 | |
1349 bf trivial | |
/* Rotating the 1 in r1 right by one bit gives 0x80000000.  */
1350 rotr r1 | |
1351 xor r1,r4 | |
1352 lds r4,fpul | |
1353 mova L1,r0 | |
/* Load the double at L1 into dr4, as one 64-bit move or two 32-bit ones.  */
1354 #ifdef FMOVD_WORKS | |
1355 fmov.d @r0+,dr4 | |
1356 #else | |
1357 fmov.s @r0+,DR40 | |
1358 fmov.s @r0,DR41 | |
1359 #endif | |
1360 float fpul,dr0 | |
1361 xor r1,r5 | |
1362 lds r5,fpul | |
1363 float fpul,dr2 | |
1364 fadd dr4,dr0 | |
1365 fadd dr4,dr2 | |
1366 fdiv dr2,dr0 | |
1367 rts | |
/* Delay slot: truncate the quotient to an integer in fpul.  */
1368 ftrc dr0,fpul | |
1369 | |
1370 trivial: | |
1371 rts | |
/* Delay slot: the result is the unchanged dividend.  */
1372 lds r4,fpul | |
1373 | |
1374 .align 2 | |
1375 #ifdef FMOVD_WORKS | |
1376 .align 3 ! make double below 8 byte aligned. | |
1377 #endif | |
1378 L1: | |
1379 .double 2147483648 | |
1380 | |
1381 ENDFUNC(GLOBAL(udivsi3_i4)) | |
1382 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__) | |
1383 #if ! __SH5__ || __SH5__ == 32 | |
1384 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33 | |
/* SHmedia version: zero-extend both operands to 64 bits, convert them to
   double, divide, and truncate back to a 64-bit integer.  Besides the
   registers listed above, the code also writes dr32 and tr0.  */
1385 .mode SHmedia | |
1386 .global GLOBAL(udivsi3_i4) | |
1387 HIDDEN_FUNC(GLOBAL(udivsi3_i4)) | |
1388 GLOBAL(udivsi3_i4): | |
1389 addz.l r4,r63,r20 | |
1390 addz.l r5,r63,r21 | |
1391 fmov.qd r20,dr0 | |
1392 fmov.qd r21,dr32 | |
1393 ptabs r18,tr0 | |
1394 float.qd dr0,dr0 | |
1395 float.qd dr32,dr32 | |
1396 fdiv.d dr0,dr32,dr0 | |
1397 ftrc.dq dr0,dr32 | |
/* NOTE(review): presumably fr33 holds the low 32 bits of the truncated
   quotient and fr32 is where SHcompact code sees fpul - confirm.  */
1398 fmov.s fr33,fr32 | |
1399 blink tr0,r63 | |
1400 | |
1401 ENDFUNC(GLOBAL(udivsi3_i4)) | |
1402 #endif /* ! __SH5__ || __SH5__ == 32 */ | |
1403 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) | |
1404 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4 | |
/* Same algorithm as the __SH4__ version (bias both operands by 2^31,
   divide as doubles, truncate), but FPSCR is saved on the stack, replaced
   by the word stored at L1 for the duration of the computation, and
   restored in the delay slot of the final rts.  */
1405 | |
1406 .global GLOBAL(udivsi3_i4) | |
1407 HIDDEN_FUNC(GLOBAL(udivsi3_i4)) | |
1408 GLOBAL(udivsi3_i4): | |
/* Divisors 0 and 1 take the `trivial' path, which returns the dividend
   without touching FPSCR.  */
1409 mov #1,r1 | |
1410 cmp/hi r1,r5 | |
1411 bf trivial | |
1412 sts.l fpscr,@-r15 | |
1413 mova L1,r0 | |
/* Load the temporary FPSCR value; r0 then points at the double.  */
1414 lds.l @r0+,fpscr | |
/* Rotating the 1 in r1 right by one bit gives 0x80000000.  */
1415 rotr r1 | |
1416 xor r1,r4 | |
1417 lds r4,fpul | |
1418 #ifdef FMOVD_WORKS | |
1419 fmov.d @r0+,dr4 | |
1420 #else | |
1421 fmov.s @r0+,DR40 | |
1422 fmov.s @r0,DR41 | |
1423 #endif | |
1424 float fpul,dr0 | |
1425 xor r1,r5 | |
1426 lds r5,fpul | |
1427 float fpul,dr2 | |
1428 fadd dr4,dr0 | |
1429 fadd dr4,dr2 | |
1430 fdiv dr2,dr0 | |
1431 ftrc dr0,fpul | |
1432 rts | |
/* Delay slot: restore the caller's FPSCR.  */
1433 lds.l @r15+,fpscr | |
1434 | |
/* `trivial' is two 16-bit instructions and the FPSCR word at L1 is four
   bytes, so aligning `trivial' to 8 bytes leaves the .double 8-byte
   aligned as well.  */
1435 #ifdef FMOVD_WORKS | |
1436 .align 3 ! make double below 8 byte aligned. | |
1437 #endif | |
1438 trivial: | |
1439 rts | |
1440 lds r4,fpul | |
1441 | |
1442 .align 2 | |
/* Temporary FPSCR value: bit 19 set, plus bit 20 when FMOVD_WORKS.  */
1443 L1: | |
1444 #ifndef FMOVD_WORKS | |
1445 .long 0x80000 | |
1446 #else | |
1447 .long 0x180000 | |
1448 #endif | |
1449 .double 2147483648 | |
1450 | |
1451 ENDFUNC(GLOBAL(udivsi3_i4)) | |
1452 #endif /* ! __SH4__ */ | |
1453 #endif | |
1454 | |
1455 #ifdef L_udivsi3 | |
1456 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with | |
1457 sh2e/sh3e code. */ | |
1458 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__) | |
1459 | |
1460 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit | |
1461 .global GLOBAL(udivsi3) | |
1462 HIDDEN_FUNC(GLOBAL(udivsi3)) | |
1463 | |
1464 #if __SHMEDIA__ | |
1465 #if __SH5__ == 32 | |
1466 .section .text..SHmedia32,"ax" | |
1467 #else | |
1468 .text | |
1469 #endif | |
1470 .align 2 | |
1471 #if 0 | |
1472 /* The assembly code that follows is a hand-optimized version of the C | |
1473 code that follows. Note that the registers that are modified are | |
1474 exactly those listed as clobbered in the patterns udivsi3_i1 and | |
1475 udivsi3_i1_media. | |
1476 | |
1477 unsigned | |
1478 __udivsi3 (i, j) | |
1479 unsigned i, j; | |
1480 { | |
1481 register unsigned long long r0 asm ("r0") = 0; | |
1482 register unsigned long long r18 asm ("r18") = 1; | |
1483 register unsigned long long r4 asm ("r4") = i; | |
1484 register unsigned long long r19 asm ("r19") = j; | |
1485 | |
1486 r19 <<= 31; | |
1487 r18 <<= 31; | |
1488 do | |
1489 if (r4 >= r19) | |
1490 r0 |= r18, r4 -= r19; | |
1491 while (r19 >>= 1, r18 >>= 1); | |
1492 | |
1493 return r0; | |
1494 } | |
1495 */ | |
/* Unsigned 32-bit divide, one quotient bit per loop iteration (SHmedia).
   Follows the C reference code in the comment above.
   In:  r4 = dividend, r5 = divisor, r18 = return address.
   Out: r0 = quotient.
   Writes r0, r4, r18, r19 and tr0-tr2.  */
1496 GLOBAL(udivsi3): | |
1497 pt/l LOCAL(udivsi3_dontadd), tr2 | |
1498 pt/l LOCAL(udivsi3_loop), tr1 | |
/* tr0 keeps the return target; r18 is reused as the quotient-bit mask.  */
1499 ptabs/l r18, tr0 | |
1500 movi 0, r0 | |
1501 movi 1, r18 | |
/* Zero-extend both operands to 64 bits.  */
1502 addz.l r5, r63, r19 | |
1503 addz.l r4, r63, r4 | |
1504 shlli r19, 31, r19 | |
1505 shlli r18, 31, r18 | |
1506 LOCAL(udivsi3_loop): | |
/* Skip the subtract when the shifted divisor is above the remainder.  */
1507 bgtu r19, r4, tr2 | |
1508 or r0, r18, r0 | |
1509 sub r4, r19, r4 | |
1510 LOCAL(udivsi3_dontadd): | |
1511 shlri r18, 1, r18 | |
1512 shlri r19, 1, r19 | |
/* Loop until the quotient-bit mask has been shifted out to zero.  */
1513 bnei r18, 0, tr1 | |
1514 blink tr0, r63 | |
1515 #else | |
1516 GLOBAL(udivsi3): | |
1517 // inputs: r4,r5 | |
1518 // clobbered: r18,r19,r20,r21,r22,r25,tr0 | |
1519 // result in r0. | |
/* r22 = divisor zero-extended to 64 bits; r25 = its top 16 bits after
   shifting left by the nsb count in r0.  */
1520 addz.l r5,r63,r22 | |
1521 nsb r22,r0 | |
1522 shlld r22,r0,r25 | |
1523 shlri r25,48,r25 | |
1524 movi 0xffffffffffffbb0c,r20 // shift count equiv 76 | |
1525 sub r20,r25,r21 | |
1526 mmulfx.w r21,r21,r19 | |
1527 mshflo.w r21,r63,r21 | |
/* Return target is captured here; r18 is used as the quotient
   accumulator below.  */
1528 ptabs r18,tr0 | |
1529 mmulfx.w r25,r19,r19 | |
1530 sub r20,r0,r0 | |
1531 /* bubble */ | |
1532 msub.w r21,r19,r19 | |
1533 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21 | |
1534 before the msub.w, but we need a different value for | |
1535 r19 to keep errors under control. */ | |
/* First step: r18 = (r4 * r21) >> r0 is the first partial quotient;
   r25 = r4 - r18 * r22 is what remains of the dividend.  */
1536 mulu.l r4,r21,r18 | |
1537 mmulfx.w r19,r19,r19 | |
1538 shlli r21,15,r21 | |
1539 shlrd r18,r0,r18 | |
1540 mulu.l r18,r22,r20 | |
1541 mmacnfx.wl r25,r19,r21 | |
1542 /* bubble */ | |
1543 sub r4,r20,r25 | |
1544 | |
/* Second step: same pattern on r25 with the updated r21 and a shift count
   raised by 14; the partial quotient in r19 is added into r18.  */
1545 mulu.l r25,r21,r19 | |
1546 addi r0,14,r0 | |
1547 /* bubble */ | |
1548 shlrd r19,r0,r19 | |
1549 mulu.l r19,r22,r20 | |
1550 add r18,r19,r18 | |
1551 /* bubble */ | |
1552 sub.l r25,r20,r25 | |
1553 | |
/* Third step, followed by a final 0 / 1 adjustment from the cmpgt.  */
1554 mulu.l r25,r21,r19 | |
1555 addz.l r25,r63,r25 | |
1556 sub r25,r22,r25 | |
1557 shlrd r19,r0,r19 | |
1558 mulu.l r19,r22,r20 | |
1559 addi r25,1,r25 | |
1560 add r18,r19,r18 | |
1561 | |
1562 cmpgt r25,r20,r25 | |
1563 add.l r18,r25,r0 | |
1564 blink tr0,r63 | |
1565 #endif | |
1566 #elif defined (__SHMEDIA__) | |
1567 /* m5compact-nofpu - more emphasis on code size than on speed, but don't | |
1568 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4. | |
1569 So use a short shmedia loop. */ | |
1570 // clobbered: r20,r21,r25,tr0,tr1,tr2 | |
/* Unsigned 32-bit divide as a 32-iteration shift/subtract loop.
   In:  r4 = dividend, r5 = divisor, r18 = return address.
   Out: r0 = quotient.  */
1571 .mode SHmedia | |
1572 .section .text..SHmedia32,"ax" | |
1573 .align 2 | |
1574 GLOBAL(udivsi3): | |
1575 pt/l LOCAL(udivsi3_dontsub), tr0 | |
1576 pt/l LOCAL(udivsi3_loop), tr1 | |
1577 ptabs/l r18,tr2 | |
/* r25 = divisor << 32; its low word (0 here) doubles as the loop counter.
   r21 = r25 - 1, so subtracting r21 takes the divisor off the upper half
   and adds 1 (a quotient bit) to the lower half.  */
1578 shlli r5,32,r25 | |
1579 addi r25,-1,r21 | |
/* r20 = dividend zero-extended to 64 bits.  */
1580 addz.l r4,r63,r20 | |
1581 LOCAL(udivsi3_loop): | |
1582 shlli r20,1,r20 | |
/* Skip the subtract while r21 >= r20 (unsigned).  */
1583 bgeu/u r21,r20,tr0 | |
1584 sub r20,r21,r20 | |
1585 LOCAL(udivsi3_dontsub): | |
/* 32-bit decrement of the counter; loop until it reaches -32.  */
1586 addi.l r25,-1,r25 | |
1587 bnei r25,-32,tr1 | |
/* The low 32 bits of r20 are the quotient.  */
1588 add.l r20,r63,r0 | |
1589 blink tr2,r63 | |
1590 #else /* ! defined (__SHMEDIA__) */ | |
/* Helper subroutines for udivsi3 below, reached with bsr.  div8 performs
   eight div1 steps (one, then the seven of div7); div7 performs seven,
   the last one in the rts delay slot.  */
1591 LOCAL(div8): | |
1592 div1 r5,r4 | |
1593 LOCAL(div7): | |
1594 div1 r5,r4; div1 r5,r4; div1 r5,r4 | |
1595 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4 | |
1596 | |
/* divx4: four div1 steps with a rotcl r0 after each of the first three;
   the fourth div1 runs in the rts delay slot.  */
1597 LOCAL(divx4): | |
1598 div1 r5,r4; rotcl r0 | |
1599 div1 r5,r4; rotcl r0 | |
1600 div1 r5,r4; rotcl r0 | |
1601 rts; div1 r5,r4 | |
1602 | |
/* Unsigned 32-bit divide for SHcompact code: r4 / r5, quotient in r0.
   pr is saved on the stack because the helpers above are called with bsr.  */
1603 GLOBAL(udivsi3): | |
1604 sts.l pr,@-r15 | |
/* T is set when r5 equals its own low 16 bits, i.e. the divisor fits in
   16 bits; otherwise branch to large_divisor.  */
1605 extu.w r5,r0 | |
1606 cmp/eq r5,r0 | |
/* On __sh1__ a plain bf (no delay slot) is used, so the div0u below runs
   only on the fall-through path and is repeated at large_divisor.  */
1607 #ifdef __sh1__ | |
1608 bf LOCAL(large_divisor) | |
1609 #else | |
1610 bf/s LOCAL(large_divisor) | |
1611 #endif | |
1612 div0u | |
/* 16-bit divisor: two rounds of 16 div1 steps (div8, then div7 with one
   more div1 in the bsr delay slot), with r5 shifted up by 16 bits.  */
1613 swap.w r4,r0 | |
1614 shlr16 r4 | |
1615 bsr LOCAL(div8) | |
1616 shll16 r5 | |
1617 bsr LOCAL(div7) | |
1618 div1 r5,r4 | |
1619 xtrct r4,r0 | |
1620 xtrct r0,r4 | |
1621 bsr LOCAL(div8) | |
1622 swap.w r4,r4 | |
1623 bsr LOCAL(div7) | |
1624 div1 r5,r4 | |
1625 lds.l @r15+,pr | |
1626 xtrct r4,r0 | |
1627 swap.w r0,r0 | |
1628 rotcl r0 | |
1629 rts | |
/* Delay slot: undo the shll16 applied to r5 above.  */
1630 shlr16 r5 | |
1631 | |
/* Divisor wider than 16 bits: four divx4 calls, each entered with a
   rotcl r0 in the bsr delay slot, then a final rotcl r0 in the rts
   delay slot.  */
1632 LOCAL(large_divisor): | |
1633 #ifdef __sh1__ | |
1634 div0u | |
1635 #endif | |
1636 mov #0,r0 | |
1637 xtrct r4,r0 | |
1638 xtrct r0,r4 | |
1639 bsr LOCAL(divx4) | |
1640 rotcl r0 | |
1641 bsr LOCAL(divx4) | |
1642 rotcl r0 | |
1643 bsr LOCAL(divx4) | |
1644 rotcl r0 | |
1645 bsr LOCAL(divx4) | |
1646 rotcl r0 | |
1647 lds.l @r15+,pr | |
1648 rts | |
1649 rotcl r0 | |
1650 | |
1651 ENDFUNC(GLOBAL(udivsi3)) | |
1652 #endif /* ! __SHMEDIA__ */ | |
1653 #endif /* __SH4__ */ | |
1654 #endif /* L_udivsi3 */ | |
1655 | |
1656 #ifdef L_udivdi3 | |
1657 #ifdef __SHMEDIA__ | |
/* Unsigned 64-bit divide (SHmedia).
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = quotient.
   Writes r0, r1, r2, r4-r9, r19-r22, r25 and tr0; r3 is only read.
   The hidden alias udivdi3_internal names the same entry point.  */
1658 .mode SHmedia | |
1659 .section .text..SHmedia32,"ax" | |
1660 .align 2 | |
1661 .global GLOBAL(udivdi3) | |
1662 FUNC(GLOBAL(udivdi3)) | |
1663 GLOBAL(udivdi3): | |
1664 HIDDEN_ALIAS(udivdi3_internal,udivdi3) | |
/* r22 = nsb count of (divisor >> 1); r6 = divisor shifted left by r22;
   r5 = top 15 bits of r6.  */
1665 shlri r3,1,r4 | |
1666 nsb r4,r22 | |
1667 shlld r3,r22,r6 | |
1668 shlri r6,49,r5 | |
1669 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ | |
1670 sub r21,r5,r1 | |
1671 mmulfx.w r1,r1,r4 | |
1672 mshflo.w r1,r63,r1 | |
1673 sub r63,r22,r20 // r63 == 64 % 64 | |
1674 mmulfx.w r5,r4,r4 | |
1675 pta LOCAL(large_divisor),tr0 | |
/* r9 = 32 - r22; the large_divisor path is taken when this is positive.  */
1676 addi r20,32,r9 | |
1677 msub.w r1,r4,r1 | |
1678 madd.w r1,r1,r1 | |
1679 mmulfx.w r1,r1,r4 | |
1680 shlri r6,32,r7 | |
1681 bgt/u r9,r63,tr0 // large_divisor | |
1682 mmulfx.w r5,r4,r4 | |
1683 shlri r2,32+14,r19 | |
1684 addi r22,-31,r0 | |
1685 msub.w r1,r4,r1 | |
1686 | |
1687 mulu.l r1,r7,r4 | |
1688 addi r1,-3,r5 | |
1689 mulu.l r5,r19,r5 | |
1690 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 | |
1691 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as | |
1692 the case may be, %0000000000000000 000.11111111111, still */ | |
1693 muls.l r1,r4,r4 /* leaving at least one sign bit. */ | |
1694 mulu.l r5,r3,r8 | |
1695 mshalds.l r1,r21,r1 | |
1696 shari r4,26,r4 | |
1697 shlld r8,r0,r8 | |
1698 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) | |
1699 sub r2,r8,r2 | |
1700 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ | |
1701 | |
1702 shlri r2,22,r21 | |
1703 mulu.l r21,r1,r21 | |
/* r8 accumulates the quotient from here on.  */
1704 shlld r5,r0,r8 | |
1705 addi r20,30-22,r0 | |
1706 shlrd r21,r0,r21 | |
1707 mulu.l r21,r3,r5 | |
1708 add r8,r21,r8 | |
1709 mcmpgt.l r21,r63,r21 // See Note 1 | |
1710 addi r20,30,r0 | |
1711 mshfhi.l r63,r21,r21 | |
1712 sub r2,r5,r2 | |
1713 andc r2,r21,r2 | |
1714 | |
1715 /* small divisor: need a third divide step */ | |
1716 mulu.l r2,r1,r7 | |
1717 ptabs r18,tr0 | |
1718 addi r2,1,r2 | |
1719 shlrd r7,r0,r7 | |
1720 mulu.l r7,r3,r5 | |
1721 add r8,r7,r8 | |
1722 sub r2,r3,r2 | |
/* r5 = 0 or 1: last adjustment, added to the accumulated quotient.  */
1723 cmpgt r2,r5,r5 | |
1724 add r8,r5,r2 | |
1725 /* could test r3 here to check for divide by zero. */ | |
1726 blink tr0,r63 | |
1727 | |
1728 LOCAL(large_divisor): | |
1729 mmulfx.w r5,r4,r4 | |
/* r25 = dividend >> r9; r8 = upper 32 bits of that.  */
1730 shlrd r2,r9,r25 | |
1731 shlri r25,32,r8 | |
1732 msub.w r1,r4,r1 | |
1733 | |
1734 mulu.l r1,r7,r4 | |
1735 addi r1,-3,r5 | |
1736 mulu.l r5,r8,r5 | |
1737 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 | |
1738 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as | |
1739 the case may be, %0000000000000000 000.11111111111, still */ | |
1740 muls.l r1,r4,r4 /* leaving at least one sign bit. */ | |
1741 shlri r5,14-1,r8 | |
1742 mulu.l r8,r7,r5 | |
1743 mshalds.l r1,r21,r1 | |
1744 shari r4,26,r4 | |
1745 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) | |
1746 sub r25,r5,r25 | |
1747 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ | |
1748 | |
1749 shlri r25,22,r21 | |
1750 mulu.l r21,r1,r21 | |
1751 pta LOCAL(no_lo_adj),tr0 | |
1752 addi r22,32,r0 | |
1753 shlri r21,40,r21 | |
1754 mulu.l r21,r7,r5 | |
1755 add r8,r21,r8 | |
1756 shlld r2,r0,r2 | |
1757 sub r25,r5,r25 | |
/* When r25 is not below r7, bump the quotient and subtract r7 once more.  */
1758 bgtu/u r7,r25,tr0 // no_lo_adj | |
1759 addi r8,1,r8 | |
1760 sub r25,r7,r25 | |
1761 LOCAL(no_lo_adj): | |
1762 mextr4 r2,r25,r2 | |
1763 | |
1764 /* large_divisor: only needs a few adjustments. */ | |
1765 mulu.l r8,r6,r5 | |
1766 ptabs r18,tr0 | |
1767 /* bubble */ | |
/* r5 = 1 when r8 * r6 exceeds r2 (unsigned); the quotient r8 is then
   reduced by one.  */
1768 cmpgtu r5,r2,r5 | |
1769 sub r8,r5,r2 | |
1770 blink tr0,r63 | |
1771 ENDFUNC(GLOBAL(udivdi3)) | |
1772 /* Note 1: To shift the result of the second divide stage so that the result | |
1773 always fits into 32 bits, yet we still reduce the rest sufficiently | |
1774 would require a lot of instructions to do the shifts just right. Using | |
1775 the full 64 bit shift result to multiply with the divisor would require | |
1776 four extra instructions for the upper 32 bits (shift / mulu / shift / sub). | |
1777 Fortunately, if the upper 32 bits of the shift result are nonzero, we | |
1778 know that the rest after taking this partial result into account will | |
1779 fit into 32 bits. So we just clear the upper 32 bits of the rest if the | |
1780 upper 32 bits of the partial result are nonzero. */ | |
1781 #endif /* __SHMEDIA__ */ | |
1782 #endif /* L_udivdi3 */ | |
1783 | |
1784 #ifdef L_divdi3 | |
1785 #ifdef __SHMEDIA__ | |
/* Signed 64-bit divide (SHmedia), implemented on top of udivdi3_internal.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = quotient.
   Both r2 and r3 are replaced by their absolute values before the
   unsigned divide, so r3 is modified; r22, r23, tr0 and tr1 are written
   here as well.  */
1786 .mode SHmedia | |
1787 .section .text..SHmedia32,"ax" | |
1788 .align 2 | |
1789 .global GLOBAL(divdi3) | |
1790 FUNC(GLOBAL(divdi3)) | |
1791 GLOBAL(divdi3): | |
1792 pta GLOBAL(udivdi3_internal),tr0 | |
/* r22 / r23 = 0 or -1 sign masks of dividend / divisor.  */
1793 shari r2,63,r22 | |
1794 shari r3,63,r23 | |
/* (x ^ mask) - mask: absolute values.  */
1795 xor r2,r22,r2 | |
1796 xor r3,r23,r3 | |
1797 sub r2,r22,r2 | |
1798 sub r3,r23,r3 | |
/* Same signs: jump straight into the unsigned divide, which returns to
   our caller through the untouched r18.  */
1799 beq/u r22,r23,tr0 | |
/* Different signs: keep our own return target in tr1, call the unsigned
   divide with r18 linked to the next instruction, then negate.  */
1800 ptabs r18,tr1 | |
1801 blink tr0,r18 | |
1802 sub r63,r2,r2 | |
1803 blink tr1,r63 | |
1804 ENDFUNC(GLOBAL(divdi3)) | |
1805 #endif /* __SHMEDIA__ */ | |
1806 #endif /* L_divdi3 */ | |
1807 | |
1808 #ifdef L_umoddi3 | |
1809 #ifdef __SHMEDIA__ | |
/* Unsigned 64-bit remainder (SHmedia).
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = remainder.
   Writes r0, r1, r2, r4-r9, r19-r22, r25 and tr0; r3 is only read.
   The hidden alias umoddi3_internal names the same entry point.  */
1810 .mode SHmedia | |
1811 .section .text..SHmedia32,"ax" | |
1812 .align 2 | |
1813 .global GLOBAL(umoddi3) | |
1814 FUNC(GLOBAL(umoddi3)) | |
1815 GLOBAL(umoddi3): | |
1816 HIDDEN_ALIAS(umoddi3_internal,umoddi3) | |
/* r22 = nsb count of (divisor >> 1); r6 = divisor shifted left by r22;
   r5 = top 15 bits of r6.  */
1817 shlri r3,1,r4 | |
1818 nsb r4,r22 | |
1819 shlld r3,r22,r6 | |
1820 shlri r6,49,r5 | |
1821 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */ | |
1822 sub r21,r5,r1 | |
1823 mmulfx.w r1,r1,r4 | |
1824 mshflo.w r1,r63,r1 | |
1825 sub r63,r22,r20 // r63 == 64 % 64 | |
1826 mmulfx.w r5,r4,r4 | |
1827 pta LOCAL(large_divisor),tr0 | |
/* r9 = 32 - r22; the large_divisor path is taken when this is positive.  */
1828 addi r20,32,r9 | |
1829 msub.w r1,r4,r1 | |
1830 madd.w r1,r1,r1 | |
1831 mmulfx.w r1,r1,r4 | |
1832 shlri r6,32,r7 | |
1833 bgt/u r9,r63,tr0 // large_divisor | |
1834 mmulfx.w r5,r4,r4 | |
1835 shlri r2,32+14,r19 | |
1836 addi r22,-31,r0 | |
1837 msub.w r1,r4,r1 | |
1838 | |
1839 mulu.l r1,r7,r4 | |
1840 addi r1,-3,r5 | |
1841 mulu.l r5,r19,r5 | |
1842 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 | |
1843 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as | |
1844 the case may be, %0000000000000000 000.11111111111, still */ | |
1845 muls.l r1,r4,r4 /* leaving at least one sign bit. */ | |
1846 mulu.l r5,r3,r5 | |
1847 mshalds.l r1,r21,r1 | |
1848 shari r4,26,r4 | |
1849 shlld r5,r0,r5 | |
1850 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) | |
1851 sub r2,r5,r2 | |
1852 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ | |
1853 | |
1854 shlri r2,22,r21 | |
1855 mulu.l r21,r1,r21 | |
1856 addi r20,30-22,r0 | |
1857 /* bubble */ /* could test r3 here to check for divide by zero. */ | |
1858 shlrd r21,r0,r21 | |
1859 mulu.l r21,r3,r5 | |
1860 mcmpgt.l r21,r63,r21 // See Note 1 | |
1861 addi r20,30,r0 | |
1862 mshfhi.l r63,r21,r21 | |
1863 sub r2,r5,r2 | |
1864 andc r2,r21,r2 | |
1865 | |
1866 /* small divisor: need a third divide step */ | |
1867 mulu.l r2,r1,r7 | |
1868 ptabs r18,tr0 | |
1869 sub r2,r3,r8 /* re-use r8 here for rest - r3 */ | |
1870 shlrd r7,r0,r7 | |
1871 mulu.l r7,r3,r5 | |
1872 /* bubble */ | |
/* When r8 + 1 > r5, r2 is replaced by r8 (= r2 - r3) before the last
   subtraction.  */
1873 addi r8,1,r7 | |
1874 cmpgt r7,r5,r7 | |
1875 cmvne r7,r8,r2 | |
1876 sub r2,r5,r2 | |
1877 blink tr0,r63 | |
1878 | |
1879 LOCAL(large_divisor): | |
1880 mmulfx.w r5,r4,r4 | |
/* r25 = dividend >> r9; r8 = upper 32 bits of that.  */
1881 shlrd r2,r9,r25 | |
1882 shlri r25,32,r8 | |
1883 msub.w r1,r4,r1 | |
1884 | |
1885 mulu.l r1,r7,r4 | |
1886 addi r1,-3,r5 | |
1887 mulu.l r5,r8,r5 | |
1888 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2 | |
1889 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as | |
1890 the case may be, %0000000000000000 000.11111111111, still */ | |
1891 muls.l r1,r4,r4 /* leaving at least one sign bit. */ | |
1892 shlri r5,14-1,r8 | |
1893 mulu.l r8,r7,r5 | |
1894 mshalds.l r1,r21,r1 | |
1895 shari r4,26,r4 | |
1896 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) | |
1897 sub r25,r5,r25 | |
1898 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ | |
1899 | |
1900 shlri r25,22,r21 | |
1901 mulu.l r21,r1,r21 | |
1902 pta LOCAL(no_lo_adj),tr0 | |
1903 addi r22,32,r0 | |
1904 shlri r21,40,r21 | |
1905 mulu.l r21,r7,r5 | |
1906 add r8,r21,r8 | |
1907 shlld r2,r0,r2 | |
1908 sub r25,r5,r25 | |
/* When r25 is not below r7, bump r8 and subtract r7 once more.  */
1909 bgtu/u r7,r25,tr0 // no_lo_adj | |
1910 addi r8,1,r8 | |
1911 sub r25,r7,r25 | |
1912 LOCAL(no_lo_adj): | |
1913 mextr4 r2,r25,r2 | |
1914 | |
1915 /* large_divisor: only needs a few adjustments. */ | |
1916 mulu.l r8,r6,r5 | |
1917 ptabs r18,tr0 | |
/* When r8 * r6 exceeds r2 (unsigned), r2 + r6 is used in its place; the
   difference is then shifted back right by the normalisation count r22.  */
1918 add r2,r6,r7 | |
1919 cmpgtu r5,r2,r8 | |
1920 cmvne r8,r7,r2 | |
1921 sub r2,r5,r2 | |
1922 shlrd r2,r22,r2 | |
1923 blink tr0,r63 | |
1924 ENDFUNC(GLOBAL(umoddi3)) | |
1925 /* Note 1: To shift the result of the second divide stage so that the result | |
1926 always fits into 32 bits, yet we still reduce the rest sufficiently | |
1927 would require a lot of instructions to do the shifts just right. Using | |
1928 the full 64 bit shift result to multiply with the divisor would require | |
1929 four extra instructions for the upper 32 bits (shift / mulu / shift / sub). | |
1930 Fortunately, if the upper 32 bits of the shift result are nonzero, we | |
1931 know that the rest after taking this partial result into account will | |
1932 fit into 32 bits. So we just clear the upper 32 bits of the rest if the | |
1933 upper 32 bits of the partial result are nonzero. */ | |
1934 #endif /* __SHMEDIA__ */ | |
1935 #endif /* L_umoddi3 */ | |
1936 | |
1937 #ifdef L_moddi3 | |
1938 #ifdef __SHMEDIA__ | |
/* Signed 64-bit remainder (SHmedia), implemented on top of
   umoddi3_internal.
   In:  r2 = dividend, r3 = divisor, r18 = return address.
   Out: r2 = remainder; it is negated exactly when the dividend was
   negative.
   Both r2 and r3 are replaced by their absolute values before the
   unsigned operation, so r3 is modified; r22, r23, tr0 and tr1 are
   written here as well.  */
1939 .mode SHmedia | |
1940 .section .text..SHmedia32,"ax" | |
1941 .align 2 | |
1942 .global GLOBAL(moddi3) | |
1943 FUNC(GLOBAL(moddi3)) | |
1944 GLOBAL(moddi3): | |
1945 pta GLOBAL(umoddi3_internal),tr0 | |
/* r22 / r23 = 0 or -1 sign masks of dividend / divisor.  */
1946 shari r2,63,r22 | |
1947 shari r3,63,r23 | |
/* (x ^ mask) - mask: absolute values.  */
1948 xor r2,r22,r2 | |
1949 xor r3,r23,r3 | |
1950 sub r2,r22,r2 | |
1951 sub r3,r23,r3 | |
/* Non-negative dividend (mask == 0): jump straight into the unsigned
   routine, which returns to our caller through the untouched r18.  */
1952 beq/u r22,r63,tr0 | |
/* Negative dividend: keep our own return target in tr1, call the
   unsigned routine with r18 linked to the next instruction, then negate.  */
1953 ptabs r18,tr1 | |
1954 blink tr0,r18 | |
1955 sub r63,r2,r2 | |
1956 blink tr1,r63 | |
1957 ENDFUNC(GLOBAL(moddi3)) | |
1958 #endif /* __SHMEDIA__ */ | |
1959 #endif /* L_moddi3 */ | |
1960 | |
1961 #ifdef L_set_fpscr | |
1962 #if !defined (__SH2A_NOFPU__) | |
1963 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32 | |
1964 #ifdef __SH5__ | |
1965 .mode SHcompact | |
1966 #endif | |
/* set_fpscr: load FPSCR from r4 and refresh the two words of fpscr_values.
   In:  r4 = new FPSCR value.
   Two variants of that value are stored, differing only in bits 19 and 20
   (bits 3 and 4 of the word-swapped copy worked on in r0).
   NOTE(review): bits 19 / 20 are presumably FPSCR.PR / FPSCR.SZ - confirm
   against the CPU manual.
   Writes r0-r3; r12 is saved and restored in PIC builds.  */
1967 .global GLOBAL(set_fpscr) | |
1968 HIDDEN_FUNC(GLOBAL(set_fpscr)) | |
1969 GLOBAL(set_fpscr): | |
1970 lds r4,fpscr | |
/* Get the address of fpscr_values into r1, through the GOT when PIC.  */
1971 #ifdef __PIC__ | |
1972 mov.l r12,@-r15 | |
1973 #ifdef __vxworks | |
1974 mov.l LOCAL(set_fpscr_L0_base),r12 | |
1975 mov.l LOCAL(set_fpscr_L0_index),r0 | |
1976 mov.l @r12,r12 | |
1977 mov.l @(r0,r12),r12 | |
1978 #else | |
1979 mova LOCAL(set_fpscr_L0),r0 | |
1980 mov.l LOCAL(set_fpscr_L0),r12 | |
1981 add r0,r12 | |
1982 #endif | |
1983 mov.l LOCAL(set_fpscr_L1),r0 | |
1984 mov.l @(r0,r12),r1 | |
1985 mov.l @r15+,r12 | |
1986 #else | |
1987 mov.l LOCAL(set_fpscr_L1),r1 | |
1988 #endif | |
/* Swap the halves so bits 19 / 20 can be reached with 8-bit immediates.
   First variant: bit 19 set, bit 20 set only when FMOVD_WORKS.  */
1989 swap.w r4,r0 | |
1990 or #24,r0 | |
1991 #ifndef FMOVD_WORKS | |
1992 xor #16,r0 | |
1993 #endif | |
/* The first variant goes to fpscr_values[1] on __SH4__ / __SH2A_DOUBLE__
   and to fpscr_values[0] otherwise.  */
1994 #if defined(__SH4__) || defined (__SH2A_DOUBLE__) | |
1995 swap.w r0,r3 | |
1996 mov.l r3,@(4,r1) | |
1997 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ | |
1998 swap.w r0,r2 | |
1999 mov.l r2,@r1 | |
2000 #endif | |
/* Second variant: clear whichever of bits 19 / 20 were set above.  */
2001 #ifndef FMOVD_WORKS | |
2002 xor #8,r0 | |
2003 #else | |
2004 xor #24,r0 | |
2005 #endif | |
/* The second variant goes to the other slot, stored in the rts delay slot.  */
2006 #if defined(__SH4__) || defined (__SH2A_DOUBLE__) | |
2007 swap.w r0,r2 | |
2008 rts | |
2009 mov.l r2,@r1 | |
2010 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */ | |
2011 swap.w r0,r3 | |
2012 rts | |
2013 mov.l r3,@(4,r1) | |
2014 #endif | |
/* Literal pool for the address loads above.  */
2015 .align 2 | |
2016 #ifdef __PIC__ | |
2017 #ifdef __vxworks | |
2018 LOCAL(set_fpscr_L0_base): | |
2019 .long ___GOTT_BASE__ | |
2020 LOCAL(set_fpscr_L0_index): | |
2021 .long ___GOTT_INDEX__ | |
2022 #else | |
2023 LOCAL(set_fpscr_L0): | |
2024 .long _GLOBAL_OFFSET_TABLE_ | |
2025 #endif | |
2026 LOCAL(set_fpscr_L1): | |
2027 .long GLOBAL(fpscr_values@GOT) | |
2028 #else | |
2029 LOCAL(set_fpscr_L1): | |
2030 .long GLOBAL(fpscr_values) | |
2031 #endif | |
2032 | |
2033 ENDFUNC(GLOBAL(set_fpscr)) | |
2034 #ifndef NO_FPSCR_VALUES | |
2035 #ifdef __ELF__ | |
2036 .comm GLOBAL(fpscr_values),8,4 | |
2037 #else | |
2038 .comm GLOBAL(fpscr_values),8 | |
2039 #endif /* ELF */ | |
2040 #endif /* NO_FPSCR_VALUES */ | |
2041 #endif /* SH2E / SH3E / SH4 */ | |
2042 #endif /* __SH2A_NOFPU__ */ | |
2043 #endif /* L_set_fpscr */ | |
2044 #ifdef L_ic_invalidate | |
2045 #if __SH5__ == 32 | |
/* init_trampoline (SHmedia): fill in the 16 bytes at the address in r0.
   A 64-bit constant goes to offset 0, r2 to offset 8 and r3 to offset 12.
   There is no return instruction here: execution falls through into
   ic_invalidate below, which operates on the same address in r0.
   Writes r20.  */
2046 .mode SHmedia | |
2047 .section .text..SHmedia32,"ax" | |
2048 .align 2 | |
2049 .global GLOBAL(init_trampoline) | |
2050 HIDDEN_FUNC(GLOBAL(init_trampoline)) | |
2051 GLOBAL(init_trampoline): | |
2052 st.l r0,8,r2 | |
/* Either way the constant lands in memory as the 16-bit words 0xd002,
   0xd101, 0x402b, 0x0009 in ascending address order.
   NOTE(review): these look like SHcompact opcodes (two pc-relative mov.l
   loads, jmp @r0, nop) - confirm against the instruction encoding.  */
2053 #ifdef __LITTLE_ENDIAN__ | |
2054 movi 9,r20 | |
2055 shori 0x402b,r20 | |
2056 shori 0xd101,r20 | |
2057 shori 0xd002,r20 | |
2058 #else | |
2059 movi 0xffffffffffffd002,r20 | |
2060 shori 0xd101,r20 | |
2061 shori 0x402b,r20 | |
2062 shori 9,r20 | |
2063 #endif | |
2064 st.q r0,0,r20 | |
2065 st.l r0,12,r3 | |
2066 ENDFUNC(GLOBAL(init_trampoline)) | |
/* ic_invalidate (SHmedia): address in r0, return address in r18.
   Issues ocbwb and icbi on the address in r0, with synco / synci
   barriers, then returns.  Writes tr0.  */
2067 .global GLOBAL(ic_invalidate) | |
2068 HIDDEN_FUNC(GLOBAL(ic_invalidate)) | |
2069 GLOBAL(ic_invalidate): | |
2070 ocbwb r0,0 | |
2071 synco | |
2072 icbi r0, 0 | |
2073 ptabs r18, tr0 | |
2074 synci | |
2075 blink tr0, r63 | |
2076 ENDFUNC(GLOBAL(ic_invalidate)) | |
2077 #elif defined(__SH4A__) | |
/* ic_invalidate (SH4A): address in r4.
   Issues ocbwb on the address in r4, then synco, then icbi on the same
   address in the rts delay slot.  */
2078 .global GLOBAL(ic_invalidate) | |
2079 HIDDEN_FUNC(GLOBAL(ic_invalidate)) | |
2080 GLOBAL(ic_invalidate): | |
2081 ocbwb @r4 | |
2082 synco | |
2083 rts | |
2084 icbi @r4 | |
2085 ENDFUNC(GLOBAL(ic_invalidate)) | |
2086 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) | |
2087 /* For system code, we use ic_invalidate_line_i, but user code | |
2088 needs a different mechanism. A kernel call is generally not | |
2089 available, and it would also be slow. Different SH4 variants use | |
2090 different sizes and associativities of the Icache. We use a small | |
2091 bit of dispatch code that can be put hidden in every shared object, | |
2092 which calls the actual processor-specific invalidation code in a | |
2093 separate module. | |
2094 Or if you have operating system support, the OS could mmap the | |
2095 processor-specific code from a single page, since it is highly | |
2096 repetitive. */ | |
/* ic_invalidate (SH4 user-mode dispatcher): address in r4.
   Loads the address of ic_invalidate_array into r1 (through the GOT when
   __pic__ is defined), issues ocbwb on the address in r4, and jumps into
   the array at offset ((r4 - r1) & mask), where mask is the word stored
   at offset 8 of the array.  On entry to the array code r4 holds
   r4 - r1 and r0 holds the word stored at offset 4 of the array.
   Writes r0, r1 and r4 (plus r2 in PIC builds).  */
2097 .global GLOBAL(ic_invalidate) | |
2098 HIDDEN_FUNC(GLOBAL(ic_invalidate)) | |
2099 GLOBAL(ic_invalidate): | |
2100 #ifdef __pic__ | |
2101 #ifdef __vxworks | |
2102 mov.l 1f,r1 | |
2103 mov.l 2f,r0 | |
2104 mov.l @r1,r1 | |
2105 mov.l 0f,r2 | |
2106 mov.l @(r0,r1),r0 | |
2107 #else | |
2108 mov.l 1f,r1 | |
2109 mova 1f,r0 | |
2110 mov.l 0f,r2 | |
2111 add r1,r0 | |
2112 #endif | |
/* r0 = GOT base, r2 = GOT offset of ic_invalidate_array.  */
2113 mov.l @(r0,r2),r1 | |
2114 #else | |
2115 mov.l 0f,r1 | |
2116 #endif | |
2117 ocbwb @r4 | |
/* r0 = mask word; r4 becomes the offset of the address from the array.  */
2118 mov.l @(8,r1),r0 | |
2119 sub r1,r4 | |
2120 and r4,r0 | |
2121 add r1,r0 | |
2122 jmp @r0 | |
/* Delay slot: hand the word at offset 4 of the array to the array code.  */
2123 mov.l @(4,r1),r0 | |
2124 .align 2 | |
2125 #ifndef __pic__ | |
2126 0: .long GLOBAL(ic_invalidate_array) | |
2127 #else /* __pic__ */ | |
2128 .global GLOBAL(ic_invalidate_array) | |
2129 0: .long GLOBAL(ic_invalidate_array)@GOT | |
2130 #ifdef __vxworks | |
2131 1: .long ___GOTT_BASE__ | |
2132 2: .long ___GOTT_INDEX__ | |
2133 #else | |
2134 1: .long _GLOBAL_OFFSET_TABLE_ | |
2135 #endif | |
2136 #endif /* __pic__ */ | |
/* Close the function for PIC and non-PIC builds alike; HIDDEN_FUNC above
   is unconditional, so ENDFUNC must be too.  */
2137 ENDFUNC(GLOBAL(ic_invalidate)) | |
2138 #endif /* SH4 */ | |
2139 #endif /* L_ic_invalidate */ | |
2140 | |
2141 #ifdef L_ic_invalidate_array | |
2142 #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)))) | |
/* ic_invalidate_array (SH4A build): adds r1 to r4, issues synco, and runs
   icbi on the resulting address in the rts delay slot.  A zero word
   follows the four 16-bit instructions, i.e. at offset 8.
   NOTE(review): presumably entered from the SH4 ic_invalidate dispatcher
   with r1 = address of this array and r4 = address - r1, so the add
   rebuilds the address and the zero word is the mask that dispatcher
   reads from offset 8 - confirm against the dispatcher.  */
2143 .global GLOBAL(ic_invalidate_array) | |
2144 /* This is needed when an SH4 dso with trampolines is used on SH4A. */ | |
2146 FUNC(GLOBAL(ic_invalidate_array)) | |
2147 GLOBAL(ic_invalidate_array): | |
2148 add r1,r4 | |
2149 synco | |
2150 rts | |
2151 icbi @r4 | |
2152 .long 0 | |
2153 ENDFUNC(GLOBAL(ic_invalidate_array)) | |
2154 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__)) | |
/* ic_invalidate_array (SH4): a block of code and data generated with
   .rept and aligned to 32 bytes.  WAYS and WAY_SIZE select the layout;
   when WAYS is not predefined they default to 4 and 0x4000.  Every
   variant emits 32 bytes per innermost repetition.  */
2155 .global GLOBAL(ic_invalidate_array) | |
2156 .p2align 5 | |
2157 FUNC(GLOBAL(ic_invalidate_array)) | |
2158 /* This must be aligned to the beginning of a cache line. */ | |
2159 GLOBAL(ic_invalidate_array): | |
/* Only WAYS is tested: a build that predefines WAYS must also define
   WAY_SIZE.  */
2160 #ifndef WAYS | |
2161 #define WAYS 4 | |
2162 #define WAY_SIZE 0x4000 | |
2163 #endif | |
/* WAYS == 1: each 32-byte unit is rts / nop followed by seven words
   holding WAY_SIZE - 32.  */
2164 #if WAYS == 1 | |
2165 .rept WAY_SIZE * WAYS / 32 | |
2166 rts | |
2167 nop | |
2168 .rept 7 | |
2169 .long WAY_SIZE - 32 | |
2170 .endr | |
2171 .endr | |
/* 2 <= WAYS <= 6: each unit starts with braf r0 (add #-8,r0 in its delay
   slot), then the words WAY_SIZE + 8 (offset 4) and WAY_SIZE - 32
   (offset 8), then WAYS-2 further braf / nop pairs and 7-WAYS rts / nop
   pairs.  */
2172 #elif WAYS <= 6 | |
2173 .rept WAY_SIZE * WAYS / 32 | |
2174 braf r0 | |
2175 add #-8,r0 | |
2176 .long WAY_SIZE + 8 | |
2177 .long WAY_SIZE - 32 | |
2178 .rept WAYS-2 | |
2179 braf r0 | |
2180 nop | |
2181 .endr | |
2182 .rept 7 - WAYS | |
2183 rts | |
2184 nop | |
2185 .endr | |
2186 .endr | |
2187 #else /* WAYS > 6 */ | |
2188 /* This variant needs two different pages for mmap-ing. */ | |
2189 .rept WAYS-1 | |
2190 .rept WAY_SIZE / 32 | |
2191 braf r0 | |
2192 nop | |
2193 .long WAY_SIZE | |
2194 .rept 6 | |
2195 .long WAY_SIZE - 32 | |
2196 .endr | |
2197 .endr | |
2198 .endr | |
2199 .rept WAY_SIZE / 32 | |
2200 rts | |
2201 .rept 15 | |
2202 nop | |
2203 .endr | |
2204 .endr | |
2205 #endif /* WAYS */ | |
2206 ENDFUNC(GLOBAL(ic_invalidate_array)) | |
2207 #endif /* SH4 */ | |
2208 #endif /* L_ic_invalidate_array */ | |
2209 | |
2210 #if defined (__SH5__) && __SH5__ == 32 | |
2211 #ifdef L_shcompact_call_trampoline | |
2212 .section .rodata | |
2213 .align 1 | |
/* Dispatch table for GCC_shcompact_call_trampoline: 33 signed 16-bit offsets, each relative to ct_main_label (the ptrel instruction that consumes them). */
/* The trampoline indexes it with nsb(cookie) and addresses the table through a base biased by -31 entries, so entry k is selected when the highest set bit of the 32-bit cookie is bit 31-k; the last entry (ct_call_func) is used when no cookie bits remain. */
2214 LOCAL(ct_main_table): | |
2215 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label) | |
2216 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label) | |
2217 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label) | |
2218 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label) | |
2219 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label) | |
2220 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label) | |
2221 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label) | |
2222 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label) | |
2223 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label) | |
2224 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label) | |
2225 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label) | |
2226 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label) | |
/* From r6 on, each register has four entries: high FP, low FP, load, pop. */
2227 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label) | |
2228 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label) | |
2229 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label) | |
2230 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label) | |
2231 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label) | |
2232 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label) | |
2233 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label) | |
2234 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label) | |
2235 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label) | |
2236 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label) | |
2237 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label) | |
2238 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label) | |
2239 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label) | |
2240 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label) | |
2241 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label) | |
2242 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) | |
/* Cookie bits 3 and 2 both dispatch to the pop sequence, bit 1 to a single r9 pop, bit 0 to the wide-return path. */
2243 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) | |
2244 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label) | |
2245 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label) | |
2246 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label) | |
2247 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label) | |
/* Switch to the SHmedia text section for the trampoline code that follows. */
2248 .mode SHmedia | |
2249 .section .text..SHmedia32, "ax" | |
2250 .align 2 | |
2251 | |
2252 /* This function loads 64-bit general-purpose registers from the | |
2253 stack, from a memory address contained in them or from an FP | |
2254 register, according to a cookie passed in r1. Its execution | |
2255 time is linear on the number of registers that actually have | |
2256 to be copied. See sh.h for details on the actual bit pattern. | |
2257 | |
2258 The function to be called is passed in r0. If a 32-bit return | |
2259 value is expected, the actual function will be tail-called, | |
2260 otherwise the return address will be stored in r10 (that the | |
2261 caller should expect to be clobbered) and the return value | |
2262 will be expanded into r2/r3 upon return. */ | |
2263 | |
2264 .global GLOBAL(GCC_shcompact_call_trampoline) | |
2265 FUNC(GLOBAL(GCC_shcompact_call_trampoline)) | |
2266 GLOBAL(GCC_shcompact_call_trampoline): | |
2267 ptabs/l r0, tr0 /* Prepare to call the actual function. */ | |
/* r0 is free once tr0 is set; movi/shori build the address of ct_main_table biased by -31 entries (2 bytes each). */
2268 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0 | |
/* tr1 = loop head; every handler comes back here with "blink tr1, r63". */
2269 pt/l LOCAL(ct_loop), tr1 | |
/* Zero-extend the 32-bit cookie so the upper half of r1 does not affect nsb. */
2270 addz.l r1, r63, r1 | |
2271 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0 | |
2272 LOCAL(ct_loop): | |
/* r28 = nsb(cookie); doubled to a byte offset into the table of .word entries. */
2273 nsb r1, r28 | |
2274 shlli r28, 1, r29 | |
/* r30 = signed 16-bit offset of the selected handler from ct_main_label. */
2275 ldx.w r0, r29, r30 | |
2276 LOCAL(ct_main_label): | |
/* ptrel is the instruction the table offsets are measured from; branch to the handler. */
2277 ptrel/l r30, tr2 | |
2278 blink tr2, r63 | |
/* FP-register handlers.  Each one moves a double FP register into rN with fmov.dq, clears the handled field of the cookie in r1 and returns to ct_loop through tr1. */
2279 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */ | |
2280 /* It must be dr0, so just do it. */ | |
2281 fmov.dq dr0, r2 | |
/* r31 = 7 << 29; clear cookie bits 29..31. */
2282 movi 7, r30 | |
2283 shlli r30, 29, r31 | |
2284 andc r1, r31, r1 | |
2285 blink tr1, r63 | |
2286 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */ | |
2287 /* It is either dr0 or dr2. */ | |
2288 movi 7, r30 | |
/* r32 = cookie >> 26, sampled before the field (7 << 26) is cleared. */
2289 shlri r1, 26, r32 | |
2290 shlli r30, 26, r31 | |
2291 andc r1, r31, r1 | |
/* Load dr0 first; return immediately if r32 == 4, otherwise overwrite with dr2. */
2292 fmov.dq dr0, r3 | |
2293 beqi/l r32, 4, tr1 | |
2294 fmov.dq dr2, r3 | |
2295 blink tr1, r63 | |
2296 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */ | |
/* r33 = (cookie bits 23..24) * 8: byte index into ct_r4_fp_copy, whose entries are two 4-byte instructions each. */
2297 shlri r1, 23 - 3, r34 | |
2298 andi r34, 3 << 3, r33 | |
/* r32 = offset of the selected entry from ct_r4_fp_base, the ptrel that consumes it. */
2299 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32 | |
2300 LOCAL(ct_r4_fp_base): | |
2301 ptrel/l r32, tr2 | |
/* Clear cookie bits 23..25, then jump to the selected copy entry. */
2302 movi 7, r30 | |
2303 shlli r30, 23, r31 | |
2304 andc r1, r31, r1 | |
2305 blink tr2, r63 | |
/* Three 8-byte entries: selector 0 -> dr0, 1 -> dr2, 2 -> dr4. */
2306 LOCAL(ct_r4_fp_copy): | |
2307 fmov.dq dr0, r4 | |
2308 blink tr1, r63 | |
2309 fmov.dq dr2, r4 | |
2310 blink tr1, r63 | |
2311 fmov.dq dr4, r4 | |
2312 blink tr1, r63 | |
2313 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */ | |
/* Same scheme as ct_r4_fp with the selector at cookie bits 20..21; clears bits 20..22. */
2314 shlri r1, 20 - 3, r34 | |
2315 andi r34, 3 << 3, r33 | |
2316 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32 | |
2317 LOCAL(ct_r5_fp_base): | |
2318 ptrel/l r32, tr2 | |
2319 movi 7, r30 | |
2320 shlli r30, 20, r31 | |
2321 andc r1, r31, r1 | |
2322 blink tr2, r63 | |
/* Four 8-byte entries: dr0, dr2, dr4, dr6. */
2323 LOCAL(ct_r5_fp_copy): | |
2324 fmov.dq dr0, r5 | |
2325 blink tr1, r63 | |
2326 fmov.dq dr2, r5 | |
2327 blink tr1, r63 | |
2328 fmov.dq dr4, r5 | |
2329 blink tr1, r63 | |
2330 fmov.dq dr6, r5 | |
2331 blink tr1, r63 | |
2332 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */ | |
2333 /* It must be dr8. */ | |
2334 fmov.dq dr8, r6 | |
/* Clear the whole four-bit r6 field (15 << 16). */
2335 movi 15, r30 | |
2336 shlli r30, 16, r31 | |
2337 andc r1, r31, r1 | |
2338 blink tr1, r63 | |
2339 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */ | |
/* Selector at cookie bits 16..17, scaled by 8; clears bits 16..18. */
2340 shlri r1, 16 - 3, r34 | |
2341 andi r34, 3 << 3, r33 | |
2342 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32 | |
2343 LOCAL(ct_r6_fp_base): | |
2344 ptrel/l r32, tr2 | |
2345 movi 7, r30 | |
2346 shlli r30, 16, r31 | |
2347 andc r1, r31, r1 | |
2348 blink tr2, r63 | |
2349 LOCAL(ct_r6_fp_copy): | |
2350 fmov.dq dr0, r6 | |
2351 blink tr1, r63 | |
2352 fmov.dq dr2, r6 | |
2353 blink tr1, r63 | |
2354 fmov.dq dr4, r6 | |
2355 blink tr1, r63 | |
2356 fmov.dq dr6, r6 | |
2357 blink tr1, r63 | |
2358 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */ | |
2359 /* It is either dr8 or dr10. */ | |
2360 movi 15 << 12, r31 | |
/* r32 = cookie >> 12, sampled before the field is cleared; the value 8 keeps dr8, anything else takes dr10. */
2361 shlri r1, 12, r32 | |
2362 andc r1, r31, r1 | |
2363 fmov.dq dr8, r7 | |
2364 beqi/l r32, 8, tr1 | |
2365 fmov.dq dr10, r7 | |
2366 blink tr1, r63 | |
2367 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */ | |
/* Selector at cookie bits 12..13, scaled by 8; clears bits 12..14. */
2368 shlri r1, 12 - 3, r34 | |
2369 andi r34, 3 << 3, r33 | |
2370 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32 | |
2371 LOCAL(ct_r7_fp_base): | |
2372 ptrel/l r32, tr2 | |
2373 movi 7 << 12, r31 | |
2374 andc r1, r31, r1 | |
2375 blink tr2, r63 | |
2376 LOCAL(ct_r7_fp_copy): | |
2377 fmov.dq dr0, r7 | |
2378 blink tr1, r63 | |
2379 fmov.dq dr2, r7 | |
2380 blink tr1, r63 | |
2381 fmov.dq dr4, r7 | |
2382 blink tr1, r63 | |
2383 fmov.dq dr6, r7 | |
2384 blink tr1, r63 | |
2385 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */ | |
2386 /* It is either dr8 or dr10. */ | |
2387 movi 15 << 8, r31 | |
/* r32 = cookie bit 8, sampled before the field is cleared; zero keeps dr8, non-zero takes dr10. */
2388 andi r1, 1 << 8, r32 | |
2389 andc r1, r31, r1 | |
2390 fmov.dq dr8, r8 | |
2391 beq/l r32, r63, tr1 | |
2392 fmov.dq dr10, r8 | |
2393 blink tr1, r63 | |
2394 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */ | |
/* Selector at cookie bits 8..9, scaled by 8; clears bits 8..10. */
2395 shlri r1, 8 - 3, r34 | |
2396 andi r34, 3 << 3, r33 | |
2397 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32 | |
2398 LOCAL(ct_r8_fp_base): | |
2399 ptrel/l r32, tr2 | |
2400 movi 7 << 8, r31 | |
2401 andc r1, r31, r1 | |
2402 blink tr2, r63 | |
2403 LOCAL(ct_r8_fp_copy): | |
2404 fmov.dq dr0, r8 | |
2405 blink tr1, r63 | |
2406 fmov.dq dr2, r8 | |
2407 blink tr1, r63 | |
2408 fmov.dq dr4, r8 | |
2409 blink tr1, r63 | |
2410 fmov.dq dr6, r8 | |
2411 blink tr1, r63 | |
2412 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */ | |
2413 /* It is either dr8 or dr10. */ | |
2414 movi 15 << 4, r31 | |
/* r32 = cookie bit 4, sampled before the field is cleared; zero keeps dr8, non-zero takes dr10. */
2415 andi r1, 1 << 4, r32 | |
2416 andc r1, r31, r1 | |
2417 fmov.dq dr8, r9 | |
2418 beq/l r32, r63, tr1 | |
2419 fmov.dq dr10, r9 | |
2420 blink tr1, r63 | |
2421 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */ | |
/* Selector at cookie bits 4..5, scaled by 8; clears bits 4..6. */
2422 shlri r1, 4 - 3, r34 | |
2423 andi r34, 3 << 3, r33 | |
2424 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32 | |
2425 LOCAL(ct_r9_fp_base): | |
2426 ptrel/l r32, tr2 | |
2427 movi 7 << 4, r31 | |
2428 andc r1, r31, r1 | |
2429 blink tr2, r63 | |
2430 LOCAL(ct_r9_fp_copy): | |
2431 fmov.dq dr0, r9 | |
2432 blink tr1, r63 | |
2433 fmov.dq dr2, r9 | |
2434 blink tr1, r63 | |
2435 fmov.dq dr4, r9 | |
2436 blink tr1, r63 | |
2437 fmov.dq dr6, r9 | |
2438 blink tr1, r63 | |
/* Memory-load handlers.  ct_rN_ld extracts the two-bit field (3 << shift) from the cookie and clears it. */
/* If both bits were set it branches to ct_rN_load, which loads rN from the address held in rN and returns to ct_loop. */
/* Otherwise it sets r(N+1) = rN + 8, loads rN from the address held in rN and falls through into the next register's handler. */
2439 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */ | |
2440 pt/l LOCAL(ct_r2_load), tr2 | |
/* r31 = 3 << 29 (field mask), r32 = field value. */
2441 movi 3, r30 | |
2442 shlli r30, 29, r31 | |
2443 and r1, r31, r32 | |
2444 andc r1, r31, r1 | |
2445 beq/l r31, r32, tr2 | |
2446 addi.l r2, 8, r3 | |
2447 ldx.q r2, r63, r2 | |
2448 /* Fall through. */ | |
2449 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */ | |
2450 pt/l LOCAL(ct_r3_load), tr2 | |
2451 movi 3, r30 | |
2452 shlli r30, 26, r31 | |
2453 and r1, r31, r32 | |
2454 andc r1, r31, r1 | |
2455 beq/l r31, r32, tr2 | |
2456 addi.l r3, 8, r4 | |
2457 ldx.q r3, r63, r3 | |
/* Falls through, as do all following ct_rN_ld handlers up to ct_r9_ld. */
2458 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */ | |
2459 pt/l LOCAL(ct_r4_load), tr2 | |
2460 movi 3, r30 | |
2461 shlli r30, 23, r31 | |
2462 and r1, r31, r32 | |
2463 andc r1, r31, r1 | |
2464 beq/l r31, r32, tr2 | |
2465 addi.l r4, 8, r5 | |
2466 ldx.q r4, r63, r4 | |
2467 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */ | |
2468 pt/l LOCAL(ct_r5_load), tr2 | |
2469 movi 3, r30 | |
2470 shlli r30, 20, r31 | |
2471 and r1, r31, r32 | |
2472 andc r1, r31, r1 | |
2473 beq/l r31, r32, tr2 | |
2474 addi.l r5, 8, r6 | |
2475 ldx.q r5, r63, r5 | |
2476 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */ | |
2477 pt/l LOCAL(ct_r6_load), tr2 | |
/* From here on the field mask is built with a single movi. */
2478 movi 3 << 16, r31 | |
2479 and r1, r31, r32 | |
2480 andc r1, r31, r1 | |
2481 beq/l r31, r32, tr2 | |
2482 addi.l r6, 8, r7 | |
2483 ldx.q r6, r63, r6 | |
2484 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */ | |
2485 pt/l LOCAL(ct_r7_load), tr2 | |
2486 movi 3 << 12, r31 | |
2487 and r1, r31, r32 | |
2488 andc r1, r31, r1 | |
2489 beq/l r31, r32, tr2 | |
2490 addi.l r7, 8, r8 | |
2491 ldx.q r7, r63, r7 | |
2492 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */ | |
2493 pt/l LOCAL(ct_r8_load), tr2 | |
2494 movi 3 << 8, r31 | |
2495 and r1, r31, r32 | |
2496 andc r1, r31, r1 | |
2497 beq/l r31, r32, tr2 | |
2498 addi.l r8, 8, r9 | |
2499 ldx.q r8, r63, r8 | |
2500 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */ | |
/* r9 is the last register: load it and go straight to the final return-kind check instead of back to ct_loop. */
2501 pt/l LOCAL(ct_check_tramp), tr2 | |
2502 ldx.q r9, r63, r9 | |
2503 blink tr2, r63 | |
/* Single-register load stubs reached from the ct_rN_ld handlers above; each returns to ct_loop via tr1. */
2504 LOCAL(ct_r2_load): | |
2505 ldx.q r2, r63, r2 | |
2506 blink tr1, r63 | |
2507 LOCAL(ct_r3_load): | |
2508 ldx.q r3, r63, r3 | |
2509 blink tr1, r63 | |
2510 LOCAL(ct_r4_load): | |
2511 ldx.q r4, r63, r4 | |
2512 blink tr1, r63 | |
2513 LOCAL(ct_r5_load): | |
2514 ldx.q r5, r63, r5 | |
2515 blink tr1, r63 | |
2516 LOCAL(ct_r6_load): | |
2517 ldx.q r6, r63, r6 | |
2518 blink tr1, r63 | |
2519 LOCAL(ct_r7_load): | |
2520 ldx.q r7, r63, r7 | |
2521 blink tr1, r63 | |
2522 LOCAL(ct_r8_load): | |
2523 ldx.q r8, r63, r8 | |
2524 blink tr1, r63 | |
/* Stack-pop handlers.  Each loads rN from the address in r15, advances r15 by 8, clears the single cookie bit that selected it and returns to ct_loop via tr1. */
2525 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */ | |
2526 movi 1, r30 | |
2527 ldx.q r15, r63, r2 | |
/* r31 = 1 << 29: the cookie bit to clear. */
2528 shlli r30, 29, r31 | |
2529 addi.l r15, 8, r15 | |
2530 andc r1, r31, r1 | |
2531 blink tr1, r63 | |
2532 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */ | |
2533 movi 1, r30 | |
2534 ldx.q r15, r63, r3 | |
2535 shlli r30, 26, r31 | |
2536 addi.l r15, 8, r15 | |
2537 andc r1, r31, r1 | |
2538 blink tr1, r63 | |
2539 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */ | |
2540 movi 1, r30 | |
2541 ldx.q r15, r63, r4 | |
2542 shlli r30, 23, r31 | |
2543 addi.l r15, 8, r15 | |
2544 andc r1, r31, r1 | |
2545 blink tr1, r63 | |
2546 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */ | |
2547 movi 1, r30 | |
2548 ldx.q r15, r63, r5 | |
2549 shlli r30, 20, r31 | |
2550 addi.l r15, 8, r15 | |
2551 andc r1, r31, r1 | |
2552 blink tr1, r63 | |
2553 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */ | |
2554 movi 1, r30 | |
2555 ldx.q r15, r63, r6 | |
2556 shlli r30, 16, r31 | |
2557 addi.l r15, 8, r15 | |
2558 andc r1, r31, r1 | |
2559 blink tr1, r63 | |
2560 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */ | |
2561 ldx.q r15, r63, r7 | |
2562 movi 1 << 12, r31 | |
2563 addi.l r15, 8, r15 | |
2564 andc r1, r31, r1 | |
2565 blink tr1, r63 | |
2566 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */ | |
2567 ldx.q r15, r63, r8 | |
2568 movi 1 << 8, r31 | |
2569 addi.l r15, 8, r15 | |
2570 andc r1, r31, r1 | |
2571 blink tr1, r63 | |
2572 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */ | |
/* r30 = (cookie bits 1..3) << 1; shifted left by 2 it becomes count * 8, the byte size of that many ldx.q/addi.l pairs. */
2573 andi r1, 7 << 1, r30 | |
2574 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32 | |
2575 shlli r30, 2, r31 | |
2576 shori LOCAL(ct_end_of_pop_seq) & 65535, r32 | |
/* Enter the sequence count pairs before ct_end_of_pop_seq, so exactly the last count registers (ending with r9) are popped. */
2577 sub.l r32, r31, r33 | |
2578 ptabs/l r33, tr2 | |
2579 blink tr2, r63 | |
2580 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */ | |
/* Layout-sensitive: every pop must remain exactly two 4-byte instructions. */
2581 ldx.q r15, r63, r3 | |
2582 addi.l r15, 8, r15 | |
2583 ldx.q r15, r63, r4 | |
2584 addi.l r15, 8, r15 | |
2585 ldx.q r15, r63, r5 | |
2586 addi.l r15, 8, r15 | |
2587 ldx.q r15, r63, r6 | |
2588 addi.l r15, 8, r15 | |
2589 ldx.q r15, r63, r7 | |
2590 addi.l r15, 8, r15 | |
2591 ldx.q r15, r63, r8 | |
2592 addi.l r15, 8, r15 | |
2593 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */ | |
/* No return to ct_loop here: execution continues into the code that follows the sequence. */
2594 ldx.q r15, r63, r9 | |
2595 addi.l r15, 8, r15 | |
2596 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */ | |
2597 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */ | |
/* Only cookie bit 0 matters now: if it is set, take the ct_ret_wide path; otherwise tail-call the function. */
2598 pt/u LOCAL(ct_ret_wide), tr2 | |
2599 andi r1, 1, r1 | |
2600 bne/u r1, r63, tr2 | |
2601 LOCAL(ct_call_func): /* Just branch to the function. */ | |
2602 blink tr0, r63 | |
2603 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its | |
2604 64-bit return value. */ | |
/* Keep the caller's return address (r18) in r10, since the call below overwrites r18 with its own link address. */
2605 add.l r18, r63, r10 | |
2606 blink tr0, r18 | |
/* Back from the function: prepare the return to the original caller. */
2607 ptabs r10, tr0 | |
/* Split the 64-bit value in r2 into two 32-bit halves in r2/r3; which half goes where depends on endianness. */
2608 #if __LITTLE_ENDIAN__ | |
2609 shari r2, 32, r3 | |
2610 add.l r2, r63, r2 | |
2611 #else | |
2612 add.l r2, r63, r3 | |
2613 shari r2, 32, r2 | |
2614 #endif | |
2615 blink tr0, r63 | |
2616 | |
2617 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline)) | |
2618 #endif /* L_shcompact_call_trampoline */ | |
2619 | |
2620 #ifdef L_shcompact_return_trampoline | |
2621 /* This function does the converse of the code in `ret_wide' | |
2622 above. It is tail-called by SHcompact functions returning | |
2623 64-bit non-floating-point values, to pack the 32-bit values in | |
2624 r2 and r3 into r2. */ | |
2625 | |
2626 .mode SHmedia | |
2627 .section .text..SHmedia32, "ax" | |
2628 .align 2 | |
2629 .global GLOBAL(GCC_shcompact_return_trampoline) | |
2630 HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline)) | |
2631 GLOBAL(GCC_shcompact_return_trampoline): | |
/* Return address comes from r18. */
2632 ptabs/l r18, tr0 | |
/* Zero-extend the register holding the low half and shift the other one into the upper 32 bits; which is which depends on endianness. */
2633 #if __LITTLE_ENDIAN__ | |
2634 addz.l r2, r63, r2 | |
2635 shlli r3, 32, r3 | |
2636 #else | |
2637 addz.l r3, r63, r3 | |
2638 shlli r2, 32, r2 | |
2639 #endif | |
/* Combine both halves into r2 and return. */
2640 or r3, r2, r2 | |
2641 blink tr0, r63 | |
2642 | |
2643 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline)) | |
2644 #endif /* L_shcompact_return_trampoline */ | |
2645 | |
2646 #ifdef L_shcompact_incoming_args | |
2647 .section .rodata | |
2648 .align 1 | |
/* Dispatch table for GCC_shcompact_incoming_args: 33 signed 16-bit offsets relative to ia_main_label, indexed by nsb(cookie) through a base biased by -31 entries. */
/* Same per-register layout as the call trampoline's table, but the FP slots hold the placeholder value 1 because this routine has no FP handlers. */
2649 LOCAL(ia_main_table): | |
2650 .word 1 /* Invalid, just loop */ | |
2651 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) | |
2652 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) | |
2653 .word 1 /* Invalid, just loop */ | |
2654 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) | |
2655 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label) | |
2656 .word 1 /* Invalid, just loop */ | |
2657 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) | |
2658 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label) | |
2659 .word 1 /* Invalid, just loop */ | |
2660 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) | |
2661 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label) | |
/* From r6 on, each register has four entries, the first two being placeholders. */
2662 .word 1 /* Invalid, just loop */ | |
2663 .word 1 /* Invalid, just loop */ | |
2664 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) | |
2665 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label) | |
2666 .word 1 /* Invalid, just loop */ | |
2667 .word 1 /* Invalid, just loop */ | |
2668 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) | |
2669 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label) | |
2670 .word 1 /* Invalid, just loop */ | |
2671 .word 1 /* Invalid, just loop */ | |
2672 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) | |
2673 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label) | |
2674 .word 1 /* Invalid, just loop */ | |
2675 .word 1 /* Invalid, just loop */ | |
2676 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) | |
2677 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) | |
/* Final entries: push sequence (twice), single r9 push, and return for the last two slots. */
2678 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) | |
2679 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) | |
2680 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) | |
2681 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) | |
2682 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) | |
/* Switch to the SHmedia text section for the code that follows. */
2683 .mode SHmedia | |
2684 .section .text..SHmedia32, "ax" | |
2685 .align 2 | |
2686 | |
2687 /* This function stores 64-bit general-purpose registers back in | |
2688 the stack, and loads the address in which each register | |
2689 was stored into itself. The lower 32 bits of r17 hold the address | |
2690 to begin storing, and the upper 32 bits of r17 hold the cookie. | |
2691 Its execution time is linear on the | |
2692 number of registers that actually have to be copied, and it is | |
2693 optimized for structures larger than 64 bits, as opposed to | |
2694 individual `long long' arguments. See sh.h for details on the | |
2695 actual bit pattern. */ | |
2696 | |
2697 .global GLOBAL(GCC_shcompact_incoming_args) | |
2698 FUNC(GLOBAL(GCC_shcompact_incoming_args)) | |
2699 GLOBAL(GCC_shcompact_incoming_args): | |
2700 ptabs/l r18, tr0 /* Prepare to return. */ | |
2701 shlri r17, 32, r0 /* Load the cookie. */ | |
/* movi/shori build the address of ia_main_table biased by -31 entries (2 bytes each) in r43. */
2702 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 | |
/* tr1 = loop head; handlers come back here with "blink tr1, r63". */
2703 pt/l LOCAL(ia_loop), tr1 | |
/* Reduce r17 to its lower 32 bits: the store address. */
2704 add.l r17, r63, r17 | |
2705 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 | |
2706 LOCAL(ia_loop): | |
/* r36 = nsb(cookie); doubled to a byte offset into the table of .word entries. */
2707 nsb r0, r36 | |
2708 shlli r36, 1, r37 | |
/* r38 = signed 16-bit offset of the selected handler from ia_main_label. */
2709 ldx.w r43, r37, r38 | |
2710 LOCAL(ia_main_label): | |
/* ptrel is the instruction the table offsets are measured from; branch to the handler. */
2711 ptrel/l r38, tr2 | |
2712 blink tr2, r63 | |
/* "Store and load address" handlers.  Each extracts and clears the two-bit cookie field (3 << shift), stores rN at the address in r17, replaces rN with that address and advances r17 by 8. */
/* If both bits of the field were set it returns to ia_loop via tr1; otherwise it falls through into the next register's handler. */
2713 LOCAL(ia_r2_ld): /* Store r2 and load its address. */ | |
/* r39 = 3 << 29 (field mask), r40 = field value. */
2714 movi 3, r38 | |
2715 shlli r38, 29, r39 | |
2716 and r0, r39, r40 | |
2717 andc r0, r39, r0 | |
2718 stx.q r17, r63, r2 | |
2719 add.l r17, r63, r2 | |
2720 addi.l r17, 8, r17 | |
2721 beq/u r39, r40, tr1 | |
2722 LOCAL(ia_r3_ld): /* Store r3 and load its address. */ | |
2723 movi 3, r38 | |
2724 shlli r38, 26, r39 | |
2725 and r0, r39, r40 | |
2726 andc r0, r39, r0 | |
2727 stx.q r17, r63, r3 | |
2728 add.l r17, r63, r3 | |
2729 addi.l r17, 8, r17 | |
2730 beq/u r39, r40, tr1 | |
2731 LOCAL(ia_r4_ld): /* Store r4 and load its address. */ | |
2732 movi 3, r38 | |
2733 shlli r38, 23, r39 | |
2734 and r0, r39, r40 | |
2735 andc r0, r39, r0 | |
2736 stx.q r17, r63, r4 | |
2737 add.l r17, r63, r4 | |
2738 addi.l r17, 8, r17 | |
2739 beq/u r39, r40, tr1 | |
2740 LOCAL(ia_r5_ld): /* Store r5 and load its address. */ | |
2741 movi 3, r38 | |
2742 shlli r38, 20, r39 | |
2743 and r0, r39, r40 | |
2744 andc r0, r39, r0 | |
2745 stx.q r17, r63, r5 | |
2746 add.l r17, r63, r5 | |
2747 addi.l r17, 8, r17 | |
2748 beq/u r39, r40, tr1 | |
2749 LOCAL(ia_r6_ld): /* Store r6 and load its address. */ | |
2750 movi 3, r38 | |
2751 shlli r38, 16, r39 | |
2752 and r0, r39, r40 | |
2753 andc r0, r39, r0 | |
2754 stx.q r17, r63, r6 | |
2755 add.l r17, r63, r6 | |
2756 addi.l r17, 8, r17 | |
2757 beq/u r39, r40, tr1 | |
2758 LOCAL(ia_r7_ld): /* Store r7 and load its address. */ | |
/* From here on the field mask is built with a single movi. */
2759 movi 3 << 12, r39 | |
2760 and r0, r39, r40 | |
2761 andc r0, r39, r0 | |
2762 stx.q r17, r63, r7 | |
2763 add.l r17, r63, r7 | |
2764 addi.l r17, 8, r17 | |
2765 beq/u r39, r40, tr1 | |
2766 LOCAL(ia_r8_ld): /* Store r8 and load its address. */ | |
2767 movi 3 << 8, r39 | |
2768 and r0, r39, r40 | |
2769 andc r0, r39, r0 | |
2770 stx.q r17, r63, r8 | |
2771 add.l r17, r63, r8 | |
2772 addi.l r17, 8, r17 | |
2773 beq/u r39, r40, tr1 | |
2774 LOCAL(ia_r9_ld): /* Store r9 and load its address. */ | |
/* r9 is the last register: store it, record its address and return to the caller through tr0. */
2775 stx.q r17, r63, r9 | |
2776 add.l r17, r63, r9 | |
2777 blink tr0, r63 | |
/* Push handlers.  Each clears the single cookie bit that selected it, stores rN at the address in r17, advances r17 by 8 and returns to ia_loop via tr1.  Unlike the ia_rN_ld handlers, rN itself is left unchanged. */
2778 LOCAL(ia_r2_push): /* Push r2 onto the stack. */ | |
/* r39 = 1 << 29: the cookie bit to clear. */
2779 movi 1, r38 | |
2780 shlli r38, 29, r39 | |
2781 andc r0, r39, r0 | |
2782 stx.q r17, r63, r2 | |
2783 addi.l r17, 8, r17 | |
2784 blink tr1, r63 | |
2785 LOCAL(ia_r3_push): /* Push r3 onto the stack. */ | |
2786 movi 1, r38 | |
2787 shlli r38, 26, r39 | |
2788 andc r0, r39, r0 | |
2789 stx.q r17, r63, r3 | |
2790 addi.l r17, 8, r17 | |
2791 blink tr1, r63 | |
2792 LOCAL(ia_r4_push): /* Push r4 onto the stack. */ | |
2793 movi 1, r38 | |
2794 shlli r38, 23, r39 | |
2795 andc r0, r39, r0 | |
2796 stx.q r17, r63, r4 | |
2797 addi.l r17, 8, r17 | |
2798 blink tr1, r63 | |
2799 LOCAL(ia_r5_push): /* Push r5 onto the stack. */ | |
2800 movi 1, r38 | |
2801 shlli r38, 20, r39 | |
2802 andc r0, r39, r0 | |
2803 stx.q r17, r63, r5 | |
2804 addi.l r17, 8, r17 | |
2805 blink tr1, r63 | |
2806 LOCAL(ia_r6_push): /* Push r6 onto the stack. */ | |
2807 movi 1, r38 | |
2808 shlli r38, 16, r39 | |
2809 andc r0, r39, r0 | |
2810 stx.q r17, r63, r6 | |
2811 addi.l r17, 8, r17 | |
2812 blink tr1, r63 | |
2813 LOCAL(ia_r7_push): /* Push r7 onto the stack. */ | |
2814 movi 1 << 12, r39 | |
2815 andc r0, r39, r0 | |
2816 stx.q r17, r63, r7 | |
2817 addi.l r17, 8, r17 | |
2818 blink tr1, r63 | |
2819 LOCAL(ia_r8_push): /* Push r8 onto the stack. */ | |
2820 movi 1 << 8, r39 | |
2821 andc r0, r39, r0 | |
2822 stx.q r17, r63, r8 | |
2823 addi.l r17, 8, r17 | |
2824 blink tr1, r63 | |
2825 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */ | |
/* r38 = (cookie bits 1..3) << 1; shifted left by 2 it becomes count * 8, the byte size of that many two-instruction steps. */
2826 andi r0, 7 << 1, r38 | |
2827 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 | |
2828 shlli r38, 2, r39 | |
2829 shori LOCAL(ia_end_of_push_seq) & 65535, r40 | |
/* Enter the sequence count steps before ia_end_of_push_seq, so exactly the last count registers (ending with r9) are stored. */
2830 sub.l r40, r39, r41 | |
2831 ptabs/l r41, tr2 | |
2832 blink tr2, r63 | |
2833 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */ | |
/* Layout-sensitive: every step must remain exactly two 4-byte instructions; the last step is the r9 store plus the returning blink. */
2834 stx.q r17, r63, r3 | |
2835 addi.l r17, 8, r17 | |
2836 stx.q r17, r63, r4 | |
2837 addi.l r17, 8, r17 | |
2838 stx.q r17, r63, r5 | |
2839 addi.l r17, 8, r17 | |
2840 stx.q r17, r63, r6 | |
2841 addi.l r17, 8, r17 | |
2842 stx.q r17, r63, r7 | |
2843 addi.l r17, 8, r17 | |
2844 stx.q r17, r63, r8 | |
2845 addi.l r17, 8, r17 | |
2846 LOCAL(ia_r9_push): /* Push r9 onto the stack. */ | |
2847 stx.q r17, r63, r9 | |
2848 LOCAL(ia_return): /* Return. */ | |
2849 blink tr0, r63 | |
2850 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */ | |
2851 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args)) | |
2852 #endif /* L_shcompact_incoming_args */ | |
2853 #endif | |
2854 #if __SH5__ | |
2855 #ifdef L_nested_trampoline | |
/* GCC_nested_trampoline: a six-instruction (24-byte) code template.  It reads two values stored directly after its own code and branches to the first one, leaving the second in r1. */
/* NOTE(review): by its name this is the template for nested-function trampolines, with the second value presumably being the static chain -- confirm against the trampoline setup code in the compiler. */
2856 #if __SH5__ == 32 | |
2857 .section .text..SHmedia32,"ax" | |
2858 #else | |
2859 .text | |
2860 #endif | |
2861 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */ | |
2862 .global GLOBAL(GCC_nested_trampoline) | |
2863 HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline)) | |
2864 GLOBAL(GCC_nested_trampoline): | |
2865 .mode SHmedia | |
/* r63 supplies a zero offset, so tr0/r0 receive a PC-relative address used as the base for the loads below. */
2866 ptrel/u r63, tr0 | |
2867 gettr tr0, r0 | |
/* First value, at offset 24 (just past the six instructions): the branch target. */
2868 #if __SH5__ == 64 | |
2869 ld.q r0, 24, r1 | |
2870 #else | |
2871 ld.l r0, 24, r1 | |
2872 #endif | |
2873 ptabs/l r1, tr1 | |
/* Second value, 8 bytes further on for 64-bit and 4 bytes further on for 32-bit; it stays in r1 across the branch. */
2874 #if __SH5__ == 64 | |
2875 ld.q r0, 32, r1 | |
2876 #else | |
2877 ld.l r0, 28, r1 | |
2878 #endif | |
2879 blink tr1, r63 | |
2880 | |
2881 ENDFUNC(GLOBAL(GCC_nested_trampoline)) | |
2882 #endif /* L_nested_trampoline */ | |
2883 #endif /* __SH5__ */ | |
2884 #if __SH5__ == 32 | |
2885 #ifdef L_push_pop_shmedia_regs | |
/* Bulk save/restore helpers.  The push routine stores tr5-tr7, r28-r35 and r44-r59 (27 slots of 8 bytes) below r15; the FPU build first stores dr36..dr62 (14 slots) above them. */
/* The pop routine restores the same registers and releases the frame.  Both return through r18. */
2886 .section .text..SHmedia32,"ax" | |
2887 .mode SHmedia | |
2888 .align 2 | |
2889 #ifndef __SH4_NOFPU__ | |
2890 .global GLOBAL(GCC_push_shmedia_regs) | |
2891 FUNC(GLOBAL(GCC_push_shmedia_regs)) | |
2892 GLOBAL(GCC_push_shmedia_regs): | |
/* FPU build: reserve 14 slots and save the even-numbered double registers dr36..dr62, then continue into the common integer part below. */
2893 addi.l r15, -14*8, r15 | |
2894 fst.d r15, 13*8, dr62 | |
2895 fst.d r15, 12*8, dr60 | |
2896 fst.d r15, 11*8, dr58 | |
2897 fst.d r15, 10*8, dr56 | |
2898 fst.d r15, 9*8, dr54 | |
2899 fst.d r15, 8*8, dr52 | |
2900 fst.d r15, 7*8, dr50 | |
2901 fst.d r15, 6*8, dr48 | |
2902 fst.d r15, 5*8, dr46 | |
2903 fst.d r15, 4*8, dr44 | |
2904 fst.d r15, 3*8, dr42 | |
2905 fst.d r15, 2*8, dr40 | |
2906 fst.d r15, 1*8, dr38 | |
2907 fst.d r15, 0*8, dr36 | |
2908 #else /* ! __SH4_NOFPU__ */ | |
2909 .global GLOBAL(GCC_push_shmedia_regs_nofpu) | |
2910 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) | |
2911 GLOBAL(GCC_push_shmedia_regs_nofpu): | |
2912 #endif /* ! __SH4_NOFPU__ */ | |
/* Common part: reserve 27 slots. */
2913 ptabs/l r18, tr0 | |
2914 addi.l r15, -27*8, r15 | |
/* r60-r62 are used as scratch to read tr5-tr7; those values occupy slots 24..26. */
2915 gettr tr7, r62 | |
2916 gettr tr6, r61 | |
2917 gettr tr5, r60 | |
2918 st.q r15, 26*8, r62 | |
2919 st.q r15, 25*8, r61 | |
2920 st.q r15, 24*8, r60 | |
2921 st.q r15, 23*8, r59 | |
2922 st.q r15, 22*8, r58 | |
2923 st.q r15, 21*8, r57 | |
2924 st.q r15, 20*8, r56 | |
2925 st.q r15, 19*8, r55 | |
2926 st.q r15, 18*8, r54 | |
2927 st.q r15, 17*8, r53 | |
2928 st.q r15, 16*8, r52 | |
2929 st.q r15, 15*8, r51 | |
2930 st.q r15, 14*8, r50 | |
2931 st.q r15, 13*8, r49 | |
2932 st.q r15, 12*8, r48 | |
2933 st.q r15, 11*8, r47 | |
2934 st.q r15, 10*8, r46 | |
2935 st.q r15, 9*8, r45 | |
2936 st.q r15, 8*8, r44 | |
2937 st.q r15, 7*8, r35 | |
2938 st.q r15, 6*8, r34 | |
2939 st.q r15, 5*8, r33 | |
2940 st.q r15, 4*8, r32 | |
2941 st.q r15, 3*8, r31 | |
2942 st.q r15, 2*8, r30 | |
2943 st.q r15, 1*8, r29 | |
2944 st.q r15, 0*8, r28 | |
2945 blink tr0, r63 | |
2946 #ifndef __SH4_NOFPU__ | |
2947 ENDFUNC(GLOBAL(GCC_push_shmedia_regs)) | |
2948 #else | |
2949 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu)) | |
2950 #endif | |
2951 #ifndef __SH4_NOFPU__ | |
2952 .global GLOBAL(GCC_pop_shmedia_regs) | |
2953 FUNC(GLOBAL(GCC_pop_shmedia_regs)) | |
2954 GLOBAL(GCC_pop_shmedia_regs): | |
/* FPU build: r0 = total frame size (41 slots); reload dr36..dr62 from slots 27..40, then join the common integer restore at .L0. */
2955 pt .L0, tr1 | |
2956 movi 41*8, r0 | |
2957 fld.d r15, 40*8, dr62 | |
2958 fld.d r15, 39*8, dr60 | |
2959 fld.d r15, 38*8, dr58 | |
2960 fld.d r15, 37*8, dr56 | |
2961 fld.d r15, 36*8, dr54 | |
2962 fld.d r15, 35*8, dr52 | |
2963 fld.d r15, 34*8, dr50 | |
2964 fld.d r15, 33*8, dr48 | |
2965 fld.d r15, 32*8, dr46 | |
2966 fld.d r15, 31*8, dr44 | |
2967 fld.d r15, 30*8, dr42 | |
2968 fld.d r15, 29*8, dr40 | |
2969 fld.d r15, 28*8, dr38 | |
2970 fld.d r15, 27*8, dr36 | |
2971 blink tr1, r63 | |
2972 #else /* ! __SH4_NOFPU__ */ | |
2973 .global GLOBAL(GCC_pop_shmedia_regs_nofpu) | |
2974 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) | |
2975 GLOBAL(GCC_pop_shmedia_regs_nofpu): | |
2976 #endif /* ! __SH4_NOFPU__ */ | |
/* No-FPU entry: only the 27 integer slots are released.  In the FPU build this movi is skipped, because the FPU path branches directly to .L0. */
2977 movi 27*8, r0 | |
2978 .L0: | |
2979 ptabs r18, tr0 | |
/* Reload slots 24..26 first so tr5-tr7 can be rebuilt from them. */
2980 ld.q r15, 26*8, r62 | |
2981 ld.q r15, 25*8, r61 | |
2982 ld.q r15, 24*8, r60 | |
2983 ptabs r62, tr7 | |
2984 ptabs r61, tr6 | |
2985 ptabs r60, tr5 | |
2986 ld.q r15, 23*8, r59 | |
2987 ld.q r15, 22*8, r58 | |
2988 ld.q r15, 21*8, r57 | |
2989 ld.q r15, 20*8, r56 | |
2990 ld.q r15, 19*8, r55 | |
2991 ld.q r15, 18*8, r54 | |
2992 ld.q r15, 17*8, r53 | |
2993 ld.q r15, 16*8, r52 | |
2994 ld.q r15, 15*8, r51 | |
2995 ld.q r15, 14*8, r50 | |
2996 ld.q r15, 13*8, r49 | |
2997 ld.q r15, 12*8, r48 | |
2998 ld.q r15, 11*8, r47 | |
2999 ld.q r15, 10*8, r46 | |
3000 ld.q r15, 9*8, r45 | |
3001 ld.q r15, 8*8, r44 | |
3002 ld.q r15, 7*8, r35 | |
3003 ld.q r15, 6*8, r34 | |
3004 ld.q r15, 5*8, r33 | |
3005 ld.q r15, 4*8, r32 | |
3006 ld.q r15, 3*8, r31 | |
3007 ld.q r15, 2*8, r30 | |
3008 ld.q r15, 1*8, r29 | |
3009 ld.q r15, 0*8, r28 | |
/* Release the frame: r0 holds 27*8 or 41*8 depending on the entry point used. */
3010 add.l r15, r0, r15 | |
3011 blink tr0, r63 | |
3012 | |
3013 #ifndef __SH4_NOFPU__ | |
3014 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs)) | |
3015 #else | |
3016 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu)) | |
3017 #endif | |
3018 #endif /* L_push_pop_shmedia_regs */ | |
3019 #endif /* __SH5__ == 32 */ | |
3020 | |
3021 #ifdef L_div_table | |
3022 #if __SH5__ | |
3023 #if defined(__pic__) && defined(__SHMEDIA__) | |
3024 .global GLOBAL(sdivsi3) | |
3025 FUNC(GLOBAL(sdivsi3)) | |
3026 #if __SH5__ == 32 | |
3027 .section .text..SHmedia32,"ax" | |
3028 #else | |
3029 .text | |
3030 #endif | |
3031 #if 0 | |
3032 /* ??? FIXME: Presumably due to a linker bug, exporting data symbols | |
3033 in a text section does not work (at least for shared libraries): | |
3034 the linker sets the LSB of the address as if this was SHmedia code. */ | |
3035 #define TEXT_DATA_BUG | |
3036 #endif | |
3037 .align 2 | |
/* sdivsi3 (PIC SHmedia version): division by reciprocal approximation. */
/* The divisor r5 is normalized, an initial inverse is formed from a byte factor and a 16-bit constant looked up in the division table, the inverse is refined (see the "11 bit" and "22 bit" notes below) and multiplied by the dividend r4. */
3038 // inputs: r4,r5 | |
3039 // clobbered: r1,r18,r19,r20,r21,r25,tr0 | |
3040 // result in r0 | |
3041 .global GLOBAL(sdivsi3) | |
3042 GLOBAL(sdivsi3): | |
/* tr0 temporarily carries the table address; because the #define of TEXT_DATA_BUG above sits inside "#if 0", the #else branch is used unless the macro is defined elsewhere. */
3043 #ifdef TEXT_DATA_BUG | |
3044 ptb datalabel Local_div_table,tr0 | |
3045 #else | |
3046 ptb GLOBAL(div_table_internal),tr0 | |
3047 #endif | |
3048 nsb r5, r1 | |
3049 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62 | |
3050 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1) | |
3051 /* bubble */ | |
/* r20 = table address; r21 is a signed index, used unscaled for the byte factor and doubled for the 16-bit constant. */
3052 gettr tr0,r20 | |
3053 ldx.ub r20, r21, r19 // u0.8 | |
3054 shari r25, 32, r25 // normalize to s2.30 | |
3055 shlli r21, 1, r21 | |
3056 muls.l r25, r19, r19 // s2.38 | |
3057 ldx.w r20, r21, r21 // s2.14 | |
/* The return address is moved from r18 into tr0 here, which frees r18 for use as scratch below. */
3058 ptabs r18, tr0 | |
3059 shari r19, 24, r19 // truncate to s2.14 | |
3060 sub r21, r19, r19 // some 11 bit inverse in s1.14 | |
3061 muls.l r19, r19, r21 // u0.28 | |
/* r1 = 92 - nsb(r5): the shift count applied by the final shard. */
3062 sub r63, r1, r1 | |
3063 addi r1, 92, r1 | |
3064 muls.l r25, r21, r18 // s2.58 | |
3065 shlli r19, 45, r19 // multiply by two and convert to s2.58 | |
3066 /* bubble */ | |
3067 sub r19, r18, r18 | |
3068 shari r18, 28, r18 // some 22 bit inverse in s1.30 | |
3069 muls.l r18, r25, r0 // s2.60 | |
3070 muls.l r18, r4, r25 // s32.30 | |
3071 /* bubble */ | |
3072 shari r0, 16, r19 // s-16.44 | |
3073 muls.l r19, r18, r19 // s-16.74 | |
/* r0 = sign mask (all ones or zero) of the product of dividend and inverse. */
3074 shari r25, 63, r0 | |
3075 shari r4, 14, r18 // s19.-14 | |
3076 shari r19, 30, r19 // s-16.44 | |
3077 muls.l r19, r18, r19 // s15.30 | |
3078 xor r21, r0, r21 // You could also use the constant 1 << 27. | |
3079 add r21, r25, r21 | |
3080 sub r21, r19, r21 | |
3081 shard r21, r1, r21 | |
/* Subtracting the sign mask adds 1 when the intermediate product was negative. */
3082 sub r21, r0, r0 | |
3083 blink tr0, r63 | |
3084 ENDFUNC(GLOBAL(sdivsi3)) | |
3085 /* This table has been generated by divtab.c . | |
3086 Defects for bias -330: | |
3087 Max defect: 6.081536e-07 at -1.000000e+00 | |
3088 Min defect: 2.849516e-08 at 1.030651e+00 | |
3089 Max 2nd step defect: 9.606539e-12 at -1.000000e+00 | |
3090 Min 2nd step defect: 0.000000e+00 at 0.000000e+00 | |
3091 Defect at 1: 1.238659e-07 | |
3092 Defect at -2: 1.061708e-07 */ | |
3093 #else /* ! __pic__ || ! __SHMEDIA__ */ | |
3094 .section .rodata | |
3095 #endif /* __pic__ */ | |
/* Private copy of the SHmedia reciprocal lookup table, assembled only when
   TEXT_DATA_BUG, __pic__ and __SHMEDIA__ are all defined.

   Layout: the Local_div_table label sits in the middle of the data; 64
   bytes precede it and 64 bytes follow it:
     label-64 .. label-33   16 words, "negative division constants"
     label-32 .. label-17   16 bytes, "negative division factors"
     label-16 .. label+15   two 16-byte gaps (.skip 16 on each side)
     label+16 .. label+31   16 bytes, "positive division factors"
     label+32 .. label+63   16 words, "positive division constants"
   The sdivsi3 code that precedes this table loads a byte at table+index
   (ldx.ub) and a word at table+2*index (ldx.w) using a signed index, which
   is why the entries for negative indices are placed before the label.  */
3096 #if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__) | |
3097 .balign 2 | |
3098 .type Local_div_table,@object | |
3099 .size Local_div_table,128 | |
3100 /* negative division constants */ | |
3101 .word -16638 | |
3102 .word -17135 | |
3103 .word -17737 | |
3104 .word -18433 | |
3105 .word -19103 | |
3106 .word -19751 | |
3107 .word -20583 | |
3108 .word -21383 | |
3109 .word -22343 | |
3110 .word -23353 | |
3111 .word -24407 | |
3112 .word -25582 | |
3113 .word -26863 | |
3114 .word -28382 | |
3115 .word -29965 | |
3116 .word -31800 | |
3117 /* negative division factors */ | |
3118 .byte 66 | |
3119 .byte 70 | |
3120 .byte 75 | |
3121 .byte 81 | |
3122 .byte 87 | |
3123 .byte 93 | |
3124 .byte 101 | |
3125 .byte 109 | |
3126 .byte 119 | |
3127 .byte 130 | |
3128 .byte 142 | |
3129 .byte 156 | |
3130 .byte 172 | |
3131 .byte 192 | |
3132 .byte 214 | |
3133 .byte 241 | |
/* Gap for the unused indices just below zero.  */
3134 .skip 16 | |
3135 Local_div_table: | |
/* Gap for the unused indices just above zero.  */
3136 .skip 16 | |
3137 /* positive division factors */ | |
3138 .byte 241 | |
3139 .byte 214 | |
3140 .byte 192 | |
3141 .byte 172 | |
3142 .byte 156 | |
3143 .byte 142 | |
3144 .byte 130 | |
3145 .byte 119 | |
3146 .byte 109 | |
3147 .byte 101 | |
3148 .byte 93 | |
3149 .byte 87 | |
3150 .byte 81 | |
3151 .byte 75 | |
3152 .byte 70 | |
3153 .byte 66 | |
3154 /* positive division constants */ | |
3155 .word 31801 | |
3156 .word 29966 | |
3157 .word 28383 | |
3158 .word 26864 | |
3159 .word 25583 | |
3160 .word 24408 | |
3161 .word 23354 | |
3162 .word 22344 | |
3163 .word 21384 | |
3164 .word 20584 | |
3165 .word 19752 | |
3166 .word 19104 | |
3167 .word 18434 | |
3168 .word 17738 | |
3169 .word 17136 | |
3170 .word 16639 | |
/* The data emitted after this conditional block goes into .rodata.  */
3171 .section .rodata | |
3172 #endif /* TEXT_DATA_BUG */ | |
/* Global SHmedia reciprocal lookup table (div_table).

   The label is placed in the middle of the data: 16 words and 16 bytes for
   negative indices plus a 16-byte gap precede it; a 16-byte gap, 16 bytes
   and 16 words for positive indices follow it (64 bytes on each side).
   The sdivsi3 code earlier in this file reads a byte at table+index and a
   word at table+2*index with a signed index.
   HIDDEN_ALIAS is a macro defined outside this excerpt; presumably it
   provides the div_table_internal name that sdivsi3 references -- confirm
   against lib1funcs.h.  */
3173 .balign 2 | |
3174 .type GLOBAL(div_table),@object | |
3175 .size GLOBAL(div_table),128 | |
3176 /* negative division constants */ | |
3177 .word -16638 | |
3178 .word -17135 | |
3179 .word -17737 | |
3180 .word -18433 | |
3181 .word -19103 | |
3182 .word -19751 | |
3183 .word -20583 | |
3184 .word -21383 | |
3185 .word -22343 | |
3186 .word -23353 | |
3187 .word -24407 | |
3188 .word -25582 | |
3189 .word -26863 | |
3190 .word -28382 | |
3191 .word -29965 | |
3192 .word -31800 | |
3193 /* negative division factors */ | |
3194 .byte 66 | |
3195 .byte 70 | |
3196 .byte 75 | |
3197 .byte 81 | |
3198 .byte 87 | |
3199 .byte 93 | |
3200 .byte 101 | |
3201 .byte 109 | |
3202 .byte 119 | |
3203 .byte 130 | |
3204 .byte 142 | |
3205 .byte 156 | |
3206 .byte 172 | |
3207 .byte 192 | |
3208 .byte 214 | |
3209 .byte 241 | |
/* Gap for the unused indices just below zero.  */
3210 .skip 16 | |
3211 .global GLOBAL(div_table) | |
3212 GLOBAL(div_table): | |
3213 HIDDEN_ALIAS(div_table_internal,div_table) | |
/* Gap for the unused indices just above zero.  */
3214 .skip 16 | |
3215 /* positive division factors */ | |
3216 .byte 241 | |
3217 .byte 214 | |
3218 .byte 192 | |
3219 .byte 172 | |
3220 .byte 156 | |
3221 .byte 142 | |
3222 .byte 130 | |
3223 .byte 119 | |
3224 .byte 109 | |
3225 .byte 101 | |
3226 .byte 93 | |
3227 .byte 87 | |
3228 .byte 81 | |
3229 .byte 75 | |
3230 .byte 70 | |
3231 .byte 66 | |
3232 /* positive division constants */ | |
3233 .word 31801 | |
3234 .word 29966 | |
3235 .word 28383 | |
3236 .word 26864 | |
3237 .word 25583 | |
3238 .word 24408 | |
3239 .word 23354 | |
3240 .word 22344 | |
3241 .word 21384 | |
3242 .word 20584 | |
3243 .word 19752 | |
3244 .word 19104 | |
3245 .word 18434 | |
3246 .word 17738 | |
3247 .word 17136 | |
3248 .word 16639 | |
3249 | |
3250 #elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__) | |
3251 /* This code uses shld, thus is not suitable for SH1 / SH2. */ | |
3252 | |
3253 /* Signed / unsigned division without use of FPU, optimized for SH4. | |
3254 Uses a lookup table for divisors in the range -128 .. +128, and | |
3255 div1 with case distinction for larger divisors in three more ranges. | |
3256 The code is lumped together with the table to allow the use of mova. */ | |
/* Endian-dependent byte displacements inside a 32-bit word.  The division
   routines below use L_LSWMSB and L_MSWLSB as displacements from r15 in
   mov.b stores, to drop single bytes into a scratch word on the stack.
   The values are consistent with reading the names as: L_LSB = least
   significant byte, L_LSWMSB = high byte of the low 16-bit half,
   L_MSWLSB = low byte of the high 16-bit half.  */
3257 #ifdef __LITTLE_ENDIAN__ | |
3258 #define L_LSB 0 | |
3259 #define L_LSWMSB 1 | |
3260 #define L_MSWLSB 2 | |
3261 #else | |
3262 #define L_LSB 3 | |
3263 #define L_LSWMSB 2 | |
3264 #define L_MSWLSB 1 | |
3265 #endif | |
3266 | |
/* udivsi3_i4i: unsigned 32-bit division without the FPU.

   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are pushed on the stack and reloaded before every return;
   r1 is scratch.  T is modified, and MACH/MACL are written on the table
   path (dmulu.l).

   The divisor selects one of several strategies:
     divisor <= 128                  multiply by a tabulated inverse
                                     (udiv_le128)
     128 < divisor < 0x10000         div1 steps, continued at udiv_25,
                                     which is located inside sdivsi3_i4i
     divisor >= 0x10000              udiv_ge64k; when additionally
                                     divisor > (dividend >> 8) the quotient
                                     is below 256 and udiv_r8 produces it
                                     with eight div1 steps.

   Labels without the "u" prefix (div_le128, div_ge64k, div_r8,
   div_by_1_neg) are entry points used by sdivsi3_i4i, which has already
   pushed r4 and r5 when it branches here.  */
3267 .balign 4 | |
3268 .global GLOBAL(udivsi3_i4i) | |
3269 FUNC(GLOBAL(udivsi3_i4i)) | |
3270 GLOBAL(udivsi3_i4i): | |
/* r1 = 128 (threshold for the table path); r0 = dividend >> 8.  */
3271 mov.w LOCAL(c128_w), r1 | |
3272 div0u | |
3273 mov r4,r0 | |
3274 shlr8 r0 | |
/* T = divisor > 128 (unsigned); r1 = low 16 bits of the divisor.  */
3275 cmp/hi r1,r5 | |
3276 extu.w r5,r1 | |
3277 bf LOCAL(udiv_le128) | |
/* T = divisor fits in 16 bits; if not, take the >= 64k path.  */
3278 cmp/eq r5,r1 | |
3279 bf LOCAL(udiv_ge64k) | |
/* 16-bit divisor: r0 = dividend >> 9, r1 keeps the original divisor and
   r5 is moved to the upper half for the div1 steps.  The dividend and the
   original divisor are pushed, interleaved with the first div1 steps.  */
3280 shlr r0 | |
3281 mov r5,r1 | |
3282 shll16 r5 | |
3283 mov.l r4,@-r15 | |
3284 div1 r5,r0 | |
3285 mov.l r1,@-r15 | |
3286 div1 r5,r0 | |
3287 div1 r5,r0 | |
3288 bra LOCAL(udiv_25) | |
/* Delay slot: fourth div1 step.  */
3289 div1 r5,r0 | |
3290 | |
/* Signed entry for divisor <= 128 (r4/r5 already saved by the caller).  */
3291 LOCAL(div_le128): | |
3292 mova LOCAL(div_table_ix),r0 | |
3293 bra LOCAL(div_le128_2) | |
/* Delay slot: r1 = div_table_ix[divisor].  */
3294 mov.b @(r0,r5),r1 | |
/* Unsigned entry for divisor <= 128: save r4/r5 and fetch the index.  */
3295 LOCAL(udiv_le128): | |
3296 mov.l r4,@-r15 | |
3297 mova LOCAL(div_table_ix),r0 | |
3298 mov.b @(r0,r5),r1 | |
3299 mov.l r5,@-r15 | |
3300 LOCAL(div_le128_2): | |
/* r1 is a signed byte offset relative to div_table_inv; load the
   normalized inverse.  For a divisor of 0 the offset is not a multiple of
   four, so this load is misaligned (see the note at div_table_ix).  */
3301 mova LOCAL(div_table_inv),r0 | |
3302 mov.l @(r0,r1),r1 | |
/* T = ((divisor & 0xfe) == 0), i.e. the divisor is 0 or 1.  */
3303 mov r5,r0 | |
3304 tst #0xfe,r0 | |
3305 mova LOCAL(div_table_clz),r0 | |
/* MACH:MACL = inverse * dividend; r1 = shift count for this divisor.  */
3306 dmulu.l r1,r4 | |
3307 mov.b @(r0,r5),r1 | |
3308 bt/s LOCAL(div_by_1) | |
/* Delay slot: r0 = dividend, which is the result when dividing by 1.  */
3309 mov r4,r0 | |
3310 mov.l @r15+,r5 | |
3311 sts mach,r0 | |
/* T is known to be clear here because the bt/s above was not taken, so no
   clrt is needed.  Adding the dividend accounts for the implicit leading 1
   of the tabulated inverse; the carry out is rotated back in by rotcr.  */
3312 /* clrt */ | |
3313 addc r4,r0 | |
3314 mov.l @r15+,r4 | |
3315 rotcr r0 | |
3316 rts | |
/* Delay slot: final scaling by the tabulated shift count (shld shifts
   right for negative counts).  */
3317 shld r1,r0 | |
3318 | |
/* Division by 1 with a negative result: r0 = -r4.  */
3319 LOCAL(div_by_1_neg): | |
3320 neg r4,r0 | |
/* Division by 1: r0 already holds the result; restore r5, r4 and return.  */
3321 LOCAL(div_by_1): | |
3322 mov.l @r15+,r5 | |
3323 rts | |
3324 mov.l @r15+,r4 | |
3325 | |
/* Signed entry for divisor >= 64k.  T was set by the caller's
   cmp/hi r0,r5 (divisor > dividend >> 8).  */
3326 LOCAL(div_ge64k): | |
3327 bt/s LOCAL(div_r8) | |
3328 div0u | |
3329 shll8 r5 | |
3330 bra LOCAL(div_ge64k_2) | |
3331 div1 r5,r0 | |
/* Unsigned entry for divisor >= 64k.  r0 = dividend >> 8 at this point.  */
3332 LOCAL(udiv_ge64k): | |
/* T = divisor > (dividend >> 8): the quotient then fits in 8 bits.  */
3333 cmp/hi r0,r5 | |
3334 mov r5,r1 | |
3335 bt LOCAL(udiv_r8) | |
/* Otherwise shift the divisor up by 8, save the dividend and the original
   divisor (kept in r1), and start the div1 steps.  */
3336 shll8 r5 | |
3337 mov.l r4,@-r15 | |
3338 div1 r5,r0 | |
3339 mov.l r1,@-r15 | |
3340 LOCAL(div_ge64k_2): | |
3341 div1 r5,r0 | |
/* r1 = 0, pushed below as a scratch word for collecting result bytes.  */
3342 mov.l LOCAL(zero_l),r1 | |
3343 .rept 4 | |
3344 div1 r5,r0 | |
3345 .endr | |
3346 mov.l r1,@-r15 | |
3347 div1 r5,r0 | |
/* r1 = 0xff00 sign-extended by mov.w, i.e. a mask of all bits above the
   low byte.  */
3348 mov.w LOCAL(m256_w),r1 | |
3349 div1 r5,r0 | |
/* Store the low byte of r0 into the scratch word, then replace the low
   byte of r0 with the low byte of the dividend:
   r0 = ((r0 ^ r4) & mask) ^ r4.  */
3350 mov.b r0,@(L_LSWMSB,r15) | |
3351 xor r4,r0 | |
3352 and r1,r0 | |
3353 bra LOCAL(div_ge64k_end) | |
3354 xor r4,r0 | |
3355 | |
/* Signed entry for the 8-bit-quotient case: r4 <<= 24.  */
3356 LOCAL(div_r8): | |
3357 shll16 r4 | |
3358 bra LOCAL(div_r8_2) | |
3359 shll8 r4 | |
/* Unsigned entry for the 8-bit-quotient case: save r4/r5, clear T and
   shift the low byte of the dividend to the top of r4.  */
3360 LOCAL(udiv_r8): | |
3361 mov.l r4,@-r15 | |
3362 shll16 r4 | |
3363 clrt | |
3364 shll8 r4 | |
3365 mov.l r5,@-r15 | |
3366 LOCAL(div_r8_2): | |
/* r1 takes over r0 (dividend >> 8) as the div1 operand, while T is rotated
   through r4 and then r0 between the steps.  Eight div1 steps are done
   in total.  */
3367 rotcl r4 | |
3368 mov r0,r1 | |
3369 div1 r5,r1 | |
3370 mov r4,r0 | |
3371 rotcl r0 | |
/* Keep a copy of the divisor in r4, because r5 is restored from the stack
   before the last div1.  */
3372 mov r5,r4 | |
3373 div1 r5,r1 | |
3374 .rept 5 | |
3375 rotcl r0; div1 r5,r1 | |
3376 .endr | |
3377 rotcl r0 | |
3378 mov.l @r15+,r5 | |
3379 div1 r4,r1 | |
3380 mov.l @r15+,r4 | |
3381 rts | |
/* Delay slot: rotate the last T value into r0.  */
3382 rotcl r0 | |
3383 | |
3384 ENDFUNC(GLOBAL(udivsi3_i4i)) | |
3385 | |
/* sdivsi3_i4i: signed 32-bit division without the FPU.

   In:   r4 = dividend, r5 = divisor.
   Out:  r0 = quotient.
   r4 and r5 are pushed on entry and reloaded before every return; r1 is
   scratch.

   Both operands are replaced by their absolute values, and the code then
   follows one of two parallel paths: pos_result (the operands had the
   same sign) or neg_result (the signs differed, so the quotient is
   negated at the end).  Each path dispatches on the divisor magnitude in
   the same way as udivsi3_i4i; the positive path reuses the div_le128,
   div_ge64k and div_r8 entry points located in udivsi3_i4i.  */
3386 .global GLOBAL(sdivsi3_i4i) | |
3387 FUNC(GLOBAL(sdivsi3_i4i)) | |
3388 /* This is link-compatible with a GLOBAL(sdivsi3) call, | |
3389 but we effectively clobber only r1. */ | |
3390 GLOBAL(sdivsi3_i4i): | |
/* Save the dividend; T = divisor >= 0; r1 = 128.  */
3391 mov.l r4,@-r15 | |
3392 cmp/pz r5 | |
3393 mov.w LOCAL(c128_w), r1 | |
3394 bt/s LOCAL(pos_divisor) | |
/* Delay slot: T = dividend >= 0.  */
3395 cmp/pz r4 | |
/* Negative divisor: save it and take its absolute value.  */
3396 mov.l r5,@-r15 | |
3397 neg r5,r5 | |
/* Dividend >= 0 with a negative divisor gives a negative result.  */
3398 bt/s LOCAL(neg_result) | |
/* Delay slot: T = |divisor| > 128 (unsigned).  */
3399 cmp/hi r1,r5 | |
/* Both operands negative: negate the dividend, the result is positive.  */
3400 neg r4,r4 | |
3401 LOCAL(pos_result): | |
3402 extu.w r5,r0 | |
3403 bf LOCAL(div_le128) | |
/* T = divisor fits in 16 bits; r0 = dividend >> 8.  */
3404 cmp/eq r5,r0 | |
3405 mov r4,r0 | |
3406 shlr8 r0 | |
3407 bf/s LOCAL(div_ge64k) | |
/* Delay slot: T = divisor > (dividend >> 8), tested at div_ge64k.  */
3408 cmp/hi r0,r5 | |
/* 16-bit divisor: move it to the upper half and start the div1 steps.  */
3409 div0u | |
3410 shll16 r5 | |
3411 div1 r5,r0 | |
3412 div1 r5,r0 | |
3413 div1 r5,r0 | |
/* udivsi3_i4i joins here for 16-bit divisors, after its own first four
   div1 steps.  */
3414 LOCAL(udiv_25): | |
/* r1 = 0, pushed below as a scratch word for collecting result bytes.  */
3415 mov.l LOCAL(zero_l),r1 | |
3416 div1 r5,r0 | |
3417 div1 r5,r0 | |
3418 mov.l r1,@-r15 | |
3419 .rept 3 | |
3420 div1 r5,r0 | |
3421 .endr | |
/* Store the low byte of r0 into the scratch word, then replace the low
   16 bits of r0 with the low 16 bits of the dividend (xtrct followed by
   swap.w).  */
3422 mov.b r0,@(L_MSWLSB,r15) | |
3423 xtrct r4,r0 | |
3424 swap.w r0,r0 | |
3425 .rept 8 | |
3426 div1 r5,r0 | |
3427 .endr | |
3428 mov.b r0,@(L_LSWMSB,r15) | |
/* Common tail, also reached from div_ge64k_2: eight more div1 steps, then
   merge the low byte of r0 with the bytes collected in the scratch word.  */
3429 LOCAL(div_ge64k_end): | |
3430 .rept 8 | |
3431 div1 r5,r0 | |
3432 .endr | |
3433 mov.l @r15+,r4 ! zero-extension and swap using LS unit. | |
3434 extu.b r0,r0 | |
3435 mov.l @r15+,r5 | |
3436 or r4,r0 | |
3437 mov.l @r15+,r4 | |
3438 rts | |
/* Delay slot: rotate the last T value into the result.  */
3439 rotcl r0 | |
3440 | |
/* Negative-result variant of the table path for divisor <= 128.
   r0 holds the zero-extended low 16 bits of the divisor on entry.  */
3441 LOCAL(div_le128_neg): | |
/* T = divisor is 0 or 1.  */
3442 tst #0xfe,r0 | |
3443 mova LOCAL(div_table_ix),r0 | |
3444 mov.b @(r0,r5),r1 | |
3445 mova LOCAL(div_table_inv),r0 | |
3446 bt/s LOCAL(div_by_1_neg) | |
/* Delay slot: r1 = tabulated inverse (misaligned load when divisor is 0).  */
3447 mov.l @(r0,r1),r1 | |
3448 mova LOCAL(div_table_clz),r0 | |
3449 dmulu.l r1,r4 | |
3450 mov.b @(r0,r5),r1 | |
3451 mov.l @r15+,r5 | |
3452 sts mach,r0 | |
/* T is clear here because the bt/s above was not taken.  */
3453 /* clrt */ | |
3454 addc r4,r0 | |
3455 mov.l @r15+,r4 | |
3456 rotcr r0 | |
3457 shld r1,r0 | |
3458 rts | |
/* Delay slot: negate the quotient.  */
3459 neg r0,r0 | |
3460 | |
/* Divisor >= 0: save it.  T still holds dividend >= 0.  */
3461 LOCAL(pos_divisor): | |
3462 mov.l r5,@-r15 | |
3463 bt/s LOCAL(pos_result) | |
/* Delay slot: T = divisor > 128 (unsigned).  */
3464 cmp/hi r1,r5 | |
/* Negative dividend with a non-negative divisor: negate the dividend and
   fall through to the negative-result path.  */
3465 neg r4,r4 | |
3466 LOCAL(neg_result): | |
3467 extu.w r5,r0 | |
3468 bf LOCAL(div_le128_neg) | |
3469 cmp/eq r5,r0 | |
3470 mov r4,r0 | |
3471 shlr8 r0 | |
3472 bf/s LOCAL(div_ge64k_neg) | |
3473 cmp/hi r0,r5 | |
/* 16-bit divisor, negative result: same step sequence as the positive
   path, but the bytes are merged into r1 and negated at the end.  */
3474 div0u | |
3475 mov.l LOCAL(zero_l),r1 | |
3476 shll16 r5 | |
3477 div1 r5,r0 | |
3478 mov.l r1,@-r15 | |
3479 .rept 7 | |
3480 div1 r5,r0 | |
3481 .endr | |
3482 mov.b r0,@(L_MSWLSB,r15) | |
3483 xtrct r4,r0 | |
3484 swap.w r0,r0 | |
3485 .rept 8 | |
3486 div1 r5,r0 | |
3487 .endr | |
3488 mov.b r0,@(L_LSWMSB,r15) | |
3489 LOCAL(div_ge64k_neg_end): | |
3490 .rept 8 | |
3491 div1 r5,r0 | |
3492 .endr | |
3493 mov.l @r15+,r4 ! zero-extension and swap using LS unit. | |
3494 extu.b r0,r1 | |
3495 mov.l @r15+,r5 | |
3496 or r4,r1 | |
/* Shared return for the negative paths: restore r4, rotate the last T
   value into r1 and return r0 = -r1.  */
3497 LOCAL(div_r8_neg_end): | |
3498 mov.l @r15+,r4 | |
3499 rotcl r1 | |
3500 rts | |
3501 neg r1,r0 | |
3502 | |
/* Divisor >= 64k, negative result.  T = divisor > (dividend >> 8).  */
3503 LOCAL(div_ge64k_neg): | |
3504 bt/s LOCAL(div_r8_neg) | |
3505 div0u | |
3506 shll8 r5 | |
3507 mov.l LOCAL(zero_l),r1 | |
3508 .rept 6 | |
3509 div1 r5,r0 | |
3510 .endr | |
3511 mov.l r1,@-r15 | |
3512 div1 r5,r0 | |
/* r1 = 0xff00 sign-extended by mov.w (mask of all bits above the low
   byte).  */
3513 mov.w LOCAL(m256_w),r1 | |
3514 div1 r5,r0 | |
/* Store the low byte of r0, then replace the low byte of r0 with the low
   byte of the dividend: r0 = ((r0 ^ r4) & mask) ^ r4.  */
3515 mov.b r0,@(L_LSWMSB,r15) | |
3516 xor r4,r0 | |
3517 and r1,r0 | |
3518 bra LOCAL(div_ge64k_neg_end) | |
3519 xor r4,r0 | |
3520 | |
/* 16-bit constant 128, loaded with mov.w by both division entry points.  */
3521 LOCAL(c128_w): | |
3522 .word 128 | |
3523 | |
/* 8-bit quotient, negative result: r1 receives the rotated bits while
   r0 (dividend >> 8) is the div1 operand.  r4 keeps a copy of the divisor
   because r5 is restored before the last div1.  */
3524 LOCAL(div_r8_neg): | |
3525 clrt | |
3526 shll16 r4 | |
3527 mov r4,r1 | |
3528 shll8 r1 | |
3529 mov r5,r4 | |
3530 .rept 7 | |
3531 rotcl r1; div1 r5,r0 | |
3532 .endr | |
3533 mov.l @r15+,r5 | |
3534 rotcl r1 | |
3535 bra LOCAL(div_r8_neg_end) | |
3536 div1 r4,r0 | |
3537 | |
/* 16-bit constant 0xff00; mov.w sign-extends it to 0xffffff00.  */
3538 LOCAL(m256_w): | |
3539 .word 0xff00 | |
/* div_table_clz: per-divisor shift counts, one signed byte per divisor,
   indexed directly by the divisor (mov.b @(r0,r5),r1 with r0 = table
   address).  The value is used as the count of the final shld in the
   table-driven division paths; shld shifts right for negative counts.
   The 128 bytes here cover divisors 0 .. 127; index 128 lands on the first
   byte of div_table_ix, which follows immediately.
   For divisors 2 .. 127 the entries equal 1 - ceil(log2(divisor)).  */
3540 /* This table has been generated by divtab-sh4.c. */ | |
3541 .balign 4 | |
3542 LOCAL(div_table_clz): | |
3543 .byte 0 | |
3544 .byte 1 | |
3545 .byte 0 | |
3546 .byte -1 | |
3547 .byte -1 | |
3548 .byte -2 | |
3549 .byte -2 | |
3550 .byte -2 | |
3551 .byte -2 | |
3552 .byte -3 | |
3553 .byte -3 | |
3554 .byte -3 | |
3555 .byte -3 | |
3556 .byte -3 | |
3557 .byte -3 | |
3558 .byte -3 | |
3559 .byte -3 | |
3560 .byte -4 | |
3561 .byte -4 | |
3562 .byte -4 | |
3563 .byte -4 | |
3564 .byte -4 | |
3565 .byte -4 | |
3566 .byte -4 | |
3567 .byte -4 | |
3568 .byte -4 | |
3569 .byte -4 | |
3570 .byte -4 | |
3571 .byte -4 | |
3572 .byte -4 | |
3573 .byte -4 | |
3574 .byte -4 | |
3575 .byte -4 | |
3576 .byte -5 | |
3577 .byte -5 | |
3578 .byte -5 | |
3579 .byte -5 | |
3580 .byte -5 | |
3581 .byte -5 | |
3582 .byte -5 | |
3583 .byte -5 | |
3584 .byte -5 | |
3585 .byte -5 | |
3586 .byte -5 | |
3587 .byte -5 | |
3588 .byte -5 | |
3589 .byte -5 | |
3590 .byte -5 | |
3591 .byte -5 | |
3592 .byte -5 | |
3593 .byte -5 | |
3594 .byte -5 | |
3595 .byte -5 | |
3596 .byte -5 | |
3597 .byte -5 | |
3598 .byte -5 | |
3599 .byte -5 | |
3600 .byte -5 | |
3601 .byte -5 | |
3602 .byte -5 | |
3603 .byte -5 | |
3604 .byte -5 | |
3605 .byte -5 | |
3606 .byte -5 | |
3607 .byte -5 | |
3608 .byte -6 | |
3609 .byte -6 | |
3610 .byte -6 | |
3611 .byte -6 | |
3612 .byte -6 | |
3613 .byte -6 | |
3614 .byte -6 | |
3615 .byte -6 | |
3616 .byte -6 | |
3617 .byte -6 | |
3618 .byte -6 | |
3619 .byte -6 | |
3620 .byte -6 | |
3621 .byte -6 | |
3622 .byte -6 | |
3623 .byte -6 | |
3624 .byte -6 | |
3625 .byte -6 | |
3626 .byte -6 | |
3627 .byte -6 | |
3628 .byte -6 | |
3629 .byte -6 | |
3630 .byte -6 | |
3631 .byte -6 | |
3632 .byte -6 | |
3633 .byte -6 | |
3634 .byte -6 | |
3635 .byte -6 | |
3636 .byte -6 | |
3637 .byte -6 | |
3638 .byte -6 | |
3639 .byte -6 | |
3640 .byte -6 | |
3641 .byte -6 | |
3642 .byte -6 | |
3643 .byte -6 | |
3644 .byte -6 | |
3645 .byte -6 | |
3646 .byte -6 | |
3647 .byte -6 | |
3648 .byte -6 | |
3649 .byte -6 | |
3650 .byte -6 | |
3651 .byte -6 | |
3652 .byte -6 | |
3653 .byte -6 | |
3654 .byte -6 | |
3655 .byte -6 | |
3656 .byte -6 | |
3657 .byte -6 | |
3658 .byte -6 | |
3659 .byte -6 | |
3660 .byte -6 | |
3661 .byte -6 | |
3662 .byte -6 | |
3663 .byte -6 | |
3664 .byte -6 | |
3665 .byte -6 | |
3666 .byte -6 | |
3667 .byte -6 | |
3668 .byte -6 | |
3669 .byte -6 | |
3670 .byte -6 | |
/* div_table_ix: one signed byte per divisor 0 .. 128, indexed directly by
   the divisor.  Each value is a byte offset relative to div_table_inv and
   selects a 32-bit inverse from the table that starts at zero_l (offset
   -128) and runs through div_table_inv (offsets 0 .. 124).  Divisors that
   differ only by a power-of-two factor share an entry; for divisors
   64 .. 127 the offset is (divisor - 96) * 4.  */
3671 /* Lookup table translating positive divisor to index into table of | |
3672 normalized inverse. N.B. the '0' entry is also the last entry of the | |
3673 previous table, and causes an unaligned access for division by zero. */ | |
3674 LOCAL(div_table_ix): | |
3675 .byte -6 | |
3676 .byte -128 | |
3677 .byte -128 | |
3678 .byte 0 | |
3679 .byte -128 | |
3680 .byte -64 | |
3681 .byte 0 | |
3682 .byte 64 | |
3683 .byte -128 | |
3684 .byte -96 | |
3685 .byte -64 | |
3686 .byte -32 | |
3687 .byte 0 | |
3688 .byte 32 | |
3689 .byte 64 | |
3690 .byte 96 | |
3691 .byte -128 | |
3692 .byte -112 | |
3693 .byte -96 | |
3694 .byte -80 | |
3695 .byte -64 | |
3696 .byte -48 | |
3697 .byte -32 | |
3698 .byte -16 | |
3699 .byte 0 | |
3700 .byte 16 | |
3701 .byte 32 | |
3702 .byte 48 | |
3703 .byte 64 | |
3704 .byte 80 | |
3705 .byte 96 | |
3706 .byte 112 | |
3707 .byte -128 | |
3708 .byte -120 | |
3709 .byte -112 | |
3710 .byte -104 | |
3711 .byte -96 | |
3712 .byte -88 | |
3713 .byte -80 | |
3714 .byte -72 | |
3715 .byte -64 | |
3716 .byte -56 | |
3717 .byte -48 | |
3718 .byte -40 | |
3719 .byte -32 | |
3720 .byte -24 | |
3721 .byte -16 | |
3722 .byte -8 | |
3723 .byte 0 | |
3724 .byte 8 | |
3725 .byte 16 | |
3726 .byte 24 | |
3727 .byte 32 | |
3728 .byte 40 | |
3729 .byte 48 | |
3730 .byte 56 | |
3731 .byte 64 | |
3732 .byte 72 | |
3733 .byte 80 | |
3734 .byte 88 | |
3735 .byte 96 | |
3736 .byte 104 | |
3737 .byte 112 | |
3738 .byte 120 | |
3739 .byte -128 | |
3740 .byte -124 | |
3741 .byte -120 | |
3742 .byte -116 | |
3743 .byte -112 | |
3744 .byte -108 | |
3745 .byte -104 | |
3746 .byte -100 | |
3747 .byte -96 | |
3748 .byte -92 | |
3749 .byte -88 | |
3750 .byte -84 | |
3751 .byte -80 | |
3752 .byte -76 | |
3753 .byte -72 | |
3754 .byte -68 | |
3755 .byte -64 | |
3756 .byte -60 | |
3757 .byte -56 | |
3758 .byte -52 | |
3759 .byte -48 | |
3760 .byte -44 | |
3761 .byte -40 | |
3762 .byte -36 | |
3763 .byte -32 | |
3764 .byte -28 | |
3765 .byte -24 | |
3766 .byte -20 | |
3767 .byte -16 | |
3768 .byte -12 | |
3769 .byte -8 | |
3770 .byte -4 | |
3771 .byte 0 | |
3772 .byte 4 | |
3773 .byte 8 | |
3774 .byte 12 | |
3775 .byte 16 | |
3776 .byte 20 | |
3777 .byte 24 | |
3778 .byte 28 | |
3779 .byte 32 | |
3780 .byte 36 | |
3781 .byte 40 | |
3782 .byte 44 | |
3783 .byte 48 | |
3784 .byte 52 | |
3785 .byte 56 | |
3786 .byte 60 | |
3787 .byte 64 | |
3788 .byte 68 | |
3789 .byte 72 | |
3790 .byte 76 | |
3791 .byte 80 | |
3792 .byte 84 | |
3793 .byte 88 | |
3794 .byte 92 | |
3795 .byte 96 | |
3796 .byte 100 | |
3797 .byte 104 | |
3798 .byte 108 | |
3799 .byte 112 | |
3800 .byte 116 | |
3801 .byte 120 | |
3802 .byte 124 | |
3803 .byte -128 | |
/* Table of 64 normalized 32-bit inverses.  The first 32 entries start at
   zero_l and the remaining 32 at div_table_inv, so the signed byte offsets
   stored in div_table_ix (-128 .. 124, relative to div_table_inv) reach
   every entry.  The first entry is 0 and is also loaded through its
   zero_l label wherever the division code needs a 32-bit zero constant.  */
3804 /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */ | |
3805 .balign 4 | |
3806 LOCAL(zero_l): | |
3807 .long 0x0 | |
3808 .long 0xF81F81F9 | |
3809 .long 0xF07C1F08 | |
3810 .long 0xE9131AC0 | |
3811 .long 0xE1E1E1E2 | |
3812 .long 0xDAE6076C | |
3813 .long 0xD41D41D5 | |
3814 .long 0xCD856891 | |
3815 .long 0xC71C71C8 | |
3816 .long 0xC0E07039 | |
3817 .long 0xBACF914D | |
3818 .long 0xB4E81B4F | |
3819 .long 0xAF286BCB | |
3820 .long 0xA98EF607 | |
3821 .long 0xA41A41A5 | |
3822 .long 0x9EC8E952 | |
3823 .long 0x9999999A | |
3824 .long 0x948B0FCE | |
3825 .long 0x8F9C18FA | |
3826 .long 0x8ACB90F7 | |
3827 .long 0x86186187 | |
3828 .long 0x81818182 | |
3829 .long 0x7D05F418 | |
3830 .long 0x78A4C818 | |
3831 .long 0x745D1746 | |
3832 .long 0x702E05C1 | |
3833 .long 0x6C16C16D | |
3834 .long 0x68168169 | |
3835 .long 0x642C8591 | |
3836 .long 0x60581606 | |
3837 .long 0x5C9882BA | |
3838 .long 0x58ED2309 | |
/* Offset 0 of the index table refers to this entry.  */
3839 LOCAL(div_table_inv): | |
3840 .long 0x55555556 | |
3841 .long 0x51D07EAF | |
3842 .long 0x4E5E0A73 | |
3843 .long 0x4AFD6A06 | |
3844 .long 0x47AE147B | |
3845 .long 0x446F8657 | |
3846 .long 0x41414142 | |
3847 .long 0x3E22CBCF | |
3848 .long 0x3B13B13C | |
3849 .long 0x38138139 | |
3850 .long 0x3521CFB3 | |
3851 .long 0x323E34A3 | |
3852 .long 0x2F684BDB | |
3853 .long 0x2C9FB4D9 | |
3854 .long 0x29E4129F | |
3855 .long 0x27350B89 | |
3856 .long 0x24924925 | |
3857 .long 0x21FB7813 | |
3858 .long 0x1F7047DD | |
3859 .long 0x1CF06ADB | |
3860 .long 0x1A7B9612 | |
3861 .long 0x18118119 | |
3862 .long 0x15B1E5F8 | |
3863 .long 0x135C8114 | |
3864 .long 0x11111112 | |
3865 .long 0xECF56BF | |
3866 .long 0xC9714FC | |
3867 .long 0xA6810A7 | |
3868 .long 0x8421085 | |
3869 .long 0x624DD30 | |
3870 .long 0x4104105 | |
3871 .long 0x2040811 | |
3872 /* maximum error: 0.987342 scaled: 0.921875*/ | |
3873 | |
3874 ENDFUNC(GLOBAL(sdivsi3_i4i)) | |
3875 #endif /* SH3 / SH4 */ | |
3876 | |
3877 #endif /* L_div_table */ | |
3878 | |
3879 #ifdef L_udiv_qrnnd_16 | |
3880 #if !__SHMEDIA__ | |
/* udiv_qrnnd_16: division helper built from sixteen div1 steps followed by
   a correction stage.  Register roles are listed in the comment below:
   inputs r0 = n1, r4 = n0, r5 = d, r6 = d1; results r0 = rn (remainder)
   and r1 = qn (quotient); r2 is scratch.  T and MACL are modified.
   NOTE(review): the exact relation between d and d1 is set up by the
   caller, which is outside this excerpt -- confirm there.  */
3881 HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16)) | |
3882 /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ | |
3883 /* n1 < d, but n1 might be larger than d1. */ | |
3884 .global GLOBAL(udiv_qrnnd_16) | |
3885 .balign 8 | |
3886 GLOBAL(udiv_qrnnd_16): | |
3887 div0u | |
/* T = n1 > d1 (unsigned): take the special path at .Lots.  */
3888 cmp/hi r6,r0 | |
3889 bt .Lots | |
3890 .rept 16 | |
3891 div1 r6,r0 | |
3892 .endr | |
/* r1 = low 16 bits of r0.  If T is clear after the last step, add r6 back
   to r0; then rotate T into r1.  */
3893 extu.w r0,r1 | |
3894 bt 0f | |
3895 add r6,r0 | |
3896 0: rotcl r1 | |
/* MACL = (low 16 bits of r1) * (low 16 bits of r5), read into r2 below.  */
3897 mulu.w r1,r5 | |
/* Replace the low 16 bits of r0 with the low 16 bits of r4.  */
3898 xtrct r4,r0 | |
3899 swap.w r0,r0 | |
3900 sts macl,r2 | |
/* T = r0 >= r2 (unsigned); r0 -= r2.  If no borrow occurred, return via
   the rts at the final 0: label.  */
3901 cmp/hs r2,r0 | |
3902 sub r2,r0 | |
3903 bt 0f | |
/* Borrow: add d back once and decrement the quotient.  T is clear on
   entry to addc, and its carry out decides whether one correction was
   enough.  */
3904 addc r5,r0 | |
3905 add #-1,r1 | |
3906 bt 0f | |
/* Second correction: decrement the quotient again and add d once more in
   the rts delay slot.  */
3907 1: add #-1,r1 | |
3908 rts | |
3909 add r5,r0 | |
3910 .balign 8 | |
/* Path taken when n1 > d1.  */
3911 .Lots: | |
3912 sub r5,r0 | |
/* r1 = (r0 << 16) | (low 16 bits of r4), then copied to r0.  */
3913 swap.w r4,r1 | |
3914 xtrct r0,r1 | |
3915 clrt | |
3916 mov r1,r0 | |
3917 addc r5,r0 | |
3918 mov #-1,r1 | |
/* SL1 is a macro defined outside this excerpt; presumably it pairs the
   "bf 1b" branch with "shlr16 r1" -- confirm in lib1funcs.h.  */
3919 SL1(bf, 1b, | |
3920 shlr16 r1) | |
3921 0: rts | |
3922 nop | |
3923 ENDFUNC(GLOBAL(udiv_qrnnd_16)) | |
3924 #endif /* !__SHMEDIA__ */ | |
3925 #endif /* L_udiv_qrnnd_16 */ | |