comparison gcc/config/sh/lib1funcs.asm @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 77e2b8dfacca
comparison
equal deleted inserted replaced
-1:000000000000 0:a06113de4d67
1 /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2 2004, 2005, 2006, 2009
3 Free Software Foundation, Inc.
4
5 This file is free software; you can redistribute it and/or modify it
6 under the terms of the GNU General Public License as published by the
7 Free Software Foundation; either version 3, or (at your option) any
8 later version.
9
10 This file is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24
25 !! libgcc routines for the Renesas / SuperH SH CPUs.
26 !! Contributed by Steve Chamberlain.
27 !! sac@cygnus.com
28
29 !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
30 !! recoded in assembly by Toshiyasu Morita
31 !! tm@netcom.com
32
33 /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
34 ELF local label prefixes by J"orn Rennecke
35 amylaar@cygnus.com */
36
37 #include "lib1funcs.h"
38
39 /* t-vxworks needs to build both PIC and non-PIC versions of libgcc,
40 so it is more convenient to define NO_FPSCR_VALUES here than to
41 define it on the command line. */
42 #if defined __vxworks && defined __PIC__
43 #define NO_FPSCR_VALUES
44 #endif
45
46 #if ! __SH5__
47 #ifdef L_ashiftrt /* ashiftrt_r4_<N>: arithmetic right shift of r4 by the constant N (0..32), result left in r4 */
48 .global GLOBAL(ashiftrt_r4_0)
49 .global GLOBAL(ashiftrt_r4_1)
50 .global GLOBAL(ashiftrt_r4_2)
51 .global GLOBAL(ashiftrt_r4_3)
52 .global GLOBAL(ashiftrt_r4_4)
53 .global GLOBAL(ashiftrt_r4_5)
54 .global GLOBAL(ashiftrt_r4_6)
55 .global GLOBAL(ashiftrt_r4_7)
56 .global GLOBAL(ashiftrt_r4_8)
57 .global GLOBAL(ashiftrt_r4_9)
58 .global GLOBAL(ashiftrt_r4_10)
59 .global GLOBAL(ashiftrt_r4_11)
60 .global GLOBAL(ashiftrt_r4_12)
61 .global GLOBAL(ashiftrt_r4_13)
62 .global GLOBAL(ashiftrt_r4_14)
63 .global GLOBAL(ashiftrt_r4_15)
64 .global GLOBAL(ashiftrt_r4_16)
65 .global GLOBAL(ashiftrt_r4_17)
66 .global GLOBAL(ashiftrt_r4_18)
67 .global GLOBAL(ashiftrt_r4_19)
68 .global GLOBAL(ashiftrt_r4_20)
69 .global GLOBAL(ashiftrt_r4_21)
70 .global GLOBAL(ashiftrt_r4_22)
71 .global GLOBAL(ashiftrt_r4_23)
72 .global GLOBAL(ashiftrt_r4_24)
73 .global GLOBAL(ashiftrt_r4_25)
74 .global GLOBAL(ashiftrt_r4_26)
75 .global GLOBAL(ashiftrt_r4_27)
76 .global GLOBAL(ashiftrt_r4_28)
77 .global GLOBAL(ashiftrt_r4_29)
78 .global GLOBAL(ashiftrt_r4_30)
79 .global GLOBAL(ashiftrt_r4_31)
80 .global GLOBAL(ashiftrt_r4_32)
81
82 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_0))
83 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_1))
84 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_2))
85 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_3))
86 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_4))
87 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_5))
88 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_6))
89 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_7))
90 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_8))
91 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_9))
92 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_10))
93 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_11))
94 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_12))
95 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_13))
96 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_14))
97 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_15))
98 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_16))
99 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_17))
100 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_18))
101 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_19))
102 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_20))
103 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_21))
104 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_22))
105 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_23))
106 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_24))
107 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_25))
108 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_26))
109 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_27))
110 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_28))
111 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_29))
112 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_30))
113 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_31))
114 HIDDEN_FUNC(GLOBAL(ashiftrt_r4_32))
115
116 .align 1
117 GLOBAL(ashiftrt_r4_32): /* 32 and 31 share one body: both leave only copies of the sign bit */
118 GLOBAL(ashiftrt_r4_31):
119 rotcl r4 /* rotate the sign bit (bit 31) of r4 out into T */
120 rts
121 subc r4,r4 /* delay slot: r4 = r4 - r4 - T = -T, i.e. 0 or -1 */
122
123 GLOBAL(ashiftrt_r4_30): /* 25..30: fall through (N - 24) one-bit shar steps into the shift-by-24 tail */
124 shar r4
125 GLOBAL(ashiftrt_r4_29):
126 shar r4
127 GLOBAL(ashiftrt_r4_28):
128 shar r4
129 GLOBAL(ashiftrt_r4_27):
130 shar r4
131 GLOBAL(ashiftrt_r4_26):
132 shar r4
133 GLOBAL(ashiftrt_r4_25):
134 shar r4
135 GLOBAL(ashiftrt_r4_24):
136 shlr16 r4 /* logical shift right by 16 ... */
137 shlr8 r4 /* ... and by 8 more: the original top byte is now the low byte */
138 rts
139 exts.b r4,r4 /* delay slot: sign-extend the low byte, making the shift arithmetic */
140
141 GLOBAL(ashiftrt_r4_23): /* 17..23: fall through (N - 16) one-bit shar steps into the shift-by-16 tail */
142 shar r4
143 GLOBAL(ashiftrt_r4_22):
144 shar r4
145 GLOBAL(ashiftrt_r4_21):
146 shar r4
147 GLOBAL(ashiftrt_r4_20):
148 shar r4
149 GLOBAL(ashiftrt_r4_19):
150 shar r4
151 GLOBAL(ashiftrt_r4_18):
152 shar r4
153 GLOBAL(ashiftrt_r4_17):
154 shar r4
155 GLOBAL(ashiftrt_r4_16):
156 shlr16 r4 /* logical shift right by 16: the original high word is now the low word */
157 rts
158 exts.w r4,r4 /* delay slot: sign-extend the low word */
159
160 GLOBAL(ashiftrt_r4_15): /* 1..15: entry N executes N one-bit shar steps in total */
161 shar r4
162 GLOBAL(ashiftrt_r4_14):
163 shar r4
164 GLOBAL(ashiftrt_r4_13):
165 shar r4
166 GLOBAL(ashiftrt_r4_12):
167 shar r4
168 GLOBAL(ashiftrt_r4_11):
169 shar r4
170 GLOBAL(ashiftrt_r4_10):
171 shar r4
172 GLOBAL(ashiftrt_r4_9):
173 shar r4
174 GLOBAL(ashiftrt_r4_8):
175 shar r4
176 GLOBAL(ashiftrt_r4_7):
177 shar r4
178 GLOBAL(ashiftrt_r4_6):
179 shar r4
180 GLOBAL(ashiftrt_r4_5):
181 shar r4
182 GLOBAL(ashiftrt_r4_4):
183 shar r4
184 GLOBAL(ashiftrt_r4_3):
185 shar r4
186 GLOBAL(ashiftrt_r4_2):
187 shar r4
188 GLOBAL(ashiftrt_r4_1):
189 rts
190 shar r4 /* delay slot: the final one-bit shift of the chain */
191
192 GLOBAL(ashiftrt_r4_0): /* shift by 0: return with r4 unchanged */
193 rts
194 nop
195
196 ENDFUNC(GLOBAL(ashiftrt_r4_0))
197 ENDFUNC(GLOBAL(ashiftrt_r4_1))
198 ENDFUNC(GLOBAL(ashiftrt_r4_2))
199 ENDFUNC(GLOBAL(ashiftrt_r4_3))
200 ENDFUNC(GLOBAL(ashiftrt_r4_4))
201 ENDFUNC(GLOBAL(ashiftrt_r4_5))
202 ENDFUNC(GLOBAL(ashiftrt_r4_6))
203 ENDFUNC(GLOBAL(ashiftrt_r4_7))
204 ENDFUNC(GLOBAL(ashiftrt_r4_8))
205 ENDFUNC(GLOBAL(ashiftrt_r4_9))
206 ENDFUNC(GLOBAL(ashiftrt_r4_10))
207 ENDFUNC(GLOBAL(ashiftrt_r4_11))
208 ENDFUNC(GLOBAL(ashiftrt_r4_12))
209 ENDFUNC(GLOBAL(ashiftrt_r4_13))
210 ENDFUNC(GLOBAL(ashiftrt_r4_14))
211 ENDFUNC(GLOBAL(ashiftrt_r4_15))
212 ENDFUNC(GLOBAL(ashiftrt_r4_16))
213 ENDFUNC(GLOBAL(ashiftrt_r4_17))
214 ENDFUNC(GLOBAL(ashiftrt_r4_18))
215 ENDFUNC(GLOBAL(ashiftrt_r4_19))
216 ENDFUNC(GLOBAL(ashiftrt_r4_20))
217 ENDFUNC(GLOBAL(ashiftrt_r4_21))
218 ENDFUNC(GLOBAL(ashiftrt_r4_22))
219 ENDFUNC(GLOBAL(ashiftrt_r4_23))
220 ENDFUNC(GLOBAL(ashiftrt_r4_24))
221 ENDFUNC(GLOBAL(ashiftrt_r4_25))
222 ENDFUNC(GLOBAL(ashiftrt_r4_26))
223 ENDFUNC(GLOBAL(ashiftrt_r4_27))
224 ENDFUNC(GLOBAL(ashiftrt_r4_28))
225 ENDFUNC(GLOBAL(ashiftrt_r4_29))
226 ENDFUNC(GLOBAL(ashiftrt_r4_30))
227 ENDFUNC(GLOBAL(ashiftrt_r4_31))
228 ENDFUNC(GLOBAL(ashiftrt_r4_32))
229 #endif
230
231 #ifdef L_ashiftrt_n
232
233 !
234 ! GLOBAL(ashrsi3) - arithmetic right shift by a variable count
235 !
236 ! Entry:
237 !
238 ! r4: Value to shift
239 ! r5: Shifts (only the low 5 bits are used)
240 !
241 ! Exit:
242 !
243 ! r0: Result
244 !
245 ! Destroys:
246 !
247 ! r5 (overwritten with the jump table offset), T (paths using shar / rotcl / subc)
248 !
249
250 .global GLOBAL(ashrsi3)
251 HIDDEN_FUNC(GLOBAL(ashrsi3))
252 .align 2
253 GLOBAL(ashrsi3):
254 mov #31,r0
255 and r0,r5 /* r5 = shift count masked to 0..31 */
256 mova LOCAL(ashrsi3_table),r0 /* r0 = address of the byte offset table */
257 mov.b @(r0,r5),r5 /* r5 = offset of the handler for this count, relative to the table */
258 #ifdef __sh1__ /* this path forms the absolute handler address instead of using braf */
259 add r5,r0 /* r0 = table address + handler offset */
260 jmp @r0
261 #else
262 braf r5 /* pc-relative jump; NOTE(review): relies on this braf sitting 4 bytes before the table */
263 #endif
264 mov r4,r0 /* delay slot of the jump: handlers shift r0 in place */
265
266 .align 2
267 LOCAL(ashrsi3_table): /* entry N = distance from the table to the handler for a shift by N */
268 .byte LOCAL(ashrsi3_0)-LOCAL(ashrsi3_table)
269 .byte LOCAL(ashrsi3_1)-LOCAL(ashrsi3_table)
270 .byte LOCAL(ashrsi3_2)-LOCAL(ashrsi3_table)
271 .byte LOCAL(ashrsi3_3)-LOCAL(ashrsi3_table)
272 .byte LOCAL(ashrsi3_4)-LOCAL(ashrsi3_table)
273 .byte LOCAL(ashrsi3_5)-LOCAL(ashrsi3_table)
274 .byte LOCAL(ashrsi3_6)-LOCAL(ashrsi3_table)
275 .byte LOCAL(ashrsi3_7)-LOCAL(ashrsi3_table)
276 .byte LOCAL(ashrsi3_8)-LOCAL(ashrsi3_table)
277 .byte LOCAL(ashrsi3_9)-LOCAL(ashrsi3_table)
278 .byte LOCAL(ashrsi3_10)-LOCAL(ashrsi3_table)
279 .byte LOCAL(ashrsi3_11)-LOCAL(ashrsi3_table)
280 .byte LOCAL(ashrsi3_12)-LOCAL(ashrsi3_table)
281 .byte LOCAL(ashrsi3_13)-LOCAL(ashrsi3_table)
282 .byte LOCAL(ashrsi3_14)-LOCAL(ashrsi3_table)
283 .byte LOCAL(ashrsi3_15)-LOCAL(ashrsi3_table)
284 .byte LOCAL(ashrsi3_16)-LOCAL(ashrsi3_table)
285 .byte LOCAL(ashrsi3_17)-LOCAL(ashrsi3_table)
286 .byte LOCAL(ashrsi3_18)-LOCAL(ashrsi3_table)
287 .byte LOCAL(ashrsi3_19)-LOCAL(ashrsi3_table)
288 .byte LOCAL(ashrsi3_20)-LOCAL(ashrsi3_table)
289 .byte LOCAL(ashrsi3_21)-LOCAL(ashrsi3_table)
290 .byte LOCAL(ashrsi3_22)-LOCAL(ashrsi3_table)
291 .byte LOCAL(ashrsi3_23)-LOCAL(ashrsi3_table)
292 .byte LOCAL(ashrsi3_24)-LOCAL(ashrsi3_table)
293 .byte LOCAL(ashrsi3_25)-LOCAL(ashrsi3_table)
294 .byte LOCAL(ashrsi3_26)-LOCAL(ashrsi3_table)
295 .byte LOCAL(ashrsi3_27)-LOCAL(ashrsi3_table)
296 .byte LOCAL(ashrsi3_28)-LOCAL(ashrsi3_table)
297 .byte LOCAL(ashrsi3_29)-LOCAL(ashrsi3_table)
298 .byte LOCAL(ashrsi3_30)-LOCAL(ashrsi3_table)
299 .byte LOCAL(ashrsi3_31)-LOCAL(ashrsi3_table)
300
301 LOCAL(ashrsi3_31): /* shift by 31: result is 0 or -1 */
302 rotcl r0 /* sign bit of r0 -> T */
303 rts
304 subc r0,r0 /* delay slot: r0 = r0 - r0 - T = -T */
305
306 LOCAL(ashrsi3_30): /* 25..30: (N - 24) one-bit shar steps, then the shift-by-24 tail */
307 shar r0
308 LOCAL(ashrsi3_29):
309 shar r0
310 LOCAL(ashrsi3_28):
311 shar r0
312 LOCAL(ashrsi3_27):
313 shar r0
314 LOCAL(ashrsi3_26):
315 shar r0
316 LOCAL(ashrsi3_25):
317 shar r0
318 LOCAL(ashrsi3_24):
319 shlr16 r0 /* logical shift right by 16 + 8 = 24 ... */
320 shlr8 r0
321 rts
322 exts.b r0,r0 /* delay slot: ... then sign-extend the remaining byte */
323
324 LOCAL(ashrsi3_23): /* 17..23: (N - 16) one-bit shar steps, then the shift-by-16 tail */
325 shar r0
326 LOCAL(ashrsi3_22):
327 shar r0
328 LOCAL(ashrsi3_21):
329 shar r0
330 LOCAL(ashrsi3_20):
331 shar r0
332 LOCAL(ashrsi3_19):
333 shar r0
334 LOCAL(ashrsi3_18):
335 shar r0
336 LOCAL(ashrsi3_17):
337 shar r0
338 LOCAL(ashrsi3_16):
339 shlr16 r0 /* logical shift right by 16 ... */
340 rts
341 exts.w r0,r0 /* delay slot: ... then sign-extend the remaining word */
342
343 LOCAL(ashrsi3_15): /* 1..15: entry N executes N one-bit shar steps in total */
344 shar r0
345 LOCAL(ashrsi3_14):
346 shar r0
347 LOCAL(ashrsi3_13):
348 shar r0
349 LOCAL(ashrsi3_12):
350 shar r0
351 LOCAL(ashrsi3_11):
352 shar r0
353 LOCAL(ashrsi3_10):
354 shar r0
355 LOCAL(ashrsi3_9):
356 shar r0
357 LOCAL(ashrsi3_8):
358 shar r0
359 LOCAL(ashrsi3_7):
360 shar r0
361 LOCAL(ashrsi3_6):
362 shar r0
363 LOCAL(ashrsi3_5):
364 shar r0
365 LOCAL(ashrsi3_4):
366 shar r0
367 LOCAL(ashrsi3_3):
368 shar r0
369 LOCAL(ashrsi3_2):
370 shar r0
371 LOCAL(ashrsi3_1):
372 rts
373 shar r0 /* delay slot: the final one-bit shift of the chain */
374
375 LOCAL(ashrsi3_0): /* shift by 0: r0 already holds the value copied from r4 */
376 rts
377 nop
378
379 ENDFUNC(GLOBAL(ashrsi3))
380 #endif
381
382 #ifdef L_ashiftlt
383
384 !
385 ! GLOBAL(ashlsi3) - left shift by a variable count
386 !
387 ! Entry:
388 !
389 ! r4: Value to shift
390 ! r5: Shifts (only the low 5 bits are used)
391 !
392 ! Exit:
393 !
394 ! r0: Result
395 !
396 ! Destroys:
397 !
398 ! r5 (overwritten with the jump table offset), T (odd counts, via the one-bit shll)
399 !
400 .global GLOBAL(ashlsi3)
401 HIDDEN_FUNC(GLOBAL(ashlsi3))
402 .align 2
403 GLOBAL(ashlsi3):
404 mov #31,r0
405 and r0,r5 /* r5 = shift count masked to 0..31 */
406 mova LOCAL(ashlsi3_table),r0 /* r0 = address of the byte offset table */
407 mov.b @(r0,r5),r5 /* r5 = offset of the handler for this count, relative to the table */
408 #ifdef __sh1__ /* this path forms the absolute handler address instead of using braf */
409 add r5,r0 /* r0 = table address + handler offset */
410 jmp @r0
411 #else
412 braf r5 /* pc-relative jump; NOTE(review): relies on this braf sitting 4 bytes before the table */
413 #endif
414 mov r4,r0 /* delay slot of the jump: handlers shift r0 in place */
415
416 .align 2
417 LOCAL(ashlsi3_table): /* entry N = distance from the table to the handler for a shift by N */
418 .byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table)
419 .byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table)
420 .byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table)
421 .byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table)
422 .byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table)
423 .byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table)
424 .byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
425 .byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
426 .byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
427 .byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
428 .byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
429 .byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
430 .byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
431 .byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
432 .byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
433 .byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
434 .byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
435 .byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
436 .byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
437 .byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
438 .byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
439 .byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
440 .byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
441 .byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
442 .byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
443 .byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
444 .byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
445 .byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
446 .byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
447 .byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
448 .byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
449 .byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
450
451 LOCAL(ashlsi3_6): /* 2/4/6: N/2 two-bit shll2 steps, the last one in the rts delay slot */
452 shll2 r0
453 LOCAL(ashlsi3_4):
454 shll2 r0
455 LOCAL(ashlsi3_2):
456 rts
457 shll2 r0
458
459 LOCAL(ashlsi3_7): /* 1/3/5/7: shll2 steps for the even part, then a one-bit shll in the delay slot */
460 shll2 r0
461 LOCAL(ashlsi3_5):
462 shll2 r0
463 LOCAL(ashlsi3_3):
464 shll2 r0
465 LOCAL(ashlsi3_1):
466 rts
467 shll r0
468
469 LOCAL(ashlsi3_14): /* 8/10/12/14: shll2 steps for N - 8, then shll8 in the delay slot */
470 shll2 r0
471 LOCAL(ashlsi3_12):
472 shll2 r0
473 LOCAL(ashlsi3_10):
474 shll2 r0
475 LOCAL(ashlsi3_8):
476 rts
477 shll8 r0
478
479 LOCAL(ashlsi3_15): /* 9/11/13/15: shll2 steps for N - 9, then shll8 and a one-bit shll */
480 shll2 r0
481 LOCAL(ashlsi3_13):
482 shll2 r0
483 LOCAL(ashlsi3_11):
484 shll2 r0
485 LOCAL(ashlsi3_9):
486 shll8 r0
487 rts
488 shll r0
489
490 LOCAL(ashlsi3_22): /* 16/18/20/22: shll2 steps for N - 16, then shll16 in the delay slot */
491 shll2 r0
492 LOCAL(ashlsi3_20):
493 shll2 r0
494 LOCAL(ashlsi3_18):
495 shll2 r0
496 LOCAL(ashlsi3_16):
497 rts
498 shll16 r0
499
500 LOCAL(ashlsi3_23): /* 17/19/21/23: shll2 steps for N - 17, then shll16 and a one-bit shll */
501 shll2 r0
502 LOCAL(ashlsi3_21):
503 shll2 r0
504 LOCAL(ashlsi3_19):
505 shll2 r0
506 LOCAL(ashlsi3_17):
507 shll16 r0
508 rts
509 shll r0
510
511 LOCAL(ashlsi3_30): /* 24/26/28/30: shll2 steps for N - 24, then shll16 and shll8 */
512 shll2 r0
513 LOCAL(ashlsi3_28):
514 shll2 r0
515 LOCAL(ashlsi3_26):
516 shll2 r0
517 LOCAL(ashlsi3_24):
518 shll16 r0
519 rts
520 shll8 r0
521
522 LOCAL(ashlsi3_31): /* 25/27/29/31: shll2 steps for N - 25, then shll16, shll8 and a one-bit shll */
523 shll2 r0
524 LOCAL(ashlsi3_29):
525 shll2 r0
526 LOCAL(ashlsi3_27):
527 shll2 r0
528 LOCAL(ashlsi3_25):
529 shll16 r0
530 shll8 r0
531 rts
532 shll r0
533
534 LOCAL(ashlsi3_0): /* shift by 0: r0 already holds the value copied from r4 */
535 rts
536 nop
537
538 ENDFUNC(GLOBAL(ashlsi3))
539 #endif
540
541 #ifdef L_lshiftrt
542
543 !
544 ! GLOBAL(lshrsi3) - logical right shift by a variable count
545 !
546 ! Entry:
547 !
548 ! r4: Value to shift
549 ! r5: Shifts (only the low 5 bits are used)
550 !
551 ! Exit:
552 !
553 ! r0: Result
554 !
555 ! Destroys:
556 !
557 ! r5 (overwritten with the jump table offset), T (odd counts, via the one-bit shlr)
558 !
559 .global GLOBAL(lshrsi3)
560 HIDDEN_FUNC(GLOBAL(lshrsi3))
561 .align 2
562 GLOBAL(lshrsi3):
563 mov #31,r0
564 and r0,r5 /* r5 = shift count masked to 0..31 */
565 mova LOCAL(lshrsi3_table),r0 /* r0 = address of the byte offset table */
566 mov.b @(r0,r5),r5 /* r5 = offset of the handler for this count, relative to the table */
567 #ifdef __sh1__ /* this path forms the absolute handler address instead of using braf */
568 add r5,r0 /* r0 = table address + handler offset */
569 jmp @r0
570 #else
571 braf r5 /* pc-relative jump; NOTE(review): relies on this braf sitting 4 bytes before the table */
572 #endif
573 mov r4,r0 /* delay slot of the jump: handlers shift r0 in place */
574
575 .align 2
576 LOCAL(lshrsi3_table): /* entry N = distance from the table to the handler for a shift by N */
577 .byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table)
578 .byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table)
579 .byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table)
580 .byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table)
581 .byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table)
582 .byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table)
583 .byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
584 .byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
585 .byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
586 .byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
587 .byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
588 .byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
589 .byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
590 .byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
591 .byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
592 .byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
593 .byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
594 .byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
595 .byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
596 .byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
597 .byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
598 .byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
599 .byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
600 .byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
601 .byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
602 .byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
603 .byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
604 .byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
605 .byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
606 .byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
607 .byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
608 .byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
609
610 LOCAL(lshrsi3_6): /* 2/4/6: N/2 two-bit shlr2 steps, the last one in the rts delay slot */
611 shlr2 r0
612 LOCAL(lshrsi3_4):
613 shlr2 r0
614 LOCAL(lshrsi3_2):
615 rts
616 shlr2 r0
617
618 LOCAL(lshrsi3_7): /* 1/3/5/7: shlr2 steps for the even part, then a one-bit shlr in the delay slot */
619 shlr2 r0
620 LOCAL(lshrsi3_5):
621 shlr2 r0
622 LOCAL(lshrsi3_3):
623 shlr2 r0
624 LOCAL(lshrsi3_1):
625 rts
626 shlr r0
627
628 LOCAL(lshrsi3_14): /* 8/10/12/14: shlr2 steps for N - 8, then shlr8 in the delay slot */
629 shlr2 r0
630 LOCAL(lshrsi3_12):
631 shlr2 r0
632 LOCAL(lshrsi3_10):
633 shlr2 r0
634 LOCAL(lshrsi3_8):
635 rts
636 shlr8 r0
637
638 LOCAL(lshrsi3_15): /* 9/11/13/15: shlr2 steps for N - 9, then shlr8 and a one-bit shlr */
639 shlr2 r0
640 LOCAL(lshrsi3_13):
641 shlr2 r0
642 LOCAL(lshrsi3_11):
643 shlr2 r0
644 LOCAL(lshrsi3_9):
645 shlr8 r0
646 rts
647 shlr r0
648
649 LOCAL(lshrsi3_22): /* 16/18/20/22: shlr2 steps for N - 16, then shlr16 in the delay slot */
650 shlr2 r0
651 LOCAL(lshrsi3_20):
652 shlr2 r0
653 LOCAL(lshrsi3_18):
654 shlr2 r0
655 LOCAL(lshrsi3_16):
656 rts
657 shlr16 r0
658
659 LOCAL(lshrsi3_23): /* 17/19/21/23: shlr2 steps for N - 17, then shlr16 and a one-bit shlr */
660 shlr2 r0
661 LOCAL(lshrsi3_21):
662 shlr2 r0
663 LOCAL(lshrsi3_19):
664 shlr2 r0
665 LOCAL(lshrsi3_17):
666 shlr16 r0
667 rts
668 shlr r0
669
670 LOCAL(lshrsi3_30): /* 24/26/28/30: shlr2 steps for N - 24, then shlr16 and shlr8 */
671 shlr2 r0
672 LOCAL(lshrsi3_28):
673 shlr2 r0
674 LOCAL(lshrsi3_26):
675 shlr2 r0
676 LOCAL(lshrsi3_24):
677 shlr16 r0
678 rts
679 shlr8 r0
680
681 LOCAL(lshrsi3_31): /* 25/27/29/31: shlr2 steps for N - 25, then shlr16, shlr8 and a one-bit shlr */
682 shlr2 r0
683 LOCAL(lshrsi3_29):
684 shlr2 r0
685 LOCAL(lshrsi3_27):
686 shlr2 r0
687 LOCAL(lshrsi3_25):
688 shlr16 r0
689 shlr8 r0
690 rts
691 shlr r0
692
693 LOCAL(lshrsi3_0): /* shift by 0: r0 already holds the value copied from r4 */
694 rts
695 nop
696
697 ENDFUNC(GLOBAL(lshrsi3))
698 #endif
699
700 #ifdef L_movmem /* movmem (looping block copy) and the straight-line movmemSI<n> copy entry points */
701 .text
702 .balign 4
703 .global GLOBAL(movmem)
704 HIDDEN_FUNC(GLOBAL(movmem))
705 HIDDEN_ALIAS(movstr,movmem)
706 /* This would be a lot simpler if r6 contained the byte count
707 minus 64, and we wouldn't be called here for a byte count of 64. */
708 GLOBAL(movmem): /* r4 = destination, r5 = source; r6 = size control value (NOTE(review): exact encoding is chosen by the caller and not visible here) */
709 sts.l pr,@-r15 /* save the return address: pr is overwritten by the bsr below */
710 shll2 r6 /* r6 *= 4 */
711 bsr GLOBAL(movmemSI52+2) /* enter movmemSI52 after its first load; the delay slot performs that load instead */
712 mov.l @(48,r5),r0 /* delay slot */
713 .balign 4
714 LOCAL(movmem_loop): /* Reached with rts */
715 mov.l @(60,r5),r0 /* copy the longwords at offsets 60, 56 and 52, interleaved with the loop bookkeeping */
716 add #-64,r6
717 mov.l r0,@(60,r4)
718 tst r6,r6 /* T = (r6 == 0) */
719 mov.l @(56,r5),r0
720 bt LOCAL(movmem_done) /* r6 reached 0: finish offsets 56 and 52 and return to the caller */
721 mov.l r0,@(56,r4)
722 cmp/pl r6 /* T = (r6 > 0); consumed by the bt below */
723 mov.l @(52,r5),r0
724 add #64,r5
725 mov.l r0,@(52,r4) /* r4 is not advanced yet, so this still stores into the current 64-byte group */
726 add #64,r4
727 bt GLOBAL(movmemSI52) /* r6 > 0: copy offsets 48..0 of the next group; its rts comes back to movmem_loop */
728 ! done all the large groups, do the remainder
729 ! jump to movmem+
730 mova GLOBAL(movmemSI4)+4,r0
731 add r6,r0 /* r6 is negative here; consecutive movmemSI<n> entries are two instructions apart */
732 jmp @r0 /* computed jump into the movmemSI<n> chain; its delay slot is the lds.l below */
733 LOCAL(movmem_done): ! share slot insn, works out aligned.
734 lds.l @r15+,pr /* restore the return address (runs on both the jmp path and the movmem_done path) */
735 mov.l r0,@(56,r4)
736 mov.l @(52,r5),r0
737 rts
738 mov.l r0,@(52,r4) /* delay slot */
739 .balign 4
740 ! ??? We need aliases movstr* for movmem* for the older libraries. These
741 ! aliases will be removed at some point in the future.
742 .global GLOBAL(movmemSI64)
743 HIDDEN_FUNC(GLOBAL(movmemSI64))
744 HIDDEN_ALIAS(movstrSI64,movmemSI64)
745 GLOBAL(movmemSI64): /* movmemSI<n>: copy n bytes from @r5 to @r4 as longwords through r0, highest offset first; each entry falls through to the next smaller one */
746 mov.l @(60,r5),r0
747 mov.l r0,@(60,r4)
748 .global GLOBAL(movmemSI60)
749 HIDDEN_FUNC(GLOBAL(movmemSI60))
750 HIDDEN_ALIAS(movstrSI60,movmemSI60)
751 GLOBAL(movmemSI60):
752 mov.l @(56,r5),r0
753 mov.l r0,@(56,r4)
754 .global GLOBAL(movmemSI56)
755 HIDDEN_FUNC(GLOBAL(movmemSI56))
756 HIDDEN_ALIAS(movstrSI56,movmemSI56)
757 GLOBAL(movmemSI56):
758 mov.l @(52,r5),r0
759 mov.l r0,@(52,r4)
760 .global GLOBAL(movmemSI52)
761 HIDDEN_FUNC(GLOBAL(movmemSI52))
762 HIDDEN_ALIAS(movstrSI52,movmemSI52)
763 GLOBAL(movmemSI52): /* movmem also enters here, and at this label + 2 (skipping the load below) */
764 mov.l @(48,r5),r0
765 mov.l r0,@(48,r4)
766 .global GLOBAL(movmemSI48)
767 HIDDEN_FUNC(GLOBAL(movmemSI48))
768 HIDDEN_ALIAS(movstrSI48,movmemSI48)
769 GLOBAL(movmemSI48):
770 mov.l @(44,r5),r0
771 mov.l r0,@(44,r4)
772 .global GLOBAL(movmemSI44)
773 HIDDEN_FUNC(GLOBAL(movmemSI44))
774 HIDDEN_ALIAS(movstrSI44,movmemSI44)
775 GLOBAL(movmemSI44):
776 mov.l @(40,r5),r0
777 mov.l r0,@(40,r4)
778 .global GLOBAL(movmemSI40)
779 HIDDEN_FUNC(GLOBAL(movmemSI40))
780 HIDDEN_ALIAS(movstrSI40,movmemSI40)
781 GLOBAL(movmemSI40):
782 mov.l @(36,r5),r0
783 mov.l r0,@(36,r4)
784 .global GLOBAL(movmemSI36)
785 HIDDEN_FUNC(GLOBAL(movmemSI36))
786 HIDDEN_ALIAS(movstrSI36,movmemSI36)
787 GLOBAL(movmemSI36):
788 mov.l @(32,r5),r0
789 mov.l r0,@(32,r4)
790 .global GLOBAL(movmemSI32)
791 HIDDEN_FUNC(GLOBAL(movmemSI32))
792 HIDDEN_ALIAS(movstrSI32,movmemSI32)
793 GLOBAL(movmemSI32):
794 mov.l @(28,r5),r0
795 mov.l r0,@(28,r4)
796 .global GLOBAL(movmemSI28)
797 HIDDEN_FUNC(GLOBAL(movmemSI28))
798 HIDDEN_ALIAS(movstrSI28,movmemSI28)
799 GLOBAL(movmemSI28):
800 mov.l @(24,r5),r0
801 mov.l r0,@(24,r4)
802 .global GLOBAL(movmemSI24)
803 HIDDEN_FUNC(GLOBAL(movmemSI24))
804 HIDDEN_ALIAS(movstrSI24,movmemSI24)
805 GLOBAL(movmemSI24):
806 mov.l @(20,r5),r0
807 mov.l r0,@(20,r4)
808 .global GLOBAL(movmemSI20)
809 HIDDEN_FUNC(GLOBAL(movmemSI20))
810 HIDDEN_ALIAS(movstrSI20,movmemSI20)
811 GLOBAL(movmemSI20):
812 mov.l @(16,r5),r0
813 mov.l r0,@(16,r4)
814 .global GLOBAL(movmemSI16)
815 HIDDEN_FUNC(GLOBAL(movmemSI16))
816 HIDDEN_ALIAS(movstrSI16,movmemSI16)
817 GLOBAL(movmemSI16):
818 mov.l @(12,r5),r0
819 mov.l r0,@(12,r4)
820 .global GLOBAL(movmemSI12)
821 HIDDEN_FUNC(GLOBAL(movmemSI12))
822 HIDDEN_ALIAS(movstrSI12,movmemSI12)
823 GLOBAL(movmemSI12):
824 mov.l @(8,r5),r0
825 mov.l r0,@(8,r4)
826 .global GLOBAL(movmemSI8)
827 HIDDEN_FUNC(GLOBAL(movmemSI8))
828 HIDDEN_ALIAS(movstrSI8,movmemSI8)
829 GLOBAL(movmemSI8):
830 mov.l @(4,r5),r0
831 mov.l r0,@(4,r4)
832 .global GLOBAL(movmemSI4)
833 HIDDEN_FUNC(GLOBAL(movmemSI4))
834 HIDDEN_ALIAS(movstrSI4,movmemSI4)
835 GLOBAL(movmemSI4):
836 mov.l @(0,r5),r0
837 rts /* end of the chain: returns to whatever pr holds (the caller, or movmem_loop) */
838 mov.l r0,@(0,r4) /* delay slot */
839
840 ENDFUNC(GLOBAL(movmemSI64))
841 ENDFUNC(GLOBAL(movmemSI60))
842 ENDFUNC(GLOBAL(movmemSI56))
843 ENDFUNC(GLOBAL(movmemSI52))
844 ENDFUNC(GLOBAL(movmemSI48))
845 ENDFUNC(GLOBAL(movmemSI44))
846 ENDFUNC(GLOBAL(movmemSI40))
847 ENDFUNC(GLOBAL(movmemSI36))
848 ENDFUNC(GLOBAL(movmemSI32))
849 ENDFUNC(GLOBAL(movmemSI28))
850 ENDFUNC(GLOBAL(movmemSI24))
851 ENDFUNC(GLOBAL(movmemSI20))
852 ENDFUNC(GLOBAL(movmemSI16))
853 ENDFUNC(GLOBAL(movmemSI12))
854 ENDFUNC(GLOBAL(movmemSI8))
855 ENDFUNC(GLOBAL(movmemSI4))
856 ENDFUNC(GLOBAL(movmem))
857 #endif
858
859 #ifdef L_movmem_i4 /* longword block copies using post-increment loads: r4 = destination, r5 = source, r6 = loop count */
860 .text
861 .global GLOBAL(movmem_i4_even)
862 .global GLOBAL(movmem_i4_odd)
863 .global GLOBAL(movmemSI12_i4)
864
865 HIDDEN_FUNC(GLOBAL(movmem_i4_even))
866 HIDDEN_FUNC(GLOBAL(movmem_i4_odd))
867 HIDDEN_FUNC(GLOBAL(movmemSI12_i4))
868
869 HIDDEN_ALIAS(movstr_i4_even,movmem_i4_even)
870 HIDDEN_ALIAS(movstr_i4_odd,movmem_i4_odd)
871 HIDDEN_ALIAS(movstrSI12_i4,movmemSI12_i4)
872
873 .p2align 5
874 L_movmem_2mod4_end: /* exit taken from L_movmem_loop: store the two longwords still held in r0 / r1 */
875 mov.l r0,@(16,r4)
876 rts
877 mov.l r1,@(20,r4) /* delay slot */
878
879 .p2align 2
880
881 GLOBAL(movmem_i4_even): /* preload two longwords into r0 / r1 and join the loop at L_movmem_start_even */
882 mov.l @r5+,r0
883 bra L_movmem_start_even
884 mov.l @r5+,r1 /* delay slot */
885
886 GLOBAL(movmem_i4_odd): /* preload three longwords; r4 is biased by -4 so the loop's fixed store offsets line up */
887 mov.l @r5+,r1
888 add #-4,r4
889 mov.l @r5+,r2
890 mov.l @r5+,r3
891 mov.l r1,@(4,r4)
892 mov.l r2,@(8,r4)
893
894 L_movmem_loop:
895 mov.l r3,@(12,r4)
896 dt r6 /* r6 -= 1, T = (r6 == 0) */
897 mov.l @r5+,r0
898 bt/s L_movmem_2mod4_end /* count exhausted: store r0 / r1 and return */
899 mov.l @r5+,r1 /* delay slot: executed whether or not the branch is taken */
900 add #16,r4 /* advance the destination by the four longwords stored per pass */
901 L_movmem_start_even:
902 mov.l @r5+,r2
903 mov.l @r5+,r3
904 mov.l r0,@r4
905 dt r6 /* r6 -= 1, T = (r6 == 0) */
906 mov.l r1,@(4,r4)
907 bf/s L_movmem_loop /* count not yet zero: keep going */
908 mov.l r2,@(8,r4) /* delay slot: executed whether or not the branch is taken */
909 rts
910 mov.l r3,@(12,r4) /* delay slot: final store */
911
912 ENDFUNC(GLOBAL(movmem_i4_even))
913 ENDFUNC(GLOBAL(movmem_i4_odd))
914
915 .p2align 4
916 GLOBAL(movmemSI12_i4): /* copy exactly three longwords (12 bytes) from @r5 to @r4 via r0..r2 */
917 mov.l @r5,r0
918 mov.l @(4,r5),r1
919 mov.l @(8,r5),r2
920 mov.l r0,@r4
921 mov.l r1,@(4,r4)
922 rts
923 mov.l r2,@(8,r4) /* delay slot */
924
925 ENDFUNC(GLOBAL(movmemSI12_i4))
926 #endif
927
928 #ifdef L_mulsi3
929
930
931 .global GLOBAL(mulsi3)
932 HIDDEN_FUNC(GLOBAL(mulsi3))
933
934 ! r4 = aabb
935 ! r5 = ccdd
936 ! r0 = aabb*ccdd via partial products
937 !
938 ! if aa == 0 and cc == 0
939 ! r0 = bb*dd
940 !
941 ! else
942 ! r0 = bb*dd + (aa*dd*65536) + (cc*bb*65536)
943 !
944 ! Scratch registers written: r1, r2, r3 and macl.
945 GLOBAL(mulsi3):
946 mulu.w r4,r5 ! multiply the lsws macl=bb*dd
947 mov r5,r3 ! r3 = ccdd
948 swap.w r4,r2 ! r2 = bbaa
949 xtrct r2,r3 ! r3 = aacc
950 tst r3,r3 ! msws zero ?
951 bf hiset
952 rts ! yes - then we have the answer
953 sts macl,r0 ! delay slot: r0 = bb*dd
954
955 hiset: sts macl,r0 ! r0 = bb*dd
956 mulu.w r2,r5 ! brewing macl = aa*dd
957 sts macl,r1
958 mulu.w r3,r4 ! brewing macl = cc*bb
959 sts macl,r2
960 add r1,r2 ! r2 = aa*dd + cc*bb
961 shll16 r2 ! scale the cross terms by 65536
962 rts
963 add r2,r0 ! delay slot: add the cross terms to bb*dd
964
965 ENDFUNC(GLOBAL(mulsi3))
966 #endif
967 #endif /* ! __SH5__ */
968 #ifdef L_sdivsi3_i4 /* signed 32-bit divide done with floating-point double arithmetic */
969 .title "SH DIVIDE"
970 !! 4 byte integer Divide code for the Renesas SH
971 #ifdef __SH4__
972 !! args in r4 and r5, result in fpul, clobber dr0, dr2
973
974 .global GLOBAL(sdivsi3_i4)
975 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
976 GLOBAL(sdivsi3_i4):
977 lds r4,fpul /* dr0 = r4 (dividend) converted to floating point */
978 float fpul,dr0
979 lds r5,fpul /* dr2 = r5 (divisor) converted to floating point */
980 float fpul,dr2
981 fdiv dr2,dr0 /* dr0 = dr0 / dr2 */
982 rts
983 ftrc dr0,fpul /* delay slot: convert the quotient back to an integer in fpul */
984
985 ENDFUNC(GLOBAL(sdivsi3_i4))
986 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
987 !! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
988
989 #if ! __SH5__ || __SH5__ == 32
990 #if __SH5__
991 .mode SHcompact
992 #endif
993 .global GLOBAL(sdivsi3_i4)
994 HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
995 GLOBAL(sdivsi3_i4): /* same computation as the __SH4__ variant, bracketed by an fpscr save / set / restore */
996 sts.l fpscr,@-r15 /* save the caller's fpscr on the stack */
997 mov #8,r2
998 swap.w r2,r2 /* r2 = 8 << 16 = 0x00080000 */
999 lds r2,fpscr /* NOTE(review): presumably selects double precision (PR bit) - confirm against the SH-4 manual */
1000 lds r4,fpul
1001 float fpul,dr0
1002 lds r5,fpul
1003 float fpul,dr2
1004 fdiv dr2,dr0
1005 ftrc dr0,fpul
1006 rts
1007 lds.l @r15+,fpscr /* delay slot: restore the caller's fpscr */
1008
1009 ENDFUNC(GLOBAL(sdivsi3_i4))
1010 #endif /* ! __SH5__ || __SH5__ == 32 */
1011 #endif /* ! __SH4__ */
1012 #endif
1013
1014 #ifdef L_sdivsi3
1015 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1016 sh2e/sh3e code. */
1017 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1018 !!
1019 !! Steve Chamberlain
1020 !! sac@cygnus.com
1021 !!
1022 !!
1023
1024 !! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
1025
1026 .global GLOBAL(sdivsi3)
1027 #if __SHMEDIA__
1028 #if __SH5__ == 32
1029 .section .text..SHmedia32,"ax"
1030 #else
1031 .text
1032 #endif
1033 .align 2
1034 #if 0 /* disabled: bit-at-a-time shift / compare / subtract version, modelled by the C code in the comment below */
1035 /* The assembly code that follows is a hand-optimized version of the C
1036 code that follows. Note that the registers that are modified are
1037 exactly those listed as clobbered in the patterns divsi3_i1 and
1038 divsi3_i1_media.
1039
1040 int __sdivsi3 (i, j)
1041 int i, j;
1042 {
1043 register unsigned long long r18 asm ("r18");
1044 register unsigned long long r19 asm ("r19");
1045 register unsigned long long r0 asm ("r0") = 0;
1046 register unsigned long long r1 asm ("r1") = 1;
1047 register int r2 asm ("r2") = i >> 31;
1048 register int r3 asm ("r3") = j >> 31;
1049
1050 r2 = r2 ? r2 : r1;
1051 r3 = r3 ? r3 : r1;
1052 r18 = i * r2;
1053 r19 = j * r3;
1054 r2 *= r3;
1055
1056 r19 <<= 31;
1057 r1 <<= 31;
1058 do
1059 if (r18 >= r19)
1060 r0 |= r1, r18 -= r19;
1061 while (r19 >>= 1, r1 >>= 1);
1062
1063 return r2 * (int)r0;
1064 }
1065 */
1066 GLOBAL(sdivsi3):
1067 pt/l LOCAL(sdivsi3_dontadd), tr2
1068 pt/l LOCAL(sdivsi3_loop), tr1
1069 ptabs/l r18, tr0 /* tr0 = return target taken from r18; r18 is reused as scratch below */
1070 movi 0, r0 /* r0 = quotient accumulator */
1071 movi 1, r1 /* r1 = current quotient bit */
1072 shari.l r4, 31, r2 /* r2 / r3 = sign words (0 or -1) of dividend / divisor */
1073 shari.l r5, 31, r3
1074 cmveq r2, r1, r2 /* replace a zero sign word by 1, giving a +1 / -1 factor */
1075 cmveq r3, r1, r3
1076 muls.l r4, r2, r18 /* r18 = dividend times its sign factor */
1077 muls.l r5, r3, r19 /* r19 = divisor times its sign factor */
1078 muls.l r2, r3, r2 /* r2 = sign factor of the result */
1079 shlli r19, 31, r19
1080 shlli r1, 31, r1
1081 LOCAL(sdivsi3_loop):
1082 bgtu r19, r18, tr2 /* shifted divisor larger than the remainder: leave this quotient bit clear */
1083 or r0, r1, r0
1084 sub r18, r19, r18
1085 LOCAL(sdivsi3_dontadd):
1086 shlri r1, 1, r1
1087 shlri r19, 1, r19
1088 bnei r1, 0, tr1 /* loop until the quotient bit has been shifted out */
1089 muls.l r0, r2, r0 /* apply the result sign */
1090 add.l r0, r63, r0
1091 blink tr0, r63 /* return through tr0 */
1092 #elif 0 /* ! 0 */
1093 // inputs: r4,r5
1094 // clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
1095 // result in r0
1096 GLOBAL(sdivsi3): /* disabled alternative (#elif 0): straight-line version with no loop and no table lookup */
1097 // can create absolute value without extra latency,
1098 // but dependent on proper sign extension of inputs:
1099 // shari.l r5,31,r2
1100 // xor r5,r2,r20
1101 // sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
1102 shari.l r5,31,r2
1103 ori r2,1,r2
1104 muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
1105 movi 0xffffffffffffbb0c,r19 // shift count equiv 76
1106 shari.l r4,31,r3
1107 nsb r20,r0
1108 shlld r20,r0,r25
1109 shlri r25,48,r25
1110 sub r19,r25,r1
1111 mmulfx.w r1,r1,r2
1112 mshflo.w r1,r63,r1
1113 // If r4 was to be used in-place instead of r21, could use this sequence
1114 // to compute absolute:
1115 // sub r63,r4,r19 // compute absolute value of r4
1116 // shlri r4,32,r3 // into lower 32 bit of r4, keeping
1117 // mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
1118 ori r3,1,r3
1119 mmulfx.w r25,r2,r2
1120 sub r19,r0,r0
1121 muls.l r4,r3,r21
1122 msub.w r1,r2,r2
1123 addi r2,-2,r1
1124 mulu.l r21,r1,r19
1125 mmulfx.w r2,r2,r2
1126 shlli r1,15,r1
1127 shlrd r19,r0,r19
1128 mulu.l r19,r20,r3
1129 mmacnfx.wl r25,r2,r1
1130 ptabs r18,tr0 /* tr0 = return target taken from r18; r18 is reused as scratch below */
1131 sub r21,r3,r25
1132
1133 mulu.l r25,r1,r2
1134 addi r0,14,r0
1135 xor r4,r5,r18
1136 shlrd r2,r0,r2
1137 mulu.l r2,r20,r3
1138 add r19,r2,r19
1139 shari.l r18,31,r18 /* r18 = sign word (0 or -1) of r4 ^ r5 */
1140 sub r25,r3,r25
1141
1142 mulu.l r25,r1,r2
1143 sub r25,r20,r25
1144 add r19,r18,r19
1145 shlrd r2,r0,r2
1146 mulu.l r2,r20,r3
1147 addi r25,1,r25
1148 add r19,r2,r19
1149
1150 cmpgt r25,r3,r25
1151 add.l r19,r25,r0
1152 xor r0,r18,r0
1153 blink tr0,r63 /* return through tr0 */
1154 #else /* ! 0 && ! 0 */
1155
1156 // inputs: r4,r5
1157 // clobbered: r1,r18,r19,r20,r21,r25,tr0
1158 // result in r0
1159 HIDDEN_FUNC(GLOBAL(sdivsi3_2)) /* this is the SHmedia variant actually assembled: both branches above are disabled */
1160 #ifndef __pic__
1161 FUNC(GLOBAL(sdivsi3))
1162 GLOBAL(sdivsi3): /* this is the shcompact entry point */
1163 // The special SHmedia entry point sdivsi3_1 prevents accidental linking
1164 // with the SHcompact implementation, which clobbers tr1 / tr2.
1165 .global GLOBAL(sdivsi3_1)
1166 GLOBAL(sdivsi3_1):
1167 .global GLOBAL(div_table_internal)
1168 movi (GLOBAL(div_table_internal) >> 16) & 65535, r20 /* r20 = address of div_table_internal, built from two 16-bit halves */
1169 shori GLOBAL(div_table_internal) & 65535, r20
1170 #endif
1171 .global GLOBAL(sdivsi3_2)
1172 // div_table in r20
1173 // clobbered: r1,r18,r19,r21,r25,tr0
1174 GLOBAL(sdivsi3_2): /* entry for callers that already hold the table address in r20 */
1175 nsb r5, r1
1176 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
1177 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
1178 ldx.ub r20, r21, r19 // u0.8
1179 shari r25, 32, r25 // normalize to s2.30
1180 shlli r21, 1, r21
1181 muls.l r25, r19, r19 // s2.38
1182 ldx.w r20, r21, r21 // s2.14
1183 ptabs r18, tr0 /* tr0 = return target taken from r18; r18 is reused as scratch below */
1184 shari r19, 24, r19 // truncate to s2.14
1185 sub r21, r19, r19 // some 11 bit inverse in s1.14
1186 muls.l r19, r19, r21 // u0.28
1187 sub r63, r1, r1
1188 addi r1, 92, r1 /* r1 = 92 - (nsb result): shift count for the final shard */
1189 muls.l r25, r21, r18 // s2.58
1190 shlli r19, 45, r19 // multiply by two and convert to s2.58
1191 /* bubble */
1192 sub r19, r18, r18
1193 shari r18, 28, r18 // some 22 bit inverse in s1.30
1194 muls.l r18, r25, r0 // s2.60
1195 muls.l r18, r4, r25 // s32.30
1196 /* bubble */
1197 shari r0, 16, r19 // s-16.44
1198 muls.l r19, r18, r19 // s-16.74
1199 shari r25, 63, r0
1200 shari r4, 14, r18 // s19.-14
1201 shari r19, 30, r19 // s-16.44
1202 muls.l r19, r18, r19 // s15.30
1203 xor r21, r0, r21 // You could also use the constant 1 << 27.
1204 add r21, r25, r21
1205 sub r21, r19, r21
1206 shard r21, r1, r21
1207 sub r21, r0, r0
1208 blink tr0, r63 /* return through tr0 */
1209 #ifndef __pic__
1210 ENDFUNC(GLOBAL(sdivsi3))
1211 #endif
1212 ENDFUNC(GLOBAL(sdivsi3_2))
1213 #endif
1214 #elif defined __SHMEDIA__
1215 /* m5compact-nofpu */
1216 // clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
1217 .mode SHmedia
1218 .section .text..SHmedia32,"ax"
1219 .align 2
1220 FUNC(GLOBAL(sdivsi3))
1221 GLOBAL(sdivsi3): /* shift-and-subtract division on absolute values; the sign is applied at the end */
1222 pt/l LOCAL(sdivsi3_dontsub), tr0
1223 pt/l LOCAL(sdivsi3_loop), tr1
1224 ptabs/l r18,tr2 /* tr2 = return target taken from r18; r18 is reused as scratch below */
1225 shari.l r4,31,r18 /* r18 / r19 = sign words (0 or -1) of dividend / divisor */
1226 shari.l r5,31,r19
1227 xor r4,r18,r20 /* (x ^ sign) - sign = absolute value, for both operands */
1228 xor r5,r19,r21
1229 sub.l r20,r18,r20
1230 sub.l r21,r19,r21
1231 xor r18,r19,r19 /* r19 = sign word of the result */
1232 shlli r21,32,r25 /* r25 = divisor magnitude << 32 */
1233 addi r25,-1,r21 /* r21 = (divisor magnitude << 32) - 1 */
1234 addz.l r20,r63,r20
1235 LOCAL(sdivsi3_loop):
1236 shlli r20,1,r20
1237 bgeu/u r21,r20,tr0 /* r21 >= r20 (unsigned): skip the subtraction for this step */
1238 sub r20,r21,r20
1239 LOCAL(sdivsi3_dontsub):
1240 addi.l r25,-1,r25 /* r25 now serves as the loop counter */
1241 bnei r25,-32,tr1 /* loop until the counter reaches -32 */
1242 xor r20,r19,r20 /* apply the result sign: (q ^ sign) - sign */
1243 sub.l r20,r19,r0
1244 blink tr2,r63 /* return through tr2 */
1245 ENDFUNC(GLOBAL(sdivsi3))
1246 #else /* ! __SHMEDIA__ */
1247 FUNC(GLOBAL(sdivsi3))
1248 GLOBAL(sdivsi3): /* SHcompact version: r4 / r5 -> r0 using 32 div1 steps; returns 0 when the divisor is 0 */
1249 mov r4,r1 /* r1 = dividend; the quotient bits are rotated into it below */
1250 mov r5,r0 /* r0 = divisor */
1251
1252 tst r0,r0
1253 bt div0 /* divisor == 0: take the early exit that returns 0 */
1254 mov #0,r2
1255 div0s r2,r1 /* with r2 == 0 this leaves T = sign bit of the dividend */
1256 subc r3,r3 /* r3 = -T: sign extension (high word) of the dividend */
1257 subc r2,r1 /* r1 -= T: adjustment applied to a negative dividend */
1258 div0s r0,r3 /* set up the divide step state from the signs of divisor and dividend */
1259 rotcl r1 /* each rotcl / div1 pair below is one of the 32 divide steps */
1260 div1 r0,r3
1261 rotcl r1
1262 div1 r0,r3
1263 rotcl r1
1264 div1 r0,r3
1265 rotcl r1
1266 div1 r0,r3
1267 rotcl r1
1268 div1 r0,r3
1269 rotcl r1
1270 div1 r0,r3
1271 rotcl r1
1272 div1 r0,r3
1273 rotcl r1
1274 div1 r0,r3
1275 rotcl r1
1276 div1 r0,r3
1277 rotcl r1
1278 div1 r0,r3
1279 rotcl r1
1280 div1 r0,r3
1281 rotcl r1
1282 div1 r0,r3
1283 rotcl r1
1284 div1 r0,r3
1285 rotcl r1
1286 div1 r0,r3
1287 rotcl r1
1288 div1 r0,r3
1289 rotcl r1
1290 div1 r0,r3
1291 rotcl r1
1292 div1 r0,r3
1293 rotcl r1
1294 div1 r0,r3
1295 rotcl r1
1296 div1 r0,r3
1297 rotcl r1
1298 div1 r0,r3
1299 rotcl r1
1300 div1 r0,r3
1301 rotcl r1
1302 div1 r0,r3
1303 rotcl r1
1304 div1 r0,r3
1305 rotcl r1
1306 div1 r0,r3
1307 rotcl r1
1308 div1 r0,r3
1309 rotcl r1
1310 div1 r0,r3
1311 rotcl r1
1312 div1 r0,r3
1313 rotcl r1
1314 div1 r0,r3
1315 rotcl r1
1316 div1 r0,r3
1317 rotcl r1
1318 div1 r0,r3
1319 rotcl r1
1320 div1 r0,r3
1321 rotcl r1
1322 div1 r0,r3
1323 rotcl r1 /* rotate in the last quotient bit */
1324 addc r2,r1 /* r1 += T (r2 is 0): final quotient correction */
1325 rts
1326 mov r1,r0 /* delay slot: return the quotient in r0 */
1327
1328
1329 div0: rts
1330 mov #0,r0 /* delay slot: result for a zero divisor */
1331
1332 ENDFUNC(GLOBAL(sdivsi3))
1333 #endif /* ! __SHMEDIA__ */
1334 #endif /* ! __SH4__ */
1335 #endif
1336 #ifdef L_udivsi3_i4
1337
1338 .title "SH DIVIDE"
1339 !! 4 byte integer Divide code for the Renesas SH
1340 #ifdef __SH4__
1341 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4,
1342 !! and t bit
/* Unsigned 32-bit division carried out in double-precision floating point.
   "float" converts fpul as a signed integer, so each operand first has
   its top bit flipped (which subtracts 2^31) and the constant 2^31 at L1
   is added back as a double afterwards.
   Divisors 0 and 1 take the "trivial" exit, which returns the dividend
   unchanged.  */
1343
1344 .global GLOBAL(udivsi3_i4)
1345 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1346 GLOBAL(udivsi3_i4):
1347 mov #1,r1
1348 cmp/hi r1,r5 /* T = (divisor > 1), unsigned */
1349 bf trivial
1350 rotr r1 /* r1 = 0x80000000 */
1351 xor r1,r4 /* bias the dividend by -2^31 */
1352 lds r4,fpul
1353 mova L1,r0 /* r0 -> the double constant 2^31 */
1354 #ifdef FMOVD_WORKS
1355 fmov.d @r0+,dr4
1356 #else
/* Load the constant one 32-bit half at a time.  NOTE(review): DR40/DR41
   are defined outside this chunk, presumably as the two halves of dr4.  */
1357 fmov.s @r0+,DR40
1358 fmov.s @r0,DR41
1359 #endif
1360 float fpul,dr0
1361 xor r1,r5 /* bias the divisor the same way */
1362 lds r5,fpul
1363 float fpul,dr2
1364 fadd dr4,dr0 /* undo the bias: dr0 = dividend as a double */
1365 fadd dr4,dr2 /* dr2 = divisor as a double */
1366 fdiv dr2,dr0
1367 rts
1368 ftrc dr0,fpul /* delay slot: truncate the quotient into fpul */
1369
1370 trivial:
1371 rts
1372 lds r4,fpul /* delay slot: return the dividend itself */
1373
1374 .align 2
1375 #ifdef FMOVD_WORKS
1376 .align 3 ! make double below 8 byte aligned.
1377 #endif
1378 L1:
1379 .double 2147483648
1380
1381 ENDFUNC(GLOBAL(udivsi3_i4))
1382 #elif defined (__SH5__) && ! defined (__SH4_NOFPU__)
1383 #if ! __SH5__ || __SH5__ == 32
1384 !! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
/* SHmedia version: zero-extend both operands to 64 bits, convert them to
   double, divide, and truncate the quotient back to an integer.
   No test for a zero divisor is made.  */
1385 .mode SHmedia
1386 .global GLOBAL(udivsi3_i4)
1387 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1388 GLOBAL(udivsi3_i4):
1389 addz.l r4,r63,r20 /* r20 = zero-extended dividend */
1390 addz.l r5,r63,r21 /* r21 = zero-extended divisor */
1391 fmov.qd r20,dr0 /* copy the 64-bit integers into FP registers */
1392 fmov.qd r21,dr32
1393 ptabs r18,tr0 /* tr0 = return address taken from r18 */
1394 float.qd dr0,dr0 /* 64-bit integer -> double */
1395 float.qd dr32,dr32
1396 fdiv.d dr0,dr32,dr0
1397 ftrc.dq dr0,dr32 /* truncate the quotient to a 64-bit integer in dr32 */
1398 fmov.s fr33,fr32 /* NOTE(review): presumably puts the low word where the caller reads "fpul" - confirm */
1399 blink tr0,r63 /* return */
1400
1401 ENDFUNC(GLOBAL(udivsi3_i4))
1402 #endif /* ! __SH5__ || __SH5__ == 32 */
1403 #elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
1404 !! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
/* Same algorithm as the __SH4__ version (bias by 2^31, divide as doubles,
   truncate), but FPSCR is saved on the stack, replaced by the word stored
   at L1 for the duration of the computation, and restored on return.
   NOTE(review): 0x80000 / 0x180000 presumably select double precision
   (and 64-bit fmov when FMOVD_WORKS) - confirm against the FPSCR layout.
   Divisors 0 and 1 take the "trivial" exit before FPSCR is touched.  */
1405
1406 .global GLOBAL(udivsi3_i4)
1407 HIDDEN_FUNC(GLOBAL(udivsi3_i4))
1408 GLOBAL(udivsi3_i4):
1409 mov #1,r1
1410 cmp/hi r1,r5 /* T = (divisor > 1), unsigned */
1411 bf trivial
1412 sts.l fpscr,@-r15 /* save the caller's FPSCR */
1413 mova L1,r0
1414 lds.l @r0+,fpscr /* temporary FPSCR from the first word at L1 */
1415 rotr r1 /* r1 = 0x80000000 */
1416 xor r1,r4 /* bias the dividend by -2^31 */
1417 lds r4,fpul
1418 #ifdef FMOVD_WORKS
1419 fmov.d @r0+,dr4
1420 #else
1421 fmov.s @r0+,DR40
1422 fmov.s @r0,DR41
1423 #endif
1424 float fpul,dr0
1425 xor r1,r5 /* bias the divisor the same way */
1426 lds r5,fpul
1427 float fpul,dr2
1428 fadd dr4,dr0 /* undo the bias on both operands */
1429 fadd dr4,dr2
1430 fdiv dr2,dr0
1431 ftrc dr0,fpul /* truncate the quotient into fpul */
1432 rts
1433 lds.l @r15+,fpscr /* delay slot: restore the caller's FPSCR */
1434
1435 #ifdef FMOVD_WORKS
1436 .align 3 ! make double below 8 byte aligned.
1437 #endif
1438 trivial:
1439 rts
1440 lds r4,fpul /* delay slot: return the dividend itself */
1441
1442 .align 2
1443 L1:
1444 #ifndef FMOVD_WORKS
1445 .long 0x80000
1446 #else
1447 .long 0x180000
1448 #endif
1449 .double 2147483648
1450
1451 ENDFUNC(GLOBAL(udivsi3_i4))
1452 #endif /* ! __SH4__ */
1453 #endif
1454
1455 #ifdef L_udivsi3
1456 /* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
1457 sh2e/sh3e code. */
1458 #if (! defined(__SH4__) && ! defined (__SH4_SINGLE__)) || defined (__linux__)
1459
1460 !! args in r4 and r5, result in r0, clobbers r4, pr, and t bit
1461 .global GLOBAL(udivsi3)
1462 HIDDEN_FUNC(GLOBAL(udivsi3))
1463
1464 #if __SHMEDIA__
1465 #if __SH5__ == 32
1466 .section .text..SHmedia32,"ax"
1467 #else
1468 .text
1469 #endif
1470 .align 2
1471 #if 0
/* This whole variant is compiled out by the enclosing "#if 0"; it is kept
   as a readable reference for the algorithm.  */
1472 /* The assembly code that follows is a hand-optimized version of the C
1473 code that follows. Note that the registers that are modified are
1474 exactly those listed as clobbered in the patterns udivsi3_i1 and
1475 udivsi3_i1_media.
1476
1477 unsigned
1478 __udivsi3 (i, j)
1479 unsigned i, j;
1480 {
1481 register unsigned long long r0 asm ("r0") = 0;
1482 register unsigned long long r18 asm ("r18") = 1;
1483 register unsigned long long r4 asm ("r4") = i;
1484 register unsigned long long r19 asm ("r19") = j;
1485
1486 r19 <<= 31;
1487 r18 <<= 31;
1488 do
1489 if (r4 >= r19)
1490 r0 |= r18, r4 -= r19;
1491 while (r19 >>= 1, r18 >>= 1);
1492
1493 return r0;
1494 }
1495 */
1496 GLOBAL(udivsi3):
1497 pt/l LOCAL(udivsi3_dontadd), tr2
1498 pt/l LOCAL(udivsi3_loop), tr1
1499 ptabs/l r18, tr0 /* tr0 = return address taken from r18 */
1500 movi 0, r0 /* quotient accumulator */
1501 movi 1, r18 /* current quotient bit */
1502 addz.l r5, r63, r19 /* zero-extended divisor */
1503 addz.l r4, r63, r4 /* zero-extended dividend */
1504 shlli r19, 31, r19
1505 shlli r18, 31, r18
1506 LOCAL(udivsi3_loop):
1507 bgtu r19, r4, tr2 /* shifted divisor > remainder: no bit here */
1508 or r0, r18, r0
1509 sub r4, r19, r4
1510 LOCAL(udivsi3_dontadd):
1511 shlri r18, 1, r18
1512 shlri r19, 1, r19
1513 bnei r18, 0, tr1 /* loop until the bit mask has been shifted out */
1514 blink tr0, r63
1515 #else
/* Branch-free unsigned 32-bit division for SHmedia.
   NOTE(review): the code appears to normalize the divisor (nsb/shlld),
   build a 16-bit fixed-point reciprocal estimate from it
   (mmulfx.w/msub.w), and then run three multiply / shift / subtract
   rounds that each add a partial quotient to r18 and reduce the
   remainder in r25.  The fixed-point formats are not documented in this
   file - confirm before changing any constant or shift count.
   No test for a zero divisor is made.  */
1516 GLOBAL(udivsi3):
1517 // inputs: r4,r5
1518 // clobbered: r18,r19,r20,r21,r22,r25,tr0
1519 // result in r0.
1520 addz.l r5,r63,r22 /* r22 = zero-extended divisor */
1521 nsb r22,r0
1522 shlld r22,r0,r25 /* normalize: shift the divisor up by the count from nsb */
1523 shlri r25,48,r25 /* keep its top 16 bits */
1524 movi 0xffffffffffffbb0c,r20 // shift count equiv 76
1525 sub r20,r25,r21
1526 mmulfx.w r21,r21,r19
1527 mshflo.w r21,r63,r21
1528 ptabs r18,tr0 /* tr0 = return address taken from r18 */
1529 mmulfx.w r25,r19,r19
1530 sub r20,r0,r0 /* r0 becomes the shift count used by the shlrd steps below */
1531 /* bubble */
1532 msub.w r21,r19,r19
1533 addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
1534 before the msub.w, but we need a different value for
1535 r19 to keep errors under control. */
1536 mulu.l r4,r21,r18 /* round 1: dividend * reciprocal estimate */
1537 mmulfx.w r19,r19,r19
1538 shlli r21,15,r21
1539 shlrd r18,r0,r18 /* r18 = first partial quotient */
1540 mulu.l r18,r22,r20
1541 mmacnfx.wl r25,r19,r21
1542 /* bubble */
1543 sub r4,r20,r25 /* r25 = remainder after round 1 */
1544
1545 mulu.l r25,r21,r19 /* round 2 */
1546 addi r0,14,r0
1547 /* bubble */
1548 shlrd r19,r0,r19
1549 mulu.l r19,r22,r20
1550 add r18,r19,r18 /* accumulate the quotient */
1551 /* bubble */
1552 sub.l r25,r20,r25 /* reduce the remainder */
1553
1554 mulu.l r25,r21,r19 /* round 3 */
1555 addz.l r25,r63,r25
1556 sub r25,r22,r25
1557 shlrd r19,r0,r19
1558 mulu.l r19,r22,r20
1559 addi r25,1,r25
1560 add r18,r19,r18
1561
1562 cmpgt r25,r20,r25 /* r25 = 0 or 1: final quotient adjustment */
1563 add.l r18,r25,r0 /* result in r0 */
1564 blink tr0,r63 /* return */
1565 #endif
1566 #elif defined (__SHMEDIA__)
1567 /* m5compact-nofpu - more emphasis on code size than on speed, but don't
1568 ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
1569 So use a short shmedia loop. */
/* Unsigned 32-bit division with a 32-step shift-and-subtract loop.
   In:  r4 = dividend, r5 = divisor.  Out: r0 = quotient.
   The 64-bit register r20 holds the running remainder in its high word
   while quotient bits collect in its low word.
   No test for a zero divisor is made.  */
1570 // clobbered: r20,r21,r25,tr0,tr1,tr2
1571 .mode SHmedia
1572 .section .text..SHmedia32,"ax"
1573 .align 2
1574 GLOBAL(udivsi3):
1575 pt/l LOCAL(udivsi3_dontsub), tr0
1576 pt/l LOCAL(udivsi3_loop), tr1
1577 ptabs/l r18,tr2 /* tr2 = return address taken from r18 */
1578 shlli r5,32,r25 /* r25 = divisor << 32; its (zero) low word doubles as loop counter */
1579 addi r25,-1,r21 /* r21 = (divisor << 32) - 1 */
1580 addz.l r4,r63,r20 /* r20 = zero-extended dividend */
1581 LOCAL(udivsi3_loop):
1582 shlli r20,1,r20
1583 bgeu/u r21,r20,tr0 /* skip the subtraction when r21 >= r20 (unsigned) */
1584 sub r20,r21,r20 /* subtracts the divisor from the high word; the "- 1" sets a quotient bit in the low word */
1585 LOCAL(udivsi3_dontsub):
1586 addi.l r25,-1,r25 /* 32-bit counter: -1, -2, ... */
1587 bnei r25,-32,tr1 /* repeat until 32 steps have been done */
1588 add.l r20,r63,r0 /* low word of r20 is the quotient */
1589 blink tr2,r63 /* return */
1590 #else /* ! defined (__SHMEDIA__) */
/* Unsigned 32-bit division for SHcompact using div1 steps.
   In:  r4 = dividend, r5 = divisor.  Out: r0 = quotient.
   pr is pushed on entry and popped before returning because the body
   calls the helpers below with bsr.

   Helpers (entered with bsr, so the caller's delay-slot instruction runs
   first):
     LOCAL(div8)  - eight div1 steps;
     LOCAL(div7)  - seven div1 steps (the last one in the rts delay slot);
     LOCAL(divx4) - four div1 steps, rotating the first three result
                    bits into r0; the caller rotates in the fourth.  */
1591 LOCAL(div8):
1592 div1 r5,r4
1593 LOCAL(div7):
1594 div1 r5,r4; div1 r5,r4; div1 r5,r4
1595 div1 r5,r4; div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
1596
1597 LOCAL(divx4):
1598 div1 r5,r4; rotcl r0
1599 div1 r5,r4; rotcl r0
1600 div1 r5,r4; rotcl r0
1601 rts; div1 r5,r4
1602
1603 GLOBAL(udivsi3):
1604 sts.l pr,@-r15 /* save the return address; bsr below overwrites pr */
1605 extu.w r5,r0
1606 cmp/eq r5,r0 /* T = divisor fits in 16 bits */
/* The non-sh1 path uses bf/s, so the div0u below executes in its delay
   slot on both outcomes; the sh1 path uses plain bf and repeats div0u at
   LOCAL(large_divisor) instead.  */
1607 #ifdef __sh1__
1608 bf LOCAL(large_divisor)
1609 #else
1610 bf/s LOCAL(large_divisor)
1611 #endif
1612 div0u
/* Divisor < 0x10000: two rounds of 16 div1 steps each
   (8 via div8, then 1 in the bsr delay slot + 7 via div7).  */
1613 swap.w r4,r0
1614 shlr16 r4
1615 bsr LOCAL(div8)
1616 shll16 r5 /* delay slot: move the divisor into the upper half */
1617 bsr LOCAL(div7)
1618 div1 r5,r4
1619 xtrct r4,r0
1620 xtrct r0,r4
1621 bsr LOCAL(div8)
1622 swap.w r4,r4
1623 bsr LOCAL(div7)
1624 div1 r5,r4
1625 lds.l @r15+,pr /* restore the return address */
1626 xtrct r4,r0
1627 swap.w r0,r0
1628 rotcl r0
1629 rts
1630 shlr16 r5 /* delay slot: undo the shll16 on the divisor */
1631
/* Divisor >= 0x10000: 16 div1 steps in four divx4 calls.  */
1632 LOCAL(large_divisor):
1633 #ifdef __sh1__
1634 div0u
1635 #endif
1636 mov #0,r0
1637 xtrct r4,r0 /* r0 = dividend << 16 */
1638 xtrct r0,r4 /* r4 = dividend >> 16 */
1639 bsr LOCAL(divx4)
1640 rotcl r0
1641 bsr LOCAL(divx4)
1642 rotcl r0
1643 bsr LOCAL(divx4)
1644 rotcl r0
1645 bsr LOCAL(divx4)
1646 rotcl r0
1647 lds.l @r15+,pr /* restore the return address */
1648 rts
1649 rotcl r0 /* delay slot: rotate in the last result bit */
1650
1651 ENDFUNC(GLOBAL(udivsi3))
1652 #endif /* ! __SHMEDIA__ */
1653 #endif /* __SH4__ */
1654 #endif /* L_udivsi3 */
1655
1656 #ifdef L_udivdi3
1657 #ifdef __SHMEDIA__
/* Unsigned 64-bit division for SHmedia.
   In:  r2 = dividend, r3 = divisor.  Out: r2 = quotient.
   HIDDEN_ALIAS provides the name udivdi3_internal, which divdi3 branches
   to.  The code builds a 31-bit reciprocal of the normalized divisor (see
   the inline comments) and then takes one of two paths, chosen by the
   bgt/u on r9 below: the fall-through "small divisor" path with three
   divide steps, or LOCAL(large_divisor) with two steps plus adjustments.
   NOTE(review): the fixed-point formats of the intermediate values are
   only partly documented inline - confirm before changing any constant
   or shift count.  As the inline comment says, no test for a zero
   divisor is made.  */
1658 .mode SHmedia
1659 .section .text..SHmedia32,"ax"
1660 .align 2
1661 .global GLOBAL(udivdi3)
1662 FUNC(GLOBAL(udivdi3))
1663 GLOBAL(udivdi3):
1664 HIDDEN_ALIAS(udivdi3_internal,udivdi3)
1665 shlri r3,1,r4
1666 nsb r4,r22 /* r22 = normalization shift for the divisor */
1667 shlld r3,r22,r6 /* r6 = normalized divisor */
1668 shlri r6,49,r5
1669 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1670 sub r21,r5,r1
1671 mmulfx.w r1,r1,r4
1672 mshflo.w r1,r63,r1
1673 sub r63,r22,r20 // r63 == 64 % 64
1674 mmulfx.w r5,r4,r4
1675 pta LOCAL(large_divisor),tr0
1676 addi r20,32,r9
1677 msub.w r1,r4,r1
1678 madd.w r1,r1,r1
1679 mmulfx.w r1,r1,r4
1680 shlri r6,32,r7 /* r7 = upper 32 bits of the normalized divisor */
1681 bgt/u r9,r63,tr0 // large_divisor
1682 mmulfx.w r5,r4,r4
1683 shlri r2,32+14,r19
1684 addi r22,-31,r0
1685 msub.w r1,r4,r1
1686
1687 mulu.l r1,r7,r4
1688 addi r1,-3,r5
1689 mulu.l r5,r19,r5
1690 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1691 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1692 the case may be, %0000000000000000 000.11111111111, still */
1693 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1694 mulu.l r5,r3,r8
1695 mshalds.l r1,r21,r1
1696 shari r4,26,r4
1697 shlld r8,r0,r8
1698 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1699 sub r2,r8,r2
1700 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1701
1702 shlri r2,22,r21
1703 mulu.l r21,r1,r21
1704 shlld r5,r0,r8 /* r8 = partial quotient from the first step */
1705 addi r20,30-22,r0
1706 shlrd r21,r0,r21
1707 mulu.l r21,r3,r5
1708 add r8,r21,r8 /* accumulate the second partial quotient */
1709 mcmpgt.l r21,r63,r21 // See Note 1
1710 addi r20,30,r0
1711 mshfhi.l r63,r21,r21
1712 sub r2,r5,r2
1713 andc r2,r21,r2
1714
1715 /* small divisor: need a third divide step */
1716 mulu.l r2,r1,r7
1717 ptabs r18,tr0 /* tr0 = return address taken from r18 */
1718 addi r2,1,r2
1719 shlrd r7,r0,r7
1720 mulu.l r7,r3,r5
1721 add r8,r7,r8
1722 sub r2,r3,r2
1723 cmpgt r2,r5,r5 /* r5 = 0 or 1: final quotient adjustment */
1724 add r8,r5,r2 /* quotient in r2 */
1725 /* could test r3 here to check for divide by zero. */
1726 blink tr0,r63
1727
1728 LOCAL(large_divisor):
1729 mmulfx.w r5,r4,r4
1730 shlrd r2,r9,r25
1731 shlri r25,32,r8
1732 msub.w r1,r4,r1
1733
1734 mulu.l r1,r7,r4
1735 addi r1,-3,r5
1736 mulu.l r5,r8,r5
1737 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1738 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1739 the case may be, %0000000000000000 000.11111111111, still */
1740 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1741 shlri r5,14-1,r8
1742 mulu.l r8,r7,r5
1743 mshalds.l r1,r21,r1
1744 shari r4,26,r4
1745 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1746 sub r25,r5,r25
1747 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1748
1749 shlri r25,22,r21
1750 mulu.l r21,r1,r21
1751 pta LOCAL(no_lo_adj),tr0
1752 addi r22,32,r0
1753 shlri r21,40,r21
1754 mulu.l r21,r7,r5
1755 add r8,r21,r8
1756 shlld r2,r0,r2
1757 sub r25,r5,r25
1758 bgtu/u r7,r25,tr0 // no_lo_adj
1759 addi r8,1,r8 /* remainder still >= r7: bump the quotient once more */
1760 sub r25,r7,r25
1761 LOCAL(no_lo_adj):
1762 mextr4 r2,r25,r2
1763
1764 /* large_divisor: only needs a few adjustments. */
1765 mulu.l r8,r6,r5
1766 ptabs r18,tr0 /* tr0 = return address taken from r18 */
1767 /* bubble */
1768 cmpgtu r5,r2,r5 /* r5 = 1 if the quotient estimate is one too large */
1769 sub r8,r5,r2 /* quotient in r2 */
1770 blink tr0,r63
1771 ENDFUNC(GLOBAL(udivdi3))
1772 /* Note 1: To shift the result of the second divide stage so that the result
1773 always fits into 32 bits, yet we still reduce the rest sufficiently
1774 would require a lot of instructions to do the shifts just right. Using
1775 the full 64 bit shift result to multiply with the divisor would require
1776 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1777 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1778 know that the rest after taking this partial result into account will
1779 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1780 upper 32 bits of the partial result are nonzero. */
1781 #endif /* __SHMEDIA__ */
1782 #endif /* L_udivdi3 */
1783
1784 #ifdef L_divdi3
1785 #ifdef __SHMEDIA__
/* Signed 64-bit division for SHmedia, implemented on top of
   udivdi3_internal.
   In:  r2 = dividend, r3 = divisor.  Out: r2 = quotient.
   Both operands are replaced by their magnitudes.  If their signs were
   equal the unsigned routine is entered as a tail call (r18 still holds
   this function's return address); otherwise it is called with the link
   in r18 and its result is negated before returning.  */
1786 .mode SHmedia
1787 .section .text..SHmedia32,"ax"
1788 .align 2
1789 .global GLOBAL(divdi3)
1790 FUNC(GLOBAL(divdi3))
1791 GLOBAL(divdi3):
1792 pta GLOBAL(udivdi3_internal),tr0
1793 shari r2,63,r22 /* r22 = 0 or -1: sign mask of the dividend */
1794 shari r3,63,r23 /* r23 = 0 or -1: sign mask of the divisor */
1795 xor r2,r22,r2
1796 xor r3,r23,r3
1797 sub r2,r22,r2 /* r2 = |dividend| */
1798 sub r3,r23,r3 /* r3 = |divisor| */
1799 beq/u r22,r23,tr0 /* same sign: tail-call the unsigned divide */
1800 ptabs r18,tr1 /* keep our own return address in tr1 */
1801 blink tr0,r18 /* call udivdi3_internal, link in r18 */
1802 sub r63,r2,r2 /* signs differed: negate the quotient */
1803 blink tr1,r63 /* return */
1804 ENDFUNC(GLOBAL(divdi3))
1805 #endif /* __SHMEDIA__ */
1806 #endif /* L_divdi3 */
1807
1808 #ifdef L_umoddi3
1809 #ifdef __SHMEDIA__
/* Unsigned 64-bit remainder for SHmedia.
   In:  r2 = dividend, r3 = divisor.  Out: r2 = remainder.
   HIDDEN_ALIAS provides the name umoddi3_internal, which moddi3 branches
   to.  The instruction sequence parallels udivdi3 - the same reciprocal
   construction and the same small / large divisor split - but it tracks
   the rest instead of accumulating a quotient.
   NOTE(review): the fixed-point formats of the intermediate values are
   only partly documented inline - confirm before changing any constant
   or shift count.  As the inline comment says, no test for a zero
   divisor is made.  */
1810 .mode SHmedia
1811 .section .text..SHmedia32,"ax"
1812 .align 2
1813 .global GLOBAL(umoddi3)
1814 FUNC(GLOBAL(umoddi3))
1815 GLOBAL(umoddi3):
1816 HIDDEN_ALIAS(umoddi3_internal,umoddi3)
1817 shlri r3,1,r4
1818 nsb r4,r22 /* r22 = normalization shift for the divisor */
1819 shlld r3,r22,r6 /* r6 = normalized divisor */
1820 shlri r6,49,r5
1821 movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
1822 sub r21,r5,r1
1823 mmulfx.w r1,r1,r4
1824 mshflo.w r1,r63,r1
1825 sub r63,r22,r20 // r63 == 64 % 64
1826 mmulfx.w r5,r4,r4
1827 pta LOCAL(large_divisor),tr0
1828 addi r20,32,r9
1829 msub.w r1,r4,r1
1830 madd.w r1,r1,r1
1831 mmulfx.w r1,r1,r4
1832 shlri r6,32,r7 /* r7 = upper 32 bits of the normalized divisor */
1833 bgt/u r9,r63,tr0 // large_divisor
1834 mmulfx.w r5,r4,r4
1835 shlri r2,32+14,r19
1836 addi r22,-31,r0
1837 msub.w r1,r4,r1
1838
1839 mulu.l r1,r7,r4
1840 addi r1,-3,r5
1841 mulu.l r5,r19,r5
1842 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1843 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1844 the case may be, %0000000000000000 000.11111111111, still */
1845 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1846 mulu.l r5,r3,r5
1847 mshalds.l r1,r21,r1
1848 shari r4,26,r4
1849 shlld r5,r0,r5
1850 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1851 sub r2,r5,r2
1852 /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
1853
1854 shlri r2,22,r21
1855 mulu.l r21,r1,r21
1856 addi r20,30-22,r0
1857 /* bubble */ /* could test r3 here to check for divide by zero. */
1858 shlrd r21,r0,r21
1859 mulu.l r21,r3,r5
1860 mcmpgt.l r21,r63,r21 // See Note 1
1861 addi r20,30,r0
1862 mshfhi.l r63,r21,r21
1863 sub r2,r5,r2
1864 andc r2,r21,r2
1865
1866 /* small divisor: need a third divide step */
1867 mulu.l r2,r1,r7
1868 ptabs r18,tr0 /* tr0 = return address taken from r18 */
1869 sub r2,r3,r8 /* re-use r8 here for rest - r3 */
1870 shlrd r7,r0,r7
1871 mulu.l r7,r3,r5
1872 /* bubble */
1873 addi r8,1,r7
1874 cmpgt r7,r5,r7
1875 cmvne r7,r8,r2 /* if r7 != 0, take (rest - r3) as the rest */
1876 sub r2,r5,r2 /* remainder in r2 */
1877 blink tr0,r63
1878
1879 LOCAL(large_divisor):
1880 mmulfx.w r5,r4,r4
1881 shlrd r2,r9,r25
1882 shlri r25,32,r8
1883 msub.w r1,r4,r1
1884
1885 mulu.l r1,r7,r4
1886 addi r1,-3,r5
1887 mulu.l r5,r8,r5
1888 sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
1889 shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
1890 the case may be, %0000000000000000 000.11111111111, still */
1891 muls.l r1,r4,r4 /* leaving at least one sign bit. */
1892 shlri r5,14-1,r8
1893 mulu.l r8,r7,r5
1894 mshalds.l r1,r21,r1
1895 shari r4,26,r4
1896 add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
1897 sub r25,r5,r25
1898 /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
1899
1900 shlri r25,22,r21
1901 mulu.l r21,r1,r21
1902 pta LOCAL(no_lo_adj),tr0
1903 addi r22,32,r0
1904 shlri r21,40,r21
1905 mulu.l r21,r7,r5
1906 add r8,r21,r8
1907 shlld r2,r0,r2
1908 sub r25,r5,r25
1909 bgtu/u r7,r25,tr0 // no_lo_adj
1910 addi r8,1,r8
1911 sub r25,r7,r25
1912 LOCAL(no_lo_adj):
1913 mextr4 r2,r25,r2
1914
1915 /* large_divisor: only needs a few adjustments. */
1916 mulu.l r8,r6,r5
1917 ptabs r18,tr0 /* tr0 = return address taken from r18 */
1918 add r2,r6,r7
1919 cmpgtu r5,r2,r8
1920 cmvne r8,r7,r2 /* if r5 > r2, add the normalized divisor back first */
1921 sub r2,r5,r2
1922 shlrd r2,r22,r2 /* undo the normalization shift: remainder in r2 */
1923 blink tr0,r63
1924 ENDFUNC(GLOBAL(umoddi3))
1925 /* Note 1: To shift the result of the second divide stage so that the result
1926 always fits into 32 bits, yet we still reduce the rest sufficiently
1927 would require a lot of instructions to do the shifts just right. Using
1928 the full 64 bit shift result to multiply with the divisor would require
1929 four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
1930 Fortunately, if the upper 32 bits of the shift result are nonzero, we
1931 know that the rest after taking this partial result into account will
1932 fit into 32 bits. So we just clear the upper 32 bits of the rest if the
1933 upper 32 bits of the partial result are nonzero. */
1934 #endif /* __SHMEDIA__ */
1935 #endif /* L_umoddi3 */
1936
1937 #ifdef L_moddi3
1938 #ifdef __SHMEDIA__
/* Signed 64-bit remainder for SHmedia, implemented on top of
   umoddi3_internal.
   In:  r2 = dividend, r3 = divisor.  Out: r2 = remainder.
   Both operands are replaced by their magnitudes.  Only the dividend's
   sign decides the sign of the result: for a non-negative dividend the
   unsigned routine is entered as a tail call, otherwise it is called
   with the link in r18 and its result is negated before returning.  */
1939 .mode SHmedia
1940 .section .text..SHmedia32,"ax"
1941 .align 2
1942 .global GLOBAL(moddi3)
1943 FUNC(GLOBAL(moddi3))
1944 GLOBAL(moddi3):
1945 pta GLOBAL(umoddi3_internal),tr0
1946 shari r2,63,r22 /* r22 = 0 or -1: sign mask of the dividend */
1947 shari r3,63,r23 /* r23 = 0 or -1: sign mask of the divisor */
1948 xor r2,r22,r2
1949 xor r3,r23,r3
1950 sub r2,r22,r2 /* r2 = |dividend| */
1951 sub r3,r23,r3 /* r3 = |divisor| */
1952 beq/u r22,r63,tr0 /* dividend >= 0: tail-call the unsigned remainder */
1953 ptabs r18,tr1 /* keep our own return address in tr1 */
1954 blink tr0,r18 /* call umoddi3_internal, link in r18 */
1955 sub r63,r2,r2 /* dividend was negative: negate the remainder */
1956 blink tr1,r63 /* return */
1957 ENDFUNC(GLOBAL(moddi3))
1958 #endif /* __SHMEDIA__ */
1959 #endif /* L_moddi3 */
1960
1961 #ifdef L_set_fpscr
1962 #if !defined (__SH2A_NOFPU__)
1963 #if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
1964 #ifdef __SH5__
1965 .mode SHcompact
1966 #endif
/* set_fpscr: load r4 into FPSCR and refresh the two-word array
   GLOBAL(fpscr_values).
   In:  r4 = new FPSCR value.  r0-r3 are overwritten.
   Two variants of r4 are stored:
     - one with bit 19 set (and bit 20 set as well when FMOVD_WORKS is
       defined, clear otherwise);
     - one with bits 19 and 20 both clear.
   For __SH4__ / __SH2A_DOUBLE__ the first variant goes to word 1 and the
   second to word 0; for the other configurations it is the other way
   round.
   NOTE(review): bits 19 / 20 are presumably FPSCR.PR / FPSCR.SZ - confirm
   against the CPU manual.  */
1967 .global GLOBAL(set_fpscr)
1968 HIDDEN_FUNC(GLOBAL(set_fpscr))
1969 GLOBAL(set_fpscr):
1970 lds r4,fpscr
/* Get the address of fpscr_values into r1: through the GOT when building
   PIC (r12 is saved on the stack and restored), directly otherwise.  */
1971 #ifdef __PIC__
1972 mov.l r12,@-r15
1973 #ifdef __vxworks
1974 mov.l LOCAL(set_fpscr_L0_base),r12
1975 mov.l LOCAL(set_fpscr_L0_index),r0
1976 mov.l @r12,r12
1977 mov.l @(r0,r12),r12
1978 #else
1979 mova LOCAL(set_fpscr_L0),r0
1980 mov.l LOCAL(set_fpscr_L0),r12
1981 add r0,r12
1982 #endif
1983 mov.l LOCAL(set_fpscr_L1),r0
1984 mov.l @(r0,r12),r1
1985 mov.l @r15+,r12
1986 #else
1987 mov.l LOCAL(set_fpscr_L1),r1
1988 #endif
1989 swap.w r4,r0 /* bring bits 16..23 into the low byte so or/xor #imm can reach them */
1990 or #24,r0 /* set bits 19 and 20 of the original value */
1991 #ifndef FMOVD_WORKS
1992 xor #16,r0 /* clear bit 20 again */
1993 #endif
1994 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
1995 swap.w r0,r3
1996 mov.l r3,@(4,r1)
1997 #else /* defined (__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
1998 swap.w r0,r2
1999 mov.l r2,@r1
2000 #endif
/* Now clear whatever was set above so bits 19 and 20 are both zero.  */
2001 #ifndef FMOVD_WORKS
2002 xor #8,r0
2003 #else
2004 xor #24,r0
2005 #endif
2006 #if defined(__SH4__) || defined (__SH2A_DOUBLE__)
2007 swap.w r0,r2
2008 rts
2009 mov.l r2,@r1
2010 #else /* defined(__SH2E__) || defined(__SH3E__) || defined(__SH4_SINGLE*__) */
2011 swap.w r0,r3
2012 rts
2013 mov.l r3,@(4,r1)
2014 #endif
2015 .align 2
2016 #ifdef __PIC__
2017 #ifdef __vxworks
2018 LOCAL(set_fpscr_L0_base):
2019 .long ___GOTT_BASE__
2020 LOCAL(set_fpscr_L0_index):
2021 .long ___GOTT_INDEX__
2022 #else
2023 LOCAL(set_fpscr_L0):
2024 .long _GLOBAL_OFFSET_TABLE_
2025 #endif
2026 LOCAL(set_fpscr_L1):
2027 .long GLOBAL(fpscr_values@GOT)
2028 #else
2029 LOCAL(set_fpscr_L1):
2030 .long GLOBAL(fpscr_values)
2031 #endif
2032
2033 ENDFUNC(GLOBAL(set_fpscr))
/* Storage for fpscr_values: 8 bytes (two words), omitted when
   NO_FPSCR_VALUES is defined.  */
2034 #ifndef NO_FPSCR_VALUES
2035 #ifdef __ELF__
2036 .comm GLOBAL(fpscr_values),8,4
2037 #else
2038 .comm GLOBAL(fpscr_values),8
2039 #endif /* ELF */
2040 #endif /* NO_FPSCR_VALUES */
2041 #endif /* SH2E / SH3E / SH4 */
2042 #endif /* __SH2A_NOFPU__ */
2043 #endif /* L_set_fpscr */
2044 #ifdef L_ic_invalidate
2045 #if __SH5__ == 32
/* SH5 (32-bit): init_trampoline followed by ic_invalidate.
   init_trampoline writes a 16-byte block at the address in r0:
     bytes 0..7   an 8-byte constant (see below),
     bytes 8..11  r2,
     bytes 12..15 r3,
   and then falls through into ic_invalidate (there is no return between
   the two), which writes back / invalidates the cache block at r0 and
   returns through r18.  */
2046 .mode SHmedia
2047 .section .text..SHmedia32,"ax"
2048 .align 2
2049 .global GLOBAL(init_trampoline)
2050 HIDDEN_FUNC(GLOBAL(init_trampoline))
2051 GLOBAL(init_trampoline):
2052 st.l r0,8,r2
/* Build the constant so that memory holds the 16-bit words
   d002 d101 402b 0009 in that order for either endianness.
   NOTE(review): these are presumably SHcompact opcodes forming the
   trampoline's code - confirm.  */
2053 #ifdef __LITTLE_ENDIAN__
2054 movi 9,r20
2055 shori 0x402b,r20
2056 shori 0xd101,r20
2057 shori 0xd002,r20
2058 #else
2059 movi 0xffffffffffffd002,r20
2060 shori 0xd101,r20
2061 shori 0x402b,r20
2062 shori 9,r20
2063 #endif
2064 st.q r0,0,r20
2065 st.l r0,12,r3
2066 ENDFUNC(GLOBAL(init_trampoline))
2067 .global GLOBAL(ic_invalidate)
2068 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2069 GLOBAL(ic_invalidate):
2070 ocbwb r0,0 /* write the data-cache block at r0 back to memory */
2071 synco
2072 icbi r0, 0 /* invalidate the instruction-cache block at r0 */
2073 ptabs r18, tr0 /* tr0 = return address taken from r18 */
2074 synci
2075 blink tr0, r63 /* return */
2076 ENDFUNC(GLOBAL(ic_invalidate))
2077 #elif defined(__SH4A__)
/* SH4A ic_invalidate: make the instruction cache coherent for the line
   containing the address in r4.
   In:  r4 = address inside the line to invalidate.  No register is
   modified.
   icbi is issued before rts instead of in its delay slot: the SH-4A
   manual lists a slot illegal instruction exception for icbi, so it must
   not be used as a delay-slot instruction.  The effective order
   (write back, synchronize, invalidate, return) is unchanged.  */
2078 .global GLOBAL(ic_invalidate)
2079 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2080 GLOBAL(ic_invalidate):
2081 ocbwb @r4 /* write the data-cache line back to memory */
2082 synco /* synchronize with the write-back before invalidating */
2083 icbi @r4 /* invalidate the matching instruction-cache line */
2084 rts; nop /* return; the delay slot holds a harmless nop */
2085 ENDFUNC(GLOBAL(ic_invalidate))
2086 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
2087 /* For system code, we use ic_invalidate_line_i, but user code
2088 needs a different mechanism. A kernel call is generally not
2089 available, and it would also be slow. Different SH4 variants use
2090 different sizes and associativities of the Icache. We use a small
2091 bit of dispatch code that can be put hidden in every shared object,
2092 which calls the actual processor-specific invalidation code in a
2093 separate module.
2094 Or if you have operating system support, the OS could mmap the
2095 processor-specific code from a single page, since it is highly
2096 repetitive. */
/* Dispatch stub.
   In:  r4 = address to invalidate.  r0, r1, r2 and r4 are overwritten.
   It loads the address of GLOBAL(ic_invalidate_array) into r1 (through
   the GOT when building PIC), writes back the data-cache line at r4, and
   jumps into the array at
       array + ((r4 - array) & mask)
   where mask is the word stored at array + 8.  The word stored at
   array + 4 is passed on in r0 (loaded in the jmp delay slot).  The array
   code returns directly to our caller.  */
2097 .global GLOBAL(ic_invalidate)
2098 HIDDEN_FUNC(GLOBAL(ic_invalidate))
2099 GLOBAL(ic_invalidate):
2100 #ifdef __pic__
2101 #ifdef __vxworks
2102 mov.l 1f,r1
2103 mov.l 2f,r0
2104 mov.l @r1,r1
2105 mov.l 0f,r2
2106 mov.l @(r0,r1),r0
2107 #else
2108 mov.l 1f,r1
2109 mova 1f,r0
2110 mov.l 0f,r2
2111 add r1,r0 /* r0 = address of the GOT */
2112 #endif
2113 mov.l @(r0,r2),r1 /* r1 = GOT entry for ic_invalidate_array */
2114 #else
2115 mov.l 0f,r1 /* r1 = address of ic_invalidate_array */
2116 #endif
2117 ocbwb @r4 /* write the data-cache line back to memory */
2118 mov.l @(8,r1),r0 /* r0 = offset mask stored in the array */
2119 sub r1,r4
2120 and r4,r0
2121 add r1,r0 /* r0 = array + ((address - array) & mask) */
2122 jmp @r0
2123 mov.l @(4,r1),r0 /* delay slot: r0 = word at array + 4 for the array code */
2124 .align 2
2125 #ifndef __pic__
2126 0: .long GLOBAL(ic_invalidate_array)
2127 #else /* __pic__ */
2128 .global GLOBAL(ic_invalidate_array)
2129 0: .long GLOBAL(ic_invalidate_array)@GOT
2130 #ifdef __vxworks
2131 1: .long ___GOTT_BASE__
2132 2: .long ___GOTT_INDEX__
2133 #else
2134 1: .long _GLOBAL_OFFSET_TABLE_
2135 #endif
2136 #endif /* __pic__ */
/* ENDFUNC is emitted for both PIC and non-PIC builds so that it always
   pairs with the unconditional HIDDEN_FUNC above.  */
2137 ENDFUNC(GLOBAL(ic_invalidate))
2138 #endif /* SH4 */
2139 #endif /* L_ic_invalidate */
2140
2141 #ifdef L_ic_invalidate_array
2142 #if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
/* SH4A stand-in for the SH4 invalidation array, entered from the SH4
   ic_invalidate dispatch stub with r1 = address of this array and
   r4 = (target address - r1).
   The zero word emitted by ".long 0" lands at offset 8, which is where
   the stub reads its offset mask via @(8,r1); a zero mask makes the stub
   always jump to the start of this code.
   NOTE(review): icbi sits in the rts delay slot here; check the SH-4A
   manual on whether icbi is permitted in a delay slot.  Any reordering
   must keep a zero word at offset 8.  */
2143 .global GLOBAL(ic_invalidate_array)
2144 /* This is needed when an SH4 dso with trampolines is used on SH4A. */
2145 .global GLOBAL(ic_invalidate_array)
2146 FUNC(GLOBAL(ic_invalidate_array))
2147 GLOBAL(ic_invalidate_array):
2148 add r1,r4 /* rebuild the target address from the stub's offset */
2149 synco
2150 rts
2151 icbi @r4 /* delay slot: invalidate the instruction-cache line */
2152 .long 0
2153 ENDFUNC(GLOBAL(ic_invalidate_array))
2154 #elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
/* SH4 invalidation array: WAYS * WAY_SIZE bytes of code laid out in
   32-byte units (every .rept body below assembles to exactly 32 bytes),
   starting on a 32-byte boundary.  The ic_invalidate dispatch stub jumps
   into it at (address - array) & mask, with the mask taken from the word
   at offset 8 and r0 preloaded from the word at offset 4.
   WAYS and WAY_SIZE default to 4 and 0x4000 and may be predefined.
   NOTE(review): executing one unit in each way presumably evicts the
   matching instruction-cache line - confirm against the cache
   organisation of the target SH4 variant.  */
2155 .global GLOBAL(ic_invalidate_array)
2156 .p2align 5
2157 FUNC(GLOBAL(ic_invalidate_array))
2158 /* This must be aligned to the beginning of a cache line. */
2159 GLOBAL(ic_invalidate_array):
2160 #ifndef WAYS
2161 #define WAYS 4
2162 #define WAY_SIZE 0x4000
2163 #endif
/* One way: each unit returns immediately; the rest of the unit is filled
   with the mask value so that offsets 4 and 8 hold WAY_SIZE - 32.  */
2164 #if WAYS == 1
2165 .rept WAY_SIZE * WAYS / 32
2166 rts
2167 nop
2168 .rept 7
2169 .long WAY_SIZE - 32
2170 .endr
2171 .endr
/* Two to six ways: every unit starts with a braf whose displacement comes
   from r0, followed by the two data words the stub reads (offset 4:
   WAY_SIZE + 8, offset 8: WAY_SIZE - 32), then WAYS-2 further braf/nop
   pairs and 7-WAYS rts/nop pairs as padding up to 32 bytes.  */
2172 #elif WAYS <= 6
2173 .rept WAY_SIZE * WAYS / 32
2174 braf r0
2175 add #-8,r0
2176 .long WAY_SIZE + 8
2177 .long WAY_SIZE - 32
2178 .rept WAYS-2
2179 braf r0
2180 nop
2181 .endr
2182 .rept 7 - WAYS
2183 rts
2184 nop
2185 .endr
2186 .endr
2187 #else /* WAYS > 6 */
2188 /* This variant needs two different pages for mmap-ing. */
/* The first WAYS-1 ways hold braf units (offset 4: WAY_SIZE, offset 8
   onwards: WAY_SIZE - 32); the last way holds rts followed by nops.  */
2189 .rept WAYS-1
2190 .rept WAY_SIZE / 32
2191 braf r0
2192 nop
2193 .long WAY_SIZE
2194 .rept 6
2195 .long WAY_SIZE - 32
2196 .endr
2197 .endr
2198 .endr
2199 .rept WAY_SIZE / 32
2200 rts
2201 .rept 15
2202 nop
2203 .endr
2204 .endr
2205 #endif /* WAYS */
2206 ENDFUNC(GLOBAL(ic_invalidate_array))
2207 #endif /* SH4 */
2208 #endif /* L_ic_invalidate_array */
2209
2210 #if defined (__SH5__) && __SH5__ == 32
2211 #ifdef L_shcompact_call_trampoline
/* Dispatch table for GCC_shcompact_call_trampoline: 33 16-bit entries,
   each the distance from LOCAL(ct_main_label) to one handler.
   The trampoline loads an entry with ldx.w from
   (ct_main_table - 31 * 2) + 2 * nsb(r1) and branches to it with ptrel
   at LOCAL(ct_main_label), so the first entry corresponds to
   nsb(r1) == 31.  */
2212 .section .rodata
2213 .align 1
2214 LOCAL(ct_main_table):
2215 .word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
2216 .word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
2217 .word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
2218 .word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
2219 .word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
2220 .word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
2221 .word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
2222 .word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
2223 .word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
2224 .word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
2225 .word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
2226 .word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
2227 .word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
2228 .word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
2229 .word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
2230 .word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
2231 .word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
2232 .word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
2233 .word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
2234 .word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
2235 .word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
2236 .word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
2237 .word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
2238 .word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
2239 .word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
2240 .word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
2241 .word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
2242 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2243 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2244 .word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
2245 .word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
2246 .word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
2247 .word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
2248 .mode SHmedia
2249 .section .text..SHmedia32, "ax"
2250 .align 2
2251
2252 /* This function loads 64-bit general-purpose registers from the
2253 stack, from a memory address contained in them or from an FP
2254 register, according to a cookie passed in r1. Its execution
2255 time is linear on the number of registers that actually have
2256 to be copied. See sh.h for details on the actual bit pattern.
2257
2258 The function to be called is passed in r0. If a 32-bit return
2259 value is expected, the actual function will be tail-called,
2260 otherwise the return address will be stored in r10 (that the
2261 caller should expect to be clobbered) and the return value
2262 will be expanded into r2/r3 upon return. */
2263
2264 .global GLOBAL(GCC_shcompact_call_trampoline)
2265 FUNC(GLOBAL(GCC_shcompact_call_trampoline))
2266 GLOBAL(GCC_shcompact_call_trampoline):
2267 ptabs/l r0, tr0 /* Prepare to call the actual function. */
2268 movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
2269 pt/l LOCAL(ct_loop), tr1
2270 addz.l r1, r63, r1
2271 shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
2272 LOCAL(ct_loop):
2273 nsb r1, r28
2274 shlli r28, 1, r29
2275 ldx.w r0, r29, r30
2276 LOCAL(ct_main_label):
2277 ptrel/l r30, tr2
2278 blink tr2, r63
2279 LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
2280 /* It must be dr0, so just do it. */
2281 fmov.dq dr0, r2
2282 movi 7, r30
2283 shlli r30, 29, r31
2284 andc r1, r31, r1
2285 blink tr1, r63
2286 LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
2287 /* It is either dr0 or dr2. */
2288 movi 7, r30
2289 shlri r1, 26, r32
2290 shlli r30, 26, r31
2291 andc r1, r31, r1
2292 fmov.dq dr0, r3
2293 beqi/l r32, 4, tr1
2294 fmov.dq dr2, r3
2295 blink tr1, r63
2296 LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
2297 shlri r1, 23 - 3, r34
2298 andi r34, 3 << 3, r33
2299 addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
2300 LOCAL(ct_r4_fp_base):
2301 ptrel/l r32, tr2
2302 movi 7, r30
2303 shlli r30, 23, r31
2304 andc r1, r31, r1
2305 blink tr2, r63
2306 LOCAL(ct_r4_fp_copy):
2307 fmov.dq dr0, r4
2308 blink tr1, r63
2309 fmov.dq dr2, r4
2310 blink tr1, r63
2311 fmov.dq dr4, r4
2312 blink tr1, r63
2313 LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
2314 shlri r1, 20 - 3, r34
2315 andi r34, 3 << 3, r33
2316 addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
2317 LOCAL(ct_r5_fp_base):
2318 ptrel/l r32, tr2
2319 movi 7, r30
2320 shlli r30, 20, r31
2321 andc r1, r31, r1
2322 blink tr2, r63
2323 LOCAL(ct_r5_fp_copy):
2324 fmov.dq dr0, r5
2325 blink tr1, r63
2326 fmov.dq dr2, r5
2327 blink tr1, r63
2328 fmov.dq dr4, r5
2329 blink tr1, r63
2330 fmov.dq dr6, r5
2331 blink tr1, r63
2332 LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
2333 /* It must be dr8. */
2334 fmov.dq dr8, r6
2335 movi 15, r30
2336 shlli r30, 16, r31
2337 andc r1, r31, r1
2338 blink tr1, r63
2339 LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
2340 shlri r1, 16 - 3, r34
2341 andi r34, 3 << 3, r33
2342 addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
2343 LOCAL(ct_r6_fp_base):
2344 ptrel/l r32, tr2
2345 movi 7, r30
2346 shlli r30, 16, r31
2347 andc r1, r31, r1
2348 blink tr2, r63
2349 LOCAL(ct_r6_fp_copy):
2350 fmov.dq dr0, r6
2351 blink tr1, r63
2352 fmov.dq dr2, r6
2353 blink tr1, r63
2354 fmov.dq dr4, r6
2355 blink tr1, r63
2356 fmov.dq dr6, r6
2357 blink tr1, r63
2358 LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
2359 /* It is either dr8 or dr10. */
2360 movi 15 << 12, r31
2361 shlri r1, 12, r32
2362 andc r1, r31, r1
2363 fmov.dq dr8, r7
2364 beqi/l r32, 8, tr1
2365 fmov.dq dr10, r7
2366 blink tr1, r63
2367 LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
2368 shlri r1, 12 - 3, r34
2369 andi r34, 3 << 3, r33
2370 addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
2371 LOCAL(ct_r7_fp_base):
2372 ptrel/l r32, tr2
2373 movi 7 << 12, r31
2374 andc r1, r31, r1
2375 blink tr2, r63
2376 LOCAL(ct_r7_fp_copy):
2377 fmov.dq dr0, r7
2378 blink tr1, r63
2379 fmov.dq dr2, r7
2380 blink tr1, r63
2381 fmov.dq dr4, r7
2382 blink tr1, r63
2383 fmov.dq dr6, r7
2384 blink tr1, r63
2385 LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
2386 /* It is either dr8 or dr10. */
2387 movi 15 << 8, r31
2388 andi r1, 1 << 8, r32
2389 andc r1, r31, r1
2390 fmov.dq dr8, r8
2391 beq/l r32, r63, tr1
2392 fmov.dq dr10, r8
2393 blink tr1, r63
2394 LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
2395 shlri r1, 8 - 3, r34
2396 andi r34, 3 << 3, r33
2397 addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
2398 LOCAL(ct_r8_fp_base):
2399 ptrel/l r32, tr2
2400 movi 7 << 8, r31
2401 andc r1, r31, r1
2402 blink tr2, r63
2403 LOCAL(ct_r8_fp_copy):
2404 fmov.dq dr0, r8
2405 blink tr1, r63
2406 fmov.dq dr2, r8
2407 blink tr1, r63
2408 fmov.dq dr4, r8
2409 blink tr1, r63
2410 fmov.dq dr6, r8
2411 blink tr1, r63
2412 LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
2413 /* It is either dr8 or dr10. */
2414 movi 15 << 4, r31
2415 andi r1, 1 << 4, r32
2416 andc r1, r31, r1
2417 fmov.dq dr8, r9
2418 beq/l r32, r63, tr1
2419 fmov.dq dr10, r9
2420 blink tr1, r63
2421 LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
2422 shlri r1, 4 - 3, r34
2423 andi r34, 3 << 3, r33
2424 addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
2425 LOCAL(ct_r9_fp_base):
2426 ptrel/l r32, tr2
2427 movi 7 << 4, r31
2428 andc r1, r31, r1
2429 blink tr2, r63
2430 LOCAL(ct_r9_fp_copy):
2431 fmov.dq dr0, r9
2432 blink tr1, r63
2433 fmov.dq dr2, r9
2434 blink tr1, r63
2435 fmov.dq dr4, r9
2436 blink tr1, r63
2437 fmov.dq dr6, r9
2438 blink tr1, r63
2439 LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
2440 pt/l LOCAL(ct_r2_load), tr2
2441 movi 3, r30
2442 shlli r30, 29, r31
2443 and r1, r31, r32
2444 andc r1, r31, r1
2445 beq/l r31, r32, tr2
2446 addi.l r2, 8, r3
2447 ldx.q r2, r63, r2
2448 /* Fall through. */
2449 LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
2450 pt/l LOCAL(ct_r3_load), tr2
2451 movi 3, r30
2452 shlli r30, 26, r31
2453 and r1, r31, r32
2454 andc r1, r31, r1
2455 beq/l r31, r32, tr2
2456 addi.l r3, 8, r4
2457 ldx.q r3, r63, r3
2458 LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
2459 pt/l LOCAL(ct_r4_load), tr2
2460 movi 3, r30
2461 shlli r30, 23, r31
2462 and r1, r31, r32
2463 andc r1, r31, r1
2464 beq/l r31, r32, tr2
2465 addi.l r4, 8, r5
2466 ldx.q r4, r63, r4
2467 LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
2468 pt/l LOCAL(ct_r5_load), tr2
2469 movi 3, r30
2470 shlli r30, 20, r31
2471 and r1, r31, r32
2472 andc r1, r31, r1
2473 beq/l r31, r32, tr2
2474 addi.l r5, 8, r6
2475 ldx.q r5, r63, r5
2476 LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
2477 pt/l LOCAL(ct_r6_load), tr2
2478 movi 3 << 16, r31
2479 and r1, r31, r32
2480 andc r1, r31, r1
2481 beq/l r31, r32, tr2
2482 addi.l r6, 8, r7
2483 ldx.q r6, r63, r6
2484 LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
2485 pt/l LOCAL(ct_r7_load), tr2
2486 movi 3 << 12, r31
2487 and r1, r31, r32
2488 andc r1, r31, r1
2489 beq/l r31, r32, tr2
2490 addi.l r7, 8, r8
2491 ldx.q r7, r63, r7
2492 LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
2493 pt/l LOCAL(ct_r8_load), tr2
2494 movi 3 << 8, r31
2495 and r1, r31, r32
2496 andc r1, r31, r1
2497 beq/l r31, r32, tr2
2498 addi.l r8, 8, r9
2499 ldx.q r8, r63, r8
2500 LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
2501 pt/l LOCAL(ct_check_tramp), tr2
2502 ldx.q r9, r63, r9
2503 blink tr2, r63
2504 LOCAL(ct_r2_load):
2505 ldx.q r2, r63, r2
2506 blink tr1, r63
2507 LOCAL(ct_r3_load):
2508 ldx.q r3, r63, r3
2509 blink tr1, r63
2510 LOCAL(ct_r4_load):
2511 ldx.q r4, r63, r4
2512 blink tr1, r63
2513 LOCAL(ct_r5_load):
2514 ldx.q r5, r63, r5
2515 blink tr1, r63
2516 LOCAL(ct_r6_load):
2517 ldx.q r6, r63, r6
2518 blink tr1, r63
2519 LOCAL(ct_r7_load):
2520 ldx.q r7, r63, r7
2521 blink tr1, r63
2522 LOCAL(ct_r8_load):
2523 ldx.q r8, r63, r8
2524 blink tr1, r63
2525 LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
2526 movi 1, r30
2527 ldx.q r15, r63, r2
2528 shlli r30, 29, r31
2529 addi.l r15, 8, r15
2530 andc r1, r31, r1
2531 blink tr1, r63
2532 LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
2533 movi 1, r30
2534 ldx.q r15, r63, r3
2535 shlli r30, 26, r31
2536 addi.l r15, 8, r15
2537 andc r1, r31, r1
2538 blink tr1, r63
2539 LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
2540 movi 1, r30
2541 ldx.q r15, r63, r4
2542 shlli r30, 23, r31
2543 addi.l r15, 8, r15
2544 andc r1, r31, r1
2545 blink tr1, r63
2546 LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
2547 movi 1, r30
2548 ldx.q r15, r63, r5
2549 shlli r30, 20, r31
2550 addi.l r15, 8, r15
2551 andc r1, r31, r1
2552 blink tr1, r63
2553 LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
2554 movi 1, r30
2555 ldx.q r15, r63, r6
2556 shlli r30, 16, r31
2557 addi.l r15, 8, r15
2558 andc r1, r31, r1
2559 blink tr1, r63
2560 LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
2561 ldx.q r15, r63, r7
2562 movi 1 << 12, r31
2563 addi.l r15, 8, r15
2564 andc r1, r31, r1
2565 blink tr1, r63
2566 LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
2567 ldx.q r15, r63, r8
2568 movi 1 << 8, r31
2569 addi.l r15, 8, r15
2570 andc r1, r31, r1
2571 blink tr1, r63
2572 LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
2573 andi r1, 7 << 1, r30
2574 movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
2575 shlli r30, 2, r31
2576 shori LOCAL(ct_end_of_pop_seq) & 65535, r32
2577 sub.l r32, r31, r33
2578 ptabs/l r33, tr2
2579 blink tr2, r63
2580 LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
2581 ldx.q r15, r63, r3
2582 addi.l r15, 8, r15
2583 ldx.q r15, r63, r4
2584 addi.l r15, 8, r15
2585 ldx.q r15, r63, r5
2586 addi.l r15, 8, r15
2587 ldx.q r15, r63, r6
2588 addi.l r15, 8, r15
2589 ldx.q r15, r63, r7
2590 addi.l r15, 8, r15
2591 ldx.q r15, r63, r8
2592 addi.l r15, 8, r15
2593 LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
2594 ldx.q r15, r63, r9
2595 addi.l r15, 8, r15
2596 LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
2597 LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
2598 pt/u LOCAL(ct_ret_wide), tr2
2599 andi r1, 1, r1
2600 bne/u r1, r63, tr2
2601 LOCAL(ct_call_func): /* Just branch to the function. */
2602 blink tr0, r63
2603 LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
2604 64-bit return value. */
2605 add.l r18, r63, r10
2606 blink tr0, r18
2607 ptabs r10, tr0
2608 #if __LITTLE_ENDIAN__
2609 shari r2, 32, r3
2610 add.l r2, r63, r2
2611 #else
2612 add.l r2, r63, r3
2613 shari r2, 32, r2
2614 #endif
2615 blink tr0, r63
2616
2617 ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
2618 #endif /* L_shcompact_call_trampoline */
2619
2620 #ifdef L_shcompact_return_trampoline
2621 /* This function does the converse of the code in `ret_wide'
2622 above. It is tail-called by SHcompact functions returning
2623 64-bit non-floating-point values, to pack the 32-bit values in
2624 r2 and r3 into r2. */
2625
2626 .mode SHmedia
2627 .section .text..SHmedia32, "ax"
2628 .align 2
2629 .global GLOBAL(GCC_shcompact_return_trampoline)
2630 HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
2631 GLOBAL(GCC_shcompact_return_trampoline): /* In: r2, r3 = 32-bit halves, r18 = return address.  Out: r2 = packed 64-bit value.  Writes r3 and tr0. */
2632 ptabs/l r18, tr0 /* tr0 = return address taken from r18. */
2633 #if __LITTLE_ENDIAN__
2634 addz.l r2, r63, r2 /* Zero-extend the low 32 bits of r2 (r63 reads as zero); r2 supplies bits 31..0. */
2635 shlli r3, 32, r3 /* Move r3 into bits 63..32. */
2636 #else
2637 addz.l r3, r63, r3 /* Zero-extend the low 32 bits of r3; r3 supplies bits 31..0. */
2638 shlli r2, 32, r2 /* Move r2 into bits 63..32. */
2639 #endif
2640 or r3, r2, r2 /* Merge both halves into r2. */
2641 blink tr0, r63 /* Return; the link value is discarded into r63. */
2642
2643 ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
2644 #endif /* L_shcompact_return_trampoline */
2645
2646 #ifdef L_shcompact_incoming_args
2647 .section .rodata
2648 .align 1
2649 LOCAL(ia_main_table): /* 33 halfword entries read by GCC_shcompact_incoming_args as table[nsb (cookie) - 31]; each is a handler offset from ia_main_label, consumed by ptrel.  Assuming nsb yields the sign-bit count minus one, entry N is selected when bit 31 - N is the highest set cookie bit, and entry 32 when the cookie is zero. */
2650 .word 1 /* Invalid, just loop (entry 0); NOTE(review): offset 1 presumably makes the ptrel at ia_main_label target itself. */
2651 .word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label) /* entry 1 */
2652 .word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label) /* entry 2 */
2653 .word 1 /* Invalid, just loop */
2654 .word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label) /* entry 4 */
2655 .word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label) /* entry 5 */
2656 .word 1 /* Invalid, just loop */
2657 .word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label) /* entry 7 */
2658 .word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label) /* entry 8 */
2659 .word 1 /* Invalid, just loop */
2660 .word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label) /* entry 10 */
2661 .word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label) /* entry 11 */
2662 .word 1 /* Invalid, just loop */
2663 .word 1 /* Invalid, just loop */
2664 .word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label) /* entry 14 */
2665 .word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label) /* entry 15 */
2666 .word 1 /* Invalid, just loop */
2667 .word 1 /* Invalid, just loop */
2668 .word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label) /* entry 18 */
2669 .word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label) /* entry 19 */
2670 .word 1 /* Invalid, just loop */
2671 .word 1 /* Invalid, just loop */
2672 .word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label) /* entry 22 */
2673 .word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label) /* entry 23 */
2674 .word 1 /* Invalid, just loop */
2675 .word 1 /* Invalid, just loop */
2676 .word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label) /* entry 26 */
2677 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) /* entry 27 */
2678 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) /* entry 28 */
2679 .word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label) /* entry 29 */
2680 .word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label) /* entry 30 */
2681 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) /* entry 31 */
2682 .word LOCAL(ia_return) - datalabel LOCAL(ia_main_label) /* entry 32: nothing left to do */
2683 .mode SHmedia
2684 .section .text..SHmedia32, "ax"
2685 .align 2
2686
2687 /* This function stores 64-bit general-purpose registers back in
2688 the stack, and loads the address in which each register
2689 was stored into itself. The lower 32 bits of r17 hold the address
2690 to begin storing, and the upper 32 bits of r17 hold the cookie.
2691 Its execution time is linear in the
2692 number of registers that actually have to be copied, and it is
2693 optimized for structures larger than 64 bits, as opposed to
2694 individual `long long' arguments. See sh.h for details on the
2695 actual bit pattern. */
2696
2697 .global GLOBAL(GCC_shcompact_incoming_args)
2698 FUNC(GLOBAL(GCC_shcompact_incoming_args))
2699 GLOBAL(GCC_shcompact_incoming_args):
2700 ptabs/l r18, tr0 /* Prepare to return. */
2701 shlri r17, 32, r0 /* Load the cookie. */
2702 movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43 /* r43 = table address minus 31 halfword entries (upper 16 bits). */
2703 pt/l LOCAL(ia_loop), tr1 /* tr1 = dispatch loop, used by every handler that continues. */
2704 add.l r17, r63, r17 /* Keep only the low 32 bits of r17: the store address. */
2705 shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43 /* Shift in the lower 16 bits of the biased table address. */
2706 LOCAL(ia_loop):
2707 nsb r0, r36 /* r36 = nsb of the remaining cookie bits; selects the next handler. */
2708 shlli r36, 1, r37 /* Scale to a halfword offset. */
2709 ldx.w r43, r37, r38 /* r38 = handler offset relative to ia_main_label. */
2710 LOCAL(ia_main_label):
2711 ptrel/l r38, tr2 /* tr2 = this instruction's address + r38. */
2712 blink tr2, r63 /* Jump to the handler. */
2713 LOCAL(ia_r2_ld): /* Store r2 and load its address. */
2714 movi 3, r38
2715 shlli r38, 29, r39 /* r39 = 3 << 29: the two cookie bits examined for r2. */
2716 and r0, r39, r40 /* r40 = current value of those bits. */
2717 andc r0, r39, r0 /* Clear them in the cookie. */
2718 stx.q r17, r63, r2 /* Store r2 at the address in r17. */
2719 add.l r17, r63, r2 /* r2 = address it was stored at. */
2720 addi.l r17, 8, r17 /* Advance the store pointer. */
2721 beq/u r39, r40, tr1 /* Both bits were set: back to ia_loop; otherwise fall through and treat r3 the same way. */
2722 LOCAL(ia_r3_ld): /* Store r3 and load its address. */
2723 movi 3, r38
2724 shlli r38, 26, r39 /* Cookie bits 27..26. */
2725 and r0, r39, r40
2726 andc r0, r39, r0
2727 stx.q r17, r63, r3
2728 add.l r17, r63, r3
2729 addi.l r17, 8, r17
2730 beq/u r39, r40, tr1 /* Both bits set: back to ia_loop; else fall through to r4. */
2731 LOCAL(ia_r4_ld): /* Store r4 and load its address. */
2732 movi 3, r38
2733 shlli r38, 23, r39 /* Cookie bits 24..23. */
2734 and r0, r39, r40
2735 andc r0, r39, r0
2736 stx.q r17, r63, r4
2737 add.l r17, r63, r4
2738 addi.l r17, 8, r17
2739 beq/u r39, r40, tr1 /* Both bits set: back to ia_loop; else fall through to r5. */
2740 LOCAL(ia_r5_ld): /* Store r5 and load its address. */
2741 movi 3, r38
2742 shlli r38, 20, r39 /* Cookie bits 21..20. */
2743 and r0, r39, r40
2744 andc r0, r39, r0
2745 stx.q r17, r63, r5
2746 add.l r17, r63, r5
2747 addi.l r17, 8, r17
2748 beq/u r39, r40, tr1 /* Both bits set: back to ia_loop; else fall through to r6. */
2749 LOCAL(ia_r6_ld): /* Store r6 and load its address. */
2750 movi 3, r38
2751 shlli r38, 16, r39 /* Cookie bits 17..16. */
2752 and r0, r39, r40
2753 andc r0, r39, r0
2754 stx.q r17, r63, r6
2755 add.l r17, r63, r6
2756 addi.l r17, 8, r17
2757 beq/u r39, r40, tr1 /* Both bits set: back to ia_loop; else fall through to r7. */
2758 LOCAL(ia_r7_ld): /* Store r7 and load its address. */
2759 movi 3 << 12, r39 /* Cookie bits 13..12. */
2760 and r0, r39, r40
2761 andc r0, r39, r0
2762 stx.q r17, r63, r7
2763 add.l r17, r63, r7
2764 addi.l r17, 8, r17
2765 beq/u r39, r40, tr1 /* Both bits set: back to ia_loop; else fall through to r8. */
2766 LOCAL(ia_r8_ld): /* Store r8 and load its address. */
2767 movi 3 << 8, r39 /* Cookie bits 9..8. */
2768 and r0, r39, r40
2769 andc r0, r39, r0
2770 stx.q r17, r63, r8
2771 add.l r17, r63, r8
2772 addi.l r17, 8, r17
2773 beq/u r39, r40, tr1 /* Both bits set: back to ia_loop; else fall through to r9. */
2774 LOCAL(ia_r9_ld): /* Store r9 and load its address. */
2775 stx.q r17, r63, r9
2776 add.l r17, r63, r9
2777 blink tr0, r63 /* r9 is the last register handled: return to the caller. */
2778 LOCAL(ia_r2_push): /* Push r2 onto the stack. */
2779 movi 1, r38
2780 shlli r38, 29, r39 /* r39 = 1 << 29: the cookie bit that requested this push. */
2781 andc r0, r39, r0 /* Clear it so the next nsb finds the next request. */
2782 stx.q r17, r63, r2 /* Store r2 at the address in r17. */
2783 addi.l r17, 8, r17 /* Advance the store pointer. */
2784 blink tr1, r63 /* Back to ia_loop. */
2785 LOCAL(ia_r3_push): /* Push r3 onto the stack. */
2786 movi 1, r38
2787 shlli r38, 26, r39 /* Cookie bit 26. */
2788 andc r0, r39, r0
2789 stx.q r17, r63, r3
2790 addi.l r17, 8, r17
2791 blink tr1, r63
2792 LOCAL(ia_r4_push): /* Push r4 onto the stack. */
2793 movi 1, r38
2794 shlli r38, 23, r39 /* Cookie bit 23. */
2795 andc r0, r39, r0
2796 stx.q r17, r63, r4
2797 addi.l r17, 8, r17
2798 blink tr1, r63
2799 LOCAL(ia_r5_push): /* Push r5 onto the stack. */
2800 movi 1, r38
2801 shlli r38, 20, r39 /* Cookie bit 20. */
2802 andc r0, r39, r0
2803 stx.q r17, r63, r5
2804 addi.l r17, 8, r17
2805 blink tr1, r63
2806 LOCAL(ia_r6_push): /* Push r6 onto the stack. */
2807 movi 1, r38
2808 shlli r38, 16, r39 /* Cookie bit 16. */
2809 andc r0, r39, r0
2810 stx.q r17, r63, r6
2811 addi.l r17, 8, r17
2812 blink tr1, r63
2813 LOCAL(ia_r7_push): /* Push r7 onto the stack. */
2814 movi 1 << 12, r39 /* Cookie bit 12. */
2815 andc r0, r39, r0
2816 stx.q r17, r63, r7
2817 addi.l r17, 8, r17
2818 blink tr1, r63
2819 LOCAL(ia_r8_push): /* Push r8 onto the stack. */
2820 movi 1 << 8, r39 /* Cookie bit 8. */
2821 andc r0, r39, r0
2822 stx.q r17, r63, r8
2823 addi.l r17, 8, r17
2824 blink tr1, r63
2825 LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
2826 andi r0, 7 << 1, r38 /* r38 = cookie bits 3..1, still in place: twice the 3-bit field (presumably the register count). */
2827 movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40 /* r40 = address of the end label (upper 16 bits). */
2828 shlli r38, 2, r39 /* r39 = 8 bytes per register: each push below is two 4-byte instructions. */
2829 shori LOCAL(ia_end_of_push_seq) & 65535, r40 /* Shift in the lower 16 bits. */
2830 sub.l r40, r39, r41 /* Entry point = end label minus 8 bytes per register. */
2831 ptabs/l r41, tr2
2832 blink tr2, r63 /* Jump into the middle of the sequence below. */
2833 LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence.  NOTE(review): the label name looks like a typo for ia_start_of_push_seq (cf. ct_start_of_pop_seq); nothing in the visible code references it. */
2834 stx.q r17, r63, r3
2835 addi.l r17, 8, r17
2836 stx.q r17, r63, r4
2837 addi.l r17, 8, r17
2838 stx.q r17, r63, r5
2839 addi.l r17, 8, r17
2840 stx.q r17, r63, r6
2841 addi.l r17, 8, r17
2842 stx.q r17, r63, r7
2843 addi.l r17, 8, r17
2844 stx.q r17, r63, r8
2845 addi.l r17, 8, r17
2846 LOCAL(ia_r9_push): /* Push r9 onto the stack. */
2847 stx.q r17, r63, r9 /* Last push: the blink below takes the place of the pointer increment, keeping the 8-byte stride. */
2848 LOCAL(ia_return): /* Return. */
2849 blink tr0, r63
2850 LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
2851 ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
2852 #endif /* L_shcompact_incoming_args */
2853 #endif
2854 #if __SH5__
2855 #ifdef L_nested_trampoline
2856 #if __SH5__ == 32
2857 .section .text..SHmedia32,"ax"
2858 #else
2859 .text
2860 #endif
2861 .align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
2862 .global GLOBAL(GCC_nested_trampoline)
2863 HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
2864 GLOBAL(GCC_nested_trampoline): /* Code template: it loads two words located right after its own six instructions (24 bytes). */
2865 .mode SHmedia
2866 ptrel/u r63, tr0 /* tr0 = address of this instruction (offset register r63 reads as zero). */
2867 gettr tr0, r0 /* r0 = that address, used as the base for the loads below. */
2868 #if __SH5__ == 64
2869 ld.q r0, 24, r1 /* r1 = first word after the code: the branch target. */
2870 #else
2871 ld.l r0, 24, r1 /* r1 = first word after the code: the branch target. */
2872 #endif
2873 ptabs/l r1, tr1 /* tr1 = target address. */
2874 #if __SH5__ == 64
2875 ld.q r0, 32, r1 /* r1 = second word after the code (presumably the static chain value - TODO confirm). */
2876 #else
2877 ld.l r0, 28, r1 /* r1 = second word after the code (presumably the static chain value - TODO confirm). */
2878 #endif
2879 blink tr1, r63 /* Jump to the target without linking. */
2880
2881 ENDFUNC(GLOBAL(GCC_nested_trampoline))
2882 #endif /* L_nested_trampoline */
2883 #endif /* __SH5__ */
2884 #if __SH5__ == 32
2885 #ifdef L_push_pop_shmedia_regs
2886 .section .text..SHmedia32,"ax"
2887 .mode SHmedia
2888 .align 2
2889 #ifndef __SH4_NOFPU__
2890 .global GLOBAL(GCC_push_shmedia_regs)
2891 FUNC(GLOBAL(GCC_push_shmedia_regs))
2892 GLOBAL(GCC_push_shmedia_regs): /* Saves dr36..dr62, then falls into the general-register part below. */
2893 addi.l r15, -14*8, r15 /* Allocate 14 eight-byte slots for the FP register pairs. */
2894 fst.d r15, 13*8, dr62
2895 fst.d r15, 12*8, dr60
2896 fst.d r15, 11*8, dr58
2897 fst.d r15, 10*8, dr56
2898 fst.d r15, 9*8, dr54
2899 fst.d r15, 8*8, dr52
2900 fst.d r15, 7*8, dr50
2901 fst.d r15, 6*8, dr48
2902 fst.d r15, 5*8, dr46
2903 fst.d r15, 4*8, dr44
2904 fst.d r15, 3*8, dr42
2905 fst.d r15, 2*8, dr40
2906 fst.d r15, 1*8, dr38
2907 fst.d r15, 0*8, dr36
2908 #else /* __SH4_NOFPU__ */
2909 .global GLOBAL(GCC_push_shmedia_regs_nofpu)
2910 FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2911 GLOBAL(GCC_push_shmedia_regs_nofpu):
2912 #endif /* __SH4_NOFPU__ */
2913 ptabs/l r18, tr0 /* tr0 = return address. */
2914 addi.l r15, -27*8, r15 /* Allocate 27 slots: tr5..tr7, r44..r59 and r28..r35. */
2915 gettr tr7, r62 /* r60..r62 are overwritten with tr5..tr7 here, so slots 24..26 hold the target registers, not r60..r62. */
2916 gettr tr6, r61
2917 gettr tr5, r60
2918 st.q r15, 26*8, r62 /* tr7 */
2919 st.q r15, 25*8, r61 /* tr6 */
2920 st.q r15, 24*8, r60 /* tr5 */
2921 st.q r15, 23*8, r59
2922 st.q r15, 22*8, r58
2923 st.q r15, 21*8, r57
2924 st.q r15, 20*8, r56
2925 st.q r15, 19*8, r55
2926 st.q r15, 18*8, r54
2927 st.q r15, 17*8, r53
2928 st.q r15, 16*8, r52
2929 st.q r15, 15*8, r51
2930 st.q r15, 14*8, r50
2931 st.q r15, 13*8, r49
2932 st.q r15, 12*8, r48
2933 st.q r15, 11*8, r47
2934 st.q r15, 10*8, r46
2935 st.q r15, 9*8, r45
2936 st.q r15, 8*8, r44
2937 st.q r15, 7*8, r35
2938 st.q r15, 6*8, r34
2939 st.q r15, 5*8, r33
2940 st.q r15, 4*8, r32
2941 st.q r15, 3*8, r31
2942 st.q r15, 2*8, r30
2943 st.q r15, 1*8, r29
2944 st.q r15, 0*8, r28
2945 blink tr0, r63 /* Return with r15 pointing at the saved block. */
2946 #ifndef __SH4_NOFPU__
2947 ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
2948 #else
2949 ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
2950 #endif
2951 #ifndef __SH4_NOFPU__
2952 .global GLOBAL(GCC_pop_shmedia_regs)
2953 FUNC(GLOBAL(GCC_pop_shmedia_regs))
2954 GLOBAL(GCC_pop_shmedia_regs): /* Restores dr36..dr62, then jumps to the general-register part at .L0. */
2955 pt .L0, tr1 /* tr1 = shared general-register restore code. */
2956 movi 41*8, r0 /* r0 = bytes to release: 27 general slots + 14 FP slots. */
2957 fld.d r15, 40*8, dr62 /* FP slots sit above the 27 general slots, at offsets 27*8..40*8. */
2958 fld.d r15, 39*8, dr60
2959 fld.d r15, 38*8, dr58
2960 fld.d r15, 37*8, dr56
2961 fld.d r15, 36*8, dr54
2962 fld.d r15, 35*8, dr52
2963 fld.d r15, 34*8, dr50
2964 fld.d r15, 33*8, dr48
2965 fld.d r15, 32*8, dr46
2966 fld.d r15, 31*8, dr44
2967 fld.d r15, 30*8, dr42
2968 fld.d r15, 29*8, dr40
2969 fld.d r15, 28*8, dr38
2970 fld.d r15, 27*8, dr36
2971 blink tr1, r63 /* Skip the movi 27*8 below so that r0 keeps 41*8. */
2972 #else /* __SH4_NOFPU__ */
2973 .global GLOBAL(GCC_pop_shmedia_regs_nofpu)
2974 FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
2975 GLOBAL(GCC_pop_shmedia_regs_nofpu):
2976 #endif /* __SH4_NOFPU__ */
2977 movi 27*8, r0 /* r0 = bytes to release when only the general slots are restored. */
2978 .L0:
2979 ptabs r18, tr0 /* tr0 = return address. */
2980 ld.q r15, 26*8, r62 /* Slots 24..26 hold the saved tr5..tr7 values. */
2981 ld.q r15, 25*8, r61
2982 ld.q r15, 24*8, r60
2983 ptabs r62, tr7 /* Restore tr7..tr5 from them. */
2984 ptabs r61, tr6
2985 ptabs r60, tr5
2986 ld.q r15, 23*8, r59
2987 ld.q r15, 22*8, r58
2988 ld.q r15, 21*8, r57
2989 ld.q r15, 20*8, r56
2990 ld.q r15, 19*8, r55
2991 ld.q r15, 18*8, r54
2992 ld.q r15, 17*8, r53
2993 ld.q r15, 16*8, r52
2994 ld.q r15, 15*8, r51
2995 ld.q r15, 14*8, r50
2996 ld.q r15, 13*8, r49
2997 ld.q r15, 12*8, r48
2998 ld.q r15, 11*8, r47
2999 ld.q r15, 10*8, r46
3000 ld.q r15, 9*8, r45
3001 ld.q r15, 8*8, r44
3002 ld.q r15, 7*8, r35
3003 ld.q r15, 6*8, r34
3004 ld.q r15, 5*8, r33
3005 ld.q r15, 4*8, r32
3006 ld.q r15, 3*8, r31
3007 ld.q r15, 2*8, r30
3008 ld.q r15, 1*8, r29
3009 ld.q r15, 0*8, r28
3010 add.l r15, r0, r15 /* Release the save area (size chosen by the entry point). */
3011 blink tr0, r63 /* Return. */
3012
3013 #ifndef __SH4_NOFPU__
3014 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
3015 #else
3016 ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
3017 #endif
3018 #endif /* L_push_pop_shmedia_regs */
3019 #endif /* __SH5__ == 32 */
3020
3021 #ifdef L_div_table
3022 #if __SH5__
3023 #if defined(__pic__) && defined(__SHMEDIA__)
3024 .global GLOBAL(sdivsi3)
3025 FUNC(GLOBAL(sdivsi3))
3026 #if __SH5__ == 32
3027 .section .text..SHmedia32,"ax"
3028 #else
3029 .text
3030 #endif
3031 #if 0
3032 /* ??? FIXME: Presumably due to a linker bug, exporting data symbols
3033 in a text section does not work (at least for shared libraries):
3034 the linker sets the LSB of the address as if this was SHmedia code. */
3035 #define TEXT_DATA_BUG
3036 #endif
3037 .align 2
3038 // inputs: r4 (dividend), r5 (divisor); return address in r18
3039 // clobbered: r1,r18,r19,r20,r21,r25,tr0
3040 // result in r0
3041 .global GLOBAL(sdivsi3) // repeats the .global directive already issued above
3042 GLOBAL(sdivsi3):
3043 #ifdef TEXT_DATA_BUG
3044 ptb datalabel Local_div_table,tr0 // tr0 carries the table address; it is read back with gettr below
3045 #else
3046 ptb GLOBAL(div_table_internal),tr0 // tr0 carries the table address; it is read back with gettr below
3047 #endif
3048 nsb r5, r1 // r1 = nsb of the divisor: the normalization shift
3049 shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
3050 shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
3051 /* bubble */
3052 gettr tr0,r20 // r20 = table address as a data pointer
3053 ldx.ub r20, r21, r19 // u0.8
3054 shari r25, 32, r25 // normalize to s2.30
3055 shlli r21, 1, r21 // double the index to address the 16-bit constants
3056 muls.l r25, r19, r19 // s2.38
3057 ldx.w r20, r21, r21 // s2.14
3058 ptabs r18, tr0 // tr0 = return address; r18 is reused as scratch below
3059 shari r19, 24, r19 // truncate to s2.14
3060 sub r21, r19, r19 // some 11 bit inverse in s1.14
3061 muls.l r19, r19, r21 // u0.28
3062 sub r63, r1, r1 // r1 = -nsb
3063 addi r1, 92, r1 // r1 = 92 - nsb: shift count for the final shard
3064 muls.l r25, r21, r18 // s2.58
3065 shlli r19, 45, r19 // multiply by two and convert to s2.58
3066 /* bubble */
3067 sub r19, r18, r18
3068 shari r18, 28, r18 // some 22 bit inverse in s1.30
3069 muls.l r18, r25, r0 // s2.60
3070 muls.l r18, r4, r25 // s32.30
3071 /* bubble */
3072 shari r0, 16, r19 // s-16.44
3073 muls.l r19, r18, r19 // s-16.74
3074 shari r25, 63, r0 // r0 = 0 or -1 according to the sign of r25
3075 shari r4, 14, r18 // s19.-14
3076 shari r19, 30, r19 // s-16.44
3077 muls.l r19, r18, r19 // s15.30
3078 xor r21, r0, r21 // You could also use the constant 1 << 27.
3079 add r21, r25, r21
3080 sub r21, r19, r21
3081 shard r21, r1, r21 // shift by the count computed in r1
3082 sub r21, r0, r0 // final result in r0
3083 blink tr0, r63 // return
3084 ENDFUNC(GLOBAL(sdivsi3))
3085 /* This table has been generated by divtab.c .
3086 Defects for bias -330:
3087 Max defect: 6.081536e-07 at -1.000000e+00
3088 Min defect: 2.849516e-08 at 1.030651e+00
3089 Max 2nd step defect: 9.606539e-12 at -1.000000e+00
3090 Min 2nd step defect: 0.000000e+00 at 0.000000e+00
3091 Defect at 1: 1.238659e-07
3092 Defect at -2: 1.061708e-07 */
3093 #else /* ! __pic__ || ! __SHMEDIA__ */
3094 .section .rodata
3095 #endif /* __pic__ */
3096 #if defined(TEXT_DATA_BUG) && defined(__pic__) && defined(__SHMEDIA__)
3097 .balign 2
3098 .type Local_div_table,@object
3099 .size Local_div_table,128
3100 /* Negative division constants: 16 signed 16-bit words at byte offsets -64..-34 from Local_div_table. */
3101 .word -16638
3102 .word -17135
3103 .word -17737
3104 .word -18433
3105 .word -19103
3106 .word -19751
3107 .word -20583
3108 .word -21383
3109 .word -22343
3110 .word -23353
3111 .word -24407
3112 .word -25582
3113 .word -26863
3114 .word -28382
3115 .word -29965
3116 .word -31800
3117 /* Negative division factors: 16 unsigned bytes at byte offsets -32..-17 from Local_div_table. */
3118 .byte 66
3119 .byte 70
3120 .byte 75
3121 .byte 81
3122 .byte 87
3123 .byte 93
3124 .byte 101
3125 .byte 109
3126 .byte 119
3127 .byte 130
3128 .byte 142
3129 .byte 156
3130 .byte 172
3131 .byte 192
3132 .byte 214
3133 .byte 241
3134 .skip 16 /* Padding at byte offsets -16..-1. */
3135 Local_div_table: /* The label sits in the middle: 64 bytes of data precede it and 64 follow. */
3136 .skip 16 /* Padding at byte offsets 0..15. */
3137 /* Positive division factors: 16 unsigned bytes at byte offsets 16..31 from Local_div_table. */
3138 .byte 241
3139 .byte 214
3140 .byte 192
3141 .byte 172
3142 .byte 156
3143 .byte 142
3144 .byte 130
3145 .byte 119
3146 .byte 109
3147 .byte 101
3148 .byte 93
3149 .byte 87
3150 .byte 81
3151 .byte 75
3152 .byte 70
3153 .byte 66
3154 /* Positive division constants: 16 signed 16-bit words at byte offsets 32..62 from Local_div_table. */
3155 .word 31801
3156 .word 29966
3157 .word 28383
3158 .word 26864
3159 .word 25583
3160 .word 24408
3161 .word 23354
3162 .word 22344
3163 .word 21384
3164 .word 20584
3165 .word 19752
3166 .word 19104
3167 .word 18434
3168 .word 17738
3169 .word 17136
3170 .word 16639
3171 .section .rodata
3172 #endif /* TEXT_DATA_BUG */
3173 .balign 2
3174 .type GLOBAL(div_table),@object
3175 .size GLOBAL(div_table),128
3176 /* Negative division constants: 16 signed 16-bit words at byte offsets -64..-34 from div_table. */
3177 .word -16638
3178 .word -17135
3179 .word -17737
3180 .word -18433
3181 .word -19103
3182 .word -19751
3183 .word -20583
3184 .word -21383
3185 .word -22343
3186 .word -23353
3187 .word -24407
3188 .word -25582
3189 .word -26863
3190 .word -28382
3191 .word -29965
3192 .word -31800
3193 /* Negative division factors: 16 unsigned bytes at byte offsets -32..-17 from div_table. */
3194 .byte 66
3195 .byte 70
3196 .byte 75
3197 .byte 81
3198 .byte 87
3199 .byte 93
3200 .byte 101
3201 .byte 109
3202 .byte 119
3203 .byte 130
3204 .byte 142
3205 .byte 156
3206 .byte 172
3207 .byte 192
3208 .byte 214
3209 .byte 241
3210 .skip 16 /* Padding at byte offsets -16..-1. */
3211 .global GLOBAL(div_table)
3212 GLOBAL(div_table): /* The label sits in the middle: 64 bytes of data precede it and 64 follow. */
3213 HIDDEN_ALIAS(div_table_internal,div_table)
3214 .skip 16 /* Padding at byte offsets 0..15. */
3215 /* Positive division factors: 16 unsigned bytes at byte offsets 16..31 from div_table. */
3216 .byte 241
3217 .byte 214
3218 .byte 192
3219 .byte 172
3220 .byte 156
3221 .byte 142
3222 .byte 130
3223 .byte 119
3224 .byte 109
3225 .byte 101
3226 .byte 93
3227 .byte 87
3228 .byte 81
3229 .byte 75
3230 .byte 70
3231 .byte 66
3232 /* Positive division constants: 16 signed 16-bit words at byte offsets 32..62 from div_table. */
3233 .word 31801
3234 .word 29966
3235 .word 28383
3236 .word 26864
3237 .word 25583
3238 .word 24408
3239 .word 23354
3240 .word 22344
3241 .word 21384
3242 .word 20584
3243 .word 19752
3244 .word 19104
3245 .word 18434
3246 .word 17738
3247 .word 17136
3248 .word 16639
3249
3250 #elif defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
3251 /* This code used shld, thus is not suitable for SH1 / SH2. */
3252
3253 /* Signed / unsigned division without use of FPU, optimized for SH4.
3254 Uses a lookup table for divisors in the range -128 .. +128, and
3255 div1 with case distinction for larger divisors in three more ranges.
3256 The code is lumped together with the table to allow the use of mova. */
3257 #ifdef __LITTLE_ENDIAN__
3258 #define L_LSB 0
3259 #define L_LSWMSB 1
3260 #define L_MSWLSB 2
3261 #else
3262 #define L_LSB 3
3263 #define L_LSWMSB 2
3264 #define L_MSWLSB 1
3265 #endif
3266
3267 .balign 4
3268 .global GLOBAL(udivsi3_i4i)
3269 FUNC(GLOBAL(udivsi3_i4i))
3270 GLOBAL(udivsi3_i4i): /* Unsigned divide: r4 = dividend, r5 = divisor, quotient in r0.  r4 and r5 are saved on the stack and restored on every path; r1 is scratch.  Several labels below are also entered from sdivsi3_i4i. */
3271 mov.w LOCAL(c128_w), r1 /* r1 = 128. */
3272 div0u
3273 mov r4,r0
3274 shlr8 r0 /* r0 = dividend >> 8. */
3275 cmp/hi r1,r5 /* T = divisor > 128 (unsigned). */
3276 extu.w r5,r1 /* r1 = low 16 bits of the divisor. */
3277 bf LOCAL(udiv_le128) /* Divisor <= 128: use the inverse table. */
3278 cmp/eq r5,r1 /* T = divisor fits in 16 bits. */
3279 bf LOCAL(udiv_ge64k) /* Divisor >= 64k. */
3280 shlr r0 /* 128 < divisor < 64k: r0 = dividend >> 9. */
3281 mov r5,r1 /* Keep the original divisor for saving. */
3282 shll16 r5
3283 mov.l r4,@-r15 /* Save r4. */
3284 div1 r5,r0
3285 mov.l r1,@-r15 /* Save the original r5. */
3286 div1 r5,r0
3287 div1 r5,r0
3288 bra LOCAL(udiv_25) /* Continue in the code shared with sdivsi3_i4i. */
3289 div1 r5,r0 /* (delay slot) */
3290
3291 LOCAL(div_le128): /* Entered from sdivsi3_i4i, which has already pushed r4 and r5. */
3292 mova LOCAL(div_table_ix),r0
3293 bra LOCAL(div_le128_2)
3294 mov.b @(r0,r5),r1 /* (delay slot) r1 = div_table_ix[divisor]. */
3295 LOCAL(udiv_le128):
3296 mov.l r4,@-r15 /* Save r4. */
3297 mova LOCAL(div_table_ix),r0
3298 mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor]: byte offset into the inverse table. */
3299 mov.l r5,@-r15 /* Save r5. */
3300 LOCAL(div_le128_2):
3301 mova LOCAL(div_table_inv),r0
3302 mov.l @(r0,r1),r1 /* r1 = normalized inverse of the divisor. */
3303 mov r5,r0
3304 tst #0xfe,r0 /* T = 1 when (divisor & 0xfe) == 0, i.e. the divisor is 0 or 1. */
3305 mova LOCAL(div_table_clz),r0
3306 dmulu.l r1,r4 /* mach:macl = inverse * dividend. */
3307 mov.b @(r0,r5),r1 /* r1 = div_table_clz[divisor]: shift count for the shld below. */
3308 bt/s LOCAL(div_by_1)
3309 mov r4,r0 /* (delay slot) r0 = dividend, the result when dividing by 1. */
3310 mov.l @r15+,r5 /* Restore r5. */
3311 sts mach,r0 /* r0 = high word of the product. */
3312 /* clrt: not needed, T is already 0 here because the bt/s above was not taken. */
3313 addc r4,r0 /* Add the dividend (presumably for the implicit leading 1 of the inverse); carry goes to T. */
3314 mov.l @r15+,r4 /* Restore r4. */
3315 rotcr r0 /* Rotate the carry back in from the top. */
3316 rts
3317 shld r1,r0 /* (delay slot) Shift by the table's count. */
3318
3319 LOCAL(div_by_1_neg): /* Entered from sdivsi3_i4i (div_le128_neg). */
3320 neg r4,r0
3321 LOCAL(div_by_1):
3322 mov.l @r15+,r5 /* Restore r5. */
3323 rts
3324 mov.l @r15+,r4 /* (delay slot) Restore r4. */
3325
3326 LOCAL(div_ge64k): /* Entered from sdivsi3_i4i with T set by its cmp/hi r0,r5. */
3327 bt/s LOCAL(div_r8)
3328 div0u /* (delay slot) */
3329 shll8 r5
3330 bra LOCAL(div_ge64k_2)
3331 div1 r5,r0 /* (delay slot) */
3332 LOCAL(udiv_ge64k):
3333 cmp/hi r0,r5 /* T = divisor > (dividend >> 8). */
3334 mov r5,r1 /* Keep the original divisor for saving. */
3335 bt LOCAL(udiv_r8) /* NOTE(review): presumably the quotient then fits in 8 bits - confirm. */
3336 shll8 r5
3337 mov.l r4,@-r15 /* Save r4. */
3338 div1 r5,r0
3339 mov.l r1,@-r15 /* Save the original r5. */
3340 LOCAL(div_ge64k_2):
3341 div1 r5,r0
3342 mov.l LOCAL(zero_l),r1 /* r1 = 0. */
3343 .rept 4
3344 div1 r5,r0
3345 .endr
3346 mov.l r1,@-r15 /* Push a zero word; a byte of r0 is stored into it below. */
3347 div1 r5,r0
3348 mov.w LOCAL(m256_w),r1 /* r1 = 0xff00 sign-extended (-256). */
3349 div1 r5,r0
3350 mov.b r0,@(L_LSWMSB,r15) /* Store the low byte of r0 into the scratch word. */
3351 xor r4,r0
3352 and r1,r0
3353 bra LOCAL(div_ge64k_end) /* Finish in the code shared with sdivsi3_i4i. */
3354 xor r4,r0 /* (delay slot) With the xor/and above: r0 keeps its upper 24 bits and takes its low byte from r4. */
3355
3356 LOCAL(div_r8): /* Reached from div_ge64k; r4 and r5 are already on the stack. */
3357 shll16 r4
3358 bra LOCAL(div_r8_2)
3359 shll8 r4 /* (delay slot) With the shll16 above: r4 <<= 24. */
3360 LOCAL(udiv_r8):
3361 mov.l r4,@-r15 /* Save r4. */
3362 shll16 r4
3363 clrt
3364 shll8 r4 /* r4 <<= 24 in total. */
3365 mov.l r5,@-r15 /* Save r5. */
3366 LOCAL(div_r8_2):
3367 rotcl r4
3368 mov r0,r1
3369 div1 r5,r1
3370 mov r4,r0
3371 rotcl r0
3372 mov r5,r4 /* r4 takes over as the divisor so that r5 can be restored early. */
3373 div1 r5,r1
3374 .rept 5
3375 rotcl r0; div1 r5,r1
3376 .endr
3377 rotcl r0
3378 mov.l @r15+,r5 /* Restore r5. */
3379 div1 r4,r1 /* Last step uses the divisor copy in r4. */
3380 mov.l @r15+,r4 /* Restore r4. */
3381 rts
3382 rotcl r0 /* (delay slot) Shift in the final quotient bit. */
3383
3384 ENDFUNC(GLOBAL(udivsi3_i4i))
3385
3386 .global GLOBAL(sdivsi3_i4i)
3387 FUNC(GLOBAL(sdivsi3_i4i))
3388 /* This is link-compatible with a GLOBAL(sdivsi3) call,
3389 but we effectively clobber only r1. */
3390 GLOBAL(sdivsi3_i4i):
3391 mov.l r4,@-r15
3392 cmp/pz r5
3393 mov.w LOCAL(c128_w), r1
3394 bt/s LOCAL(pos_divisor)
3395 cmp/pz r4
3396 mov.l r5,@-r15
3397 neg r5,r5
3398 bt/s LOCAL(neg_result)
3399 cmp/hi r1,r5
3400 neg r4,r4
3401 LOCAL(pos_result):
3402 extu.w r5,r0
3403 bf LOCAL(div_le128)
3404 cmp/eq r5,r0
3405 mov r4,r0
3406 shlr8 r0
3407 bf/s LOCAL(div_ge64k)
3408 cmp/hi r0,r5
3409 div0u
3410 shll16 r5
3411 div1 r5,r0
3412 div1 r5,r0
3413 div1 r5,r0
3414 LOCAL(udiv_25):
3415 mov.l LOCAL(zero_l),r1
3416 div1 r5,r0
3417 div1 r5,r0
3418 mov.l r1,@-r15
3419 .rept 3
3420 div1 r5,r0
3421 .endr
3422 mov.b r0,@(L_MSWLSB,r15)
3423 xtrct r4,r0
3424 swap.w r0,r0
3425 .rept 8
3426 div1 r5,r0
3427 .endr
3428 mov.b r0,@(L_LSWMSB,r15)
3429 LOCAL(div_ge64k_end):
3430 .rept 8
3431 div1 r5,r0
3432 .endr
3433 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3434 extu.b r0,r0
3435 mov.l @r15+,r5
3436 or r4,r0
3437 mov.l @r15+,r4
3438 rts
3439 rotcl r0
3440
3441 LOCAL(div_le128_neg):
3442 tst #0xfe,r0
3443 mova LOCAL(div_table_ix),r0
3444 mov.b @(r0,r5),r1
3445 mova LOCAL(div_table_inv),r0
3446 bt/s LOCAL(div_by_1_neg)
3447 mov.l @(r0,r1),r1
3448 mova LOCAL(div_table_clz),r0
3449 dmulu.l r1,r4
3450 mov.b @(r0,r5),r1
3451 mov.l @r15+,r5
3452 sts mach,r0
3453 /* clrt */
3454 addc r4,r0
3455 mov.l @r15+,r4
3456 rotcr r0
3457 shld r1,r0
3458 rts
3459 neg r0,r0
3460
3461 LOCAL(pos_divisor):
3462 mov.l r5,@-r15
3463 bt/s LOCAL(pos_result)
3464 cmp/hi r1,r5
3465 neg r4,r4
3466 LOCAL(neg_result):
3467 extu.w r5,r0
3468 bf LOCAL(div_le128_neg)
3469 cmp/eq r5,r0
3470 mov r4,r0
3471 shlr8 r0
3472 bf/s LOCAL(div_ge64k_neg)
3473 cmp/hi r0,r5
3474 div0u
3475 mov.l LOCAL(zero_l),r1
3476 shll16 r5
3477 div1 r5,r0
3478 mov.l r1,@-r15
3479 .rept 7
3480 div1 r5,r0
3481 .endr
3482 mov.b r0,@(L_MSWLSB,r15)
3483 xtrct r4,r0
3484 swap.w r0,r0
3485 .rept 8
3486 div1 r5,r0
3487 .endr
3488 mov.b r0,@(L_LSWMSB,r15)
3489 LOCAL(div_ge64k_neg_end):
3490 .rept 8
3491 div1 r5,r0
3492 .endr
3493 mov.l @r15+,r4 ! zero-extension and swap using LS unit.
3494 extu.b r0,r1
3495 mov.l @r15+,r5
3496 or r4,r1
3497 LOCAL(div_r8_neg_end):
3498 mov.l @r15+,r4
3499 rotcl r1
3500 rts
3501 neg r1,r0
3502
3503 LOCAL(div_ge64k_neg):
3504 bt/s LOCAL(div_r8_neg)
3505 div0u
3506 shll8 r5
3507 mov.l LOCAL(zero_l),r1
3508 .rept 6
3509 div1 r5,r0
3510 .endr
3511 mov.l r1,@-r15
3512 div1 r5,r0
3513 mov.w LOCAL(m256_w),r1
3514 div1 r5,r0
3515 mov.b r0,@(L_LSWMSB,r15)
3516 xor r4,r0
3517 and r1,r0
3518 bra LOCAL(div_ge64k_neg_end)
3519 xor r4,r0
3520
3521 LOCAL(c128_w):
3522 .word 128
3523
3524 LOCAL(div_r8_neg):
3525 clrt
3526 shll16 r4
3527 mov r4,r1
3528 shll8 r1
3529 mov r5,r4
3530 .rept 7
3531 rotcl r1; div1 r5,r0
3532 .endr
3533 mov.l @r15+,r5
3534 rotcl r1
3535 bra LOCAL(div_r8_neg_end)
3536 div1 r4,r0
3537
3538 LOCAL(m256_w):
3539 .word 0xff00
3540 /* This table has been generated by divtab-sh4.c. */
3541 .balign 4
3542 LOCAL(div_table_clz):
3543 .byte 0
3544 .byte 1
3545 .byte 0
3546 .byte -1
3547 .byte -1
3548 .byte -2
3549 .byte -2
3550 .byte -2
3551 .byte -2
3552 .byte -3
3553 .byte -3
3554 .byte -3
3555 .byte -3
3556 .byte -3
3557 .byte -3
3558 .byte -3
3559 .byte -3
3560 .byte -4
3561 .byte -4
3562 .byte -4
3563 .byte -4
3564 .byte -4
3565 .byte -4
3566 .byte -4
3567 .byte -4
3568 .byte -4
3569 .byte -4
3570 .byte -4
3571 .byte -4
3572 .byte -4
3573 .byte -4
3574 .byte -4
3575 .byte -4
3576 .byte -5
3577 .byte -5
3578 .byte -5
3579 .byte -5
3580 .byte -5
3581 .byte -5
3582 .byte -5
3583 .byte -5
3584 .byte -5
3585 .byte -5
3586 .byte -5
3587 .byte -5
3588 .byte -5
3589 .byte -5
3590 .byte -5
3591 .byte -5
3592 .byte -5
3593 .byte -5
3594 .byte -5
3595 .byte -5
3596 .byte -5
3597 .byte -5
3598 .byte -5
3599 .byte -5
3600 .byte -5
3601 .byte -5
3602 .byte -5
3603 .byte -5
3604 .byte -5
3605 .byte -5
3606 .byte -5
3607 .byte -5
3608 .byte -6
3609 .byte -6
3610 .byte -6
3611 .byte -6
3612 .byte -6
3613 .byte -6
3614 .byte -6
3615 .byte -6
3616 .byte -6
3617 .byte -6
3618 .byte -6
3619 .byte -6
3620 .byte -6
3621 .byte -6
3622 .byte -6
3623 .byte -6
3624 .byte -6
3625 .byte -6
3626 .byte -6
3627 .byte -6
3628 .byte -6
3629 .byte -6
3630 .byte -6
3631 .byte -6
3632 .byte -6
3633 .byte -6
3634 .byte -6
3635 .byte -6
3636 .byte -6
3637 .byte -6
3638 .byte -6
3639 .byte -6
3640 .byte -6
3641 .byte -6
3642 .byte -6
3643 .byte -6
3644 .byte -6
3645 .byte -6
3646 .byte -6
3647 .byte -6
3648 .byte -6
3649 .byte -6
3650 .byte -6
3651 .byte -6
3652 .byte -6
3653 .byte -6
3654 .byte -6
3655 .byte -6
3656 .byte -6
3657 .byte -6
3658 .byte -6
3659 .byte -6
3660 .byte -6
3661 .byte -6
3662 .byte -6
3663 .byte -6
3664 .byte -6
3665 .byte -6
3666 .byte -6
3667 .byte -6
3668 .byte -6
3669 .byte -6
3670 .byte -6
3671 /* Lookup table translating positive divisor to index into table of
3672 normalized inverse. N.B. the '0' entry is also the last entry of the
3673 previous table, and causes an unaligned access for division by zero. */
3674 LOCAL(div_table_ix):
3675 .byte -6
3676 .byte -128
3677 .byte -128
3678 .byte 0
3679 .byte -128
3680 .byte -64
3681 .byte 0
3682 .byte 64
3683 .byte -128
3684 .byte -96
3685 .byte -64
3686 .byte -32
3687 .byte 0
3688 .byte 32
3689 .byte 64
3690 .byte 96
3691 .byte -128
3692 .byte -112
3693 .byte -96
3694 .byte -80
3695 .byte -64
3696 .byte -48
3697 .byte -32
3698 .byte -16
3699 .byte 0
3700 .byte 16
3701 .byte 32
3702 .byte 48
3703 .byte 64
3704 .byte 80
3705 .byte 96
3706 .byte 112
3707 .byte -128
3708 .byte -120
3709 .byte -112
3710 .byte -104
3711 .byte -96
3712 .byte -88
3713 .byte -80
3714 .byte -72
3715 .byte -64
3716 .byte -56
3717 .byte -48
3718 .byte -40
3719 .byte -32
3720 .byte -24
3721 .byte -16
3722 .byte -8
3723 .byte 0
3724 .byte 8
3725 .byte 16
3726 .byte 24
3727 .byte 32
3728 .byte 40
3729 .byte 48
3730 .byte 56
3731 .byte 64
3732 .byte 72
3733 .byte 80
3734 .byte 88
3735 .byte 96
3736 .byte 104
3737 .byte 112
3738 .byte 120
3739 .byte -128
3740 .byte -124
3741 .byte -120
3742 .byte -116
3743 .byte -112
3744 .byte -108
3745 .byte -104
3746 .byte -100
3747 .byte -96
3748 .byte -92
3749 .byte -88
3750 .byte -84
3751 .byte -80
3752 .byte -76
3753 .byte -72
3754 .byte -68
3755 .byte -64
3756 .byte -60
3757 .byte -56
3758 .byte -52
3759 .byte -48
3760 .byte -44
3761 .byte -40
3762 .byte -36
3763 .byte -32
3764 .byte -28
3765 .byte -24
3766 .byte -20
3767 .byte -16
3768 .byte -12
3769 .byte -8
3770 .byte -4
3771 .byte 0
3772 .byte 4
3773 .byte 8
3774 .byte 12
3775 .byte 16
3776 .byte 20
3777 .byte 24
3778 .byte 28
3779 .byte 32
3780 .byte 36
3781 .byte 40
3782 .byte 44
3783 .byte 48
3784 .byte 52
3785 .byte 56
3786 .byte 60
3787 .byte 64
3788 .byte 68
3789 .byte 72
3790 .byte 76
3791 .byte 80
3792 .byte 84
3793 .byte 88
3794 .byte 92
3795 .byte 96
3796 .byte 100
3797 .byte 104
3798 .byte 108
3799 .byte 112
3800 .byte 116
3801 .byte 120
3802 .byte 124
3803 .byte -128
3804 /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
3805 .balign 4
3806 LOCAL(zero_l):
3807 .long 0x0
3808 .long 0xF81F81F9
3809 .long 0xF07C1F08
3810 .long 0xE9131AC0
3811 .long 0xE1E1E1E2
3812 .long 0xDAE6076C
3813 .long 0xD41D41D5
3814 .long 0xCD856891
3815 .long 0xC71C71C8
3816 .long 0xC0E07039
3817 .long 0xBACF914D
3818 .long 0xB4E81B4F
3819 .long 0xAF286BCB
3820 .long 0xA98EF607
3821 .long 0xA41A41A5
3822 .long 0x9EC8E952
3823 .long 0x9999999A
3824 .long 0x948B0FCE
3825 .long 0x8F9C18FA
3826 .long 0x8ACB90F7
3827 .long 0x86186187
3828 .long 0x81818182
3829 .long 0x7D05F418
3830 .long 0x78A4C818
3831 .long 0x745D1746
3832 .long 0x702E05C1
3833 .long 0x6C16C16D
3834 .long 0x68168169
3835 .long 0x642C8591
3836 .long 0x60581606
3837 .long 0x5C9882BA
3838 .long 0x58ED2309
3839 LOCAL(div_table_inv):
3840 .long 0x55555556
3841 .long 0x51D07EAF
3842 .long 0x4E5E0A73
3843 .long 0x4AFD6A06
3844 .long 0x47AE147B
3845 .long 0x446F8657
3846 .long 0x41414142
3847 .long 0x3E22CBCF
3848 .long 0x3B13B13C
3849 .long 0x38138139
3850 .long 0x3521CFB3
3851 .long 0x323E34A3
3852 .long 0x2F684BDB
3853 .long 0x2C9FB4D9
3854 .long 0x29E4129F
3855 .long 0x27350B89
3856 .long 0x24924925
3857 .long 0x21FB7813
3858 .long 0x1F7047DD
3859 .long 0x1CF06ADB
3860 .long 0x1A7B9612
3861 .long 0x18118119
3862 .long 0x15B1E5F8
3863 .long 0x135C8114
3864 .long 0x11111112
3865 .long 0xECF56BF
3866 .long 0xC9714FC
3867 .long 0xA6810A7
3868 .long 0x8421085
3869 .long 0x624DD30
3870 .long 0x4104105
3871 .long 0x2040811
3872 /* maximum error: 0.987342 scaled: 0.921875*/
3873
3874 ENDFUNC(GLOBAL(sdivsi3_i4i))
3875 #endif /* SH3 / SH4 */
3876
3877 #endif /* L_div_table */
3878
3879 #ifdef L_udiv_qrnnd_16
3880 #if !__SHMEDIA__
3881 HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
3882 /* udiv_qrnnd_16: run sixteen div1 steps of r0 by r6 to obtain a 16-bit
   quotient value in r1, multiply it by the low half of r5, subtract the
   product from a recombined numerator and then correct quotient and
   remainder at most twice (r1 -= 1, r0 += r5 each time).
   On entry:  r0: n1  r4: n0  r5: d  r6: d1
   On exit:   r0: rn  r1: qn
   Temporary: r2: __m (the product fetched from MACL); MACL and T are
   also overwritten.
   NOTE(review): the exact operand layout the caller must provide (for
   instance how d1 relates to d) is not visible in this routine -- confirm
   against the callers. */
3883 /* Precondition: n1 < d, but n1 might be larger than d1. */
3884 .global GLOBAL(udiv_qrnnd_16)
3885 .balign 8
3886 GLOBAL(udiv_qrnnd_16):
3887 div0u /* initialise the division flags for unsigned div1 stepping */
3888 cmp/hi r6,r0 /* T = (n1 > d1), unsigned */
3889 bt .Lots /* n1 > d1: take the out-of-line path; otherwise fall through with T = 0 */
3890 .rept 16
3891 div1 r6,r0 /* one division step of r0 by r6; unrolled sixteen times */
3892 .endr
3893 extu.w r0,r1 /* r1 = low 16 bits of r0, zero-extended */
3894 bt 0f /* T comes from the last div1; skip the add when it is set */
3895 add r6,r0 /* add d1 back into r0 (presumably undoes the last non-restoring step -- TODO confirm) */
3896 0: rotcl r1 /* shift r1 left by one, bringing T in as bit 0 */
3897 mulu.w r1,r5 /* MACL = (r1 & 0xffff) * (r5 & 0xffff), unsigned */
3898 xtrct r4,r0 /* r0 = (r4 << 16) | (r0 >> 16) */
3899 swap.w r0,r0 /* swap halves: r0 = (upper half of r0 before the xtrct) << 16 | (r4 & 0xffff) */
3900 sts macl,r2 /* r2 = the product computed above */
3901 cmp/hs r2,r0 /* T = (r0 >= r2), unsigned, i.e. the subtraction below does not borrow */
3902 sub r2,r0 /* r0 -= r2 */
3903 bt 0f /* no borrow: r0 and r1 are final, leave through the rts at the end */
3904 addc r5,r0 /* T is 0 here, so this is r0 += d with T = carry out */
3905 add #-1,r1 /* first correction: r1 -= 1 (T is left untouched) */
3906 bt 0f /* carry out: one correction was enough, return */
3907 1: add #-1,r1 /* second correction: r1 -= 1 ... */
3908 rts
3909 add r5,r0 /* ... and r0 += d, executed in the rts delay slot */
3910 .balign 8
3911 .Lots: /* reached only when n1 > d1 */
3912 sub r5,r0 /* r0 -= d */
3913 swap.w r4,r1 /* r1 = r4 with its two halves exchanged */
3914 xtrct r0,r1 /* r1 = (r0 << 16) | (r4 & 0xffff) */
3915 clrt /* T = 0 so that the addc below acts as a plain add */
3916 mov r1,r0
3917 addc r5,r0 /* r0 = r1 + d, T = carry out */
3918 mov #-1,r1 /* r1 = 0xffffffff; the shlr16 in the SL1 below reduces it to 0xffff */
3919 SL1(bf, 1b,
3920 shlr16 r1) /* SL1 is defined outside this chunk; it appears to pair "bf 1b" with shlr16 r1 as the accompanying slot instruction -- TODO confirm.  With T clear, control goes to the correction at 1b. */
3921 0: rts /* shared return point for the forward "bt 0f" branches and the fall-through above */
3922 nop
3923 ENDFUNC(GLOBAL(udiv_qrnnd_16))
3924 #endif /* !__SHMEDIA__ */
3925 #endif /* L_udiv_qrnnd_16 */