0
|
1 ;; libgcc routines for the Renesas H8/300 CPU.
|
|
2 ;; Contributed by Steve Chamberlain <sac@cygnus.com>
|
|
3 ;; Optimizations by Toshiyasu Morita <toshiyasu.morita@renesas.com>
|
|
4
|
|
5 /* Copyright (C) 1994, 2000, 2001, 2002, 2003, 2004, 2009
|
|
6 Free Software Foundation, Inc.
|
|
7
|
|
8 This file is free software; you can redistribute it and/or modify it
|
|
9 under the terms of the GNU General Public License as published by the
|
|
10 Free Software Foundation; either version 3, or (at your option) any
|
|
11 later version.
|
|
12
|
|
13 This file is distributed in the hope that it will be useful, but
|
|
14 WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
16 General Public License for more details.
|
|
17
|
|
18 Under Section 7 of GPL version 3, you are granted additional
|
|
19 permissions described in the GCC Runtime Library Exception, version
|
|
20 3.1, as published by the Free Software Foundation.
|
|
21
|
|
22 You should have received a copy of the GNU General Public License and
|
|
23 a copy of the GCC Runtime Library Exception along with this program;
|
|
24 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
25 <http://www.gnu.org/licenses/>. */
|
|
26
|
|
/* Assembler register aliases.

   A0..A3 are r0..r3 and S0..S2 are r4..r6.  The "L" and "H" suffixed
   names select the low and the high byte of the same register.  */

#define A0 r0
#define A1 r1
#define A2 r2
#define A3 r3
#define S0 r4
#define S1 r5
#define S2 r6

#define A0L r0l
#define A1L r1l
#define A2L r2l
#define A3L r3l
#define S0L r4l
#define S1L r5l
#define S2L r6l

#define A0H r0h
#define A1H r1h
#define A2H r2h
#define A3H r3h
#define S0H r4h
#define S1H r5h
#define S2H r6h

/* The "P" names are the register views handed to PUSHP/POPP.  On the
   plain H8/300 they are the 16-bit registers and PUSHP/POPP are the
   word sized push and pop.  */
#ifdef __H8300__
#define PUSHP push
#define POPP pop

#define A0P r0
#define A1P r1
#define A2P r2
#define A3P r3
#define S0P r4
#define S1P r5
#define S2P r6
#endif

/* On the H8/300H, H8S and H8SX the "P" names are the 32-bit erN
   registers and PUSHP/POPP are the long sized push and pop.  The "E"
   names are the eN registers that belong to er0..er3.  */
#if defined (__H8300H__) || defined (__H8300S__) || defined (__H8300SX__)
#define PUSHP push.l
#define POPP pop.l

#define A0P er0
#define A1P er1
#define A2P er2
#define A3P er3
#define S0P er4
#define S1P er5
#define S2P er6

#define A0E e0
#define A1E e1
#define A2E e2
#define A3E e3
#endif

/* Tell the assembler which CPU variant is being targeted.  The
   directives ending in "n" are used when __NORMAL_MODE__ is defined.  */
#ifdef __NORMAL_MODE__
#ifdef __H8300H__
	.h8300hn
#endif
#ifdef __H8300S__
	.h8300sn
#endif
#ifdef __H8300SX__
	.h8300sxn
#endif
#else
#ifdef __H8300H__
	.h8300h
#endif
#ifdef __H8300S__
	.h8300s
#endif
#ifdef __H8300SX__
	.h8300sx
#endif
#endif
|
|
110
|
|
#ifdef L_cmpsi2
#ifdef __H8300__

;; ___cmpsi2 - three-way signed compare of two 32-bit values.
;;
;;   in:   A0:A1  left operand  (A0 is compared first, as a signed word)
;;         A2:A3  right operand
;;   out:  A0 = 0 when A0:A1 <  A2:A3
;;         A0 = 1 when A0:A1 == A2:A3
;;         A0 = 2 when A0:A1 >  A2:A3
;;
;; The A0/A2 words decide the result with a signed comparison.  Only
;; when they are equal are A1/A3 consulted, and those compare unsigned.

	.section .text
	.align 2
	.global ___cmpsi2
___cmpsi2:
	cmp.w	A0,A2			; flags from A2 - A0
	beq	.Lcmp_low		; tie: the A1/A3 words decide
	bgt	.Lcmp_less		; signed A2 > A0: left operand is smaller
.Lcmp_greater:
	mov.w	#2,A0
	rts
.Lcmp_low:
	cmp.w	A1,A3			; flags from A3 - A1
	beq	.Lcmp_equal
	bls	.Lcmp_greater		; unsigned A3 below A1: left is larger
.Lcmp_less:
	sub.w	A0,A0			; A0 = 0
	rts
.Lcmp_equal:
	mov.w	#1,A0
	rts
	.end
#endif
#endif /* L_cmpsi2 */
|
|
136
|
|
#ifdef L_ucmpsi2
#ifdef __H8300__

;; ___ucmpsi2 - three-way unsigned compare of two 32-bit values.
;;
;;   in:   A0:A1  left operand  (A0 is compared first)
;;         A2:A3  right operand
;;   out:  A0 = 0 when A0:A1 <  A2:A3
;;         A0 = 1 when A0:A1 == A2:A3
;;         A0 = 2 when A0:A1 >  A2:A3
;;
;; Both word comparisons are unsigned.  A1/A3 are only consulted when
;; A0 and A2 are equal.

	.section .text
	.align 2
	.global ___ucmpsi2
___ucmpsi2:
	cmp.w	A0,A2			; flags from A2 - A0
	beq	.Lucmp_low		; tie: the A1/A3 words decide
	bhi	.Lucmp_less		; unsigned A2 > A0: left operand is smaller
.Lucmp_greater:
	mov.w	#2,A0
	rts
.Lucmp_low:
	cmp.w	A1,A3			; flags from A3 - A1
	beq	.Lucmp_equal
	bls	.Lucmp_greater		; unsigned A3 below A1: left is larger
.Lucmp_less:
	sub.w	A0,A0			; A0 = 0
	rts
.Lucmp_equal:
	mov.w	#1,A0
	rts
	.end
#endif
#endif /* L_ucmpsi2 */
|
|
162
|
|
#ifdef L_divhi3

;; HImode (16-bit) divide and modulo for the H8/300.
;; Everything is kept in one object file because the entry points
;; share several "supporting routines".

#ifdef __H8300__
	.section .text
	.align 2

; divnorm - prepare the operands of a signed divide.
;
;   in:   A0   numerator   (___divhi3 computes A0 / A1)
;         A1   denominator
;   out:  A0, A1  negated when they were negative
;         A2L     copy of the condition codes; bit 3 is set when the
;                 quotient has to be negated afterwards
divnorm:
	or	A0H,A0H			; test the sign of A0
	stc	ccr,A2L			; keep the flags, bit 3 is the N flag
	bge	divnorm_den
	not	A0H			; A0 is negative: A0 = -A0
	not	A0L
	adds	#1,A0
divnorm_den:
	or	A1H,A1H			; test the sign of A1
	bge	divnorm_done
	not	A1H			; A1 is negative: A1 = -A1
	not	A1L
	adds	#1,A1
	xor	#0x8,A2L		; a negative A1 flips the result sign
divnorm_done:
	rts

; modnorm - prepare the operands of a signed modulo.
;
; Same as divnorm, except that the sign bit kept in A2L is never
; flipped: it reflects the sign of A0 only, so the remainder takes the
; sign of the numerator.
modnorm:
	or	A0H,A0H			; test the sign of A0
	stc	ccr,A2L			; keep the flags, bit 3 is the N flag
	bge	modnorm_den
	not	A0H			; A0 is negative: A0 = -A0
	not	A0L
	adds	#1,A0
modnorm_den:
	or	A1H,A1H			; test the sign of A1
	bge	modnorm_done
	not	A1H			; A1 is negative: A1 = -A1
	not	A1L
	adds	#1,A1
modnorm_done:
	rts
|
|
208
|
|
; ___divhi3 - signed 16-bit divide: A0 = A0 / A1.
;
; The operands are made non-negative by divnorm, divided by
; ___udivhi3, and the quotient is negated when bit 3 of A2L says so.
;
; negans is also the shared tail of ___modhi3: it negates A0 when
; bit 3 of A2L is set and then returns.

	.global	___divhi3
___divhi3:
	bsr	divnorm
	bsr	___udivhi3
negans:
	btst	#3,A2L			; does the answer have to be negative?
	beq	negans_done
	not	A0H			; yes: A0 = -A0
	not	A0L
	adds	#1,A0
negans_done:
	rts
|
|
221
|
|
; ___modhi3 - signed 16-bit modulo: A0 = A0 % A1.
;
; modnorm records the sign in A2L, ___udivhi3 leaves the remainder in
; A3, and negans applies the recorded sign before returning.

	.global	___modhi3
___modhi3:
	bsr	modnorm
	bsr	___udivhi3
	mov	A3,A0			; the remainder is the answer
	bra	negans
|
|
230
|
|
; ___umodhi3 - unsigned 16-bit modulo: A0 = A0 % A1.
;
; ___udivhi3 leaves the remainder in A3; it is copied to A0.

	.global	___umodhi3
___umodhi3:
	bsr	___udivhi3
	mov	A3,A0			; the remainder is the answer
	rts
|
|
238
|
|
; ___udivhi3 - unsigned 16-bit divide.
;
;   in:   A0  numerator
;         A1  denominator
;   out:  A0  quotient   (A0 / A1)
;         A3  remainder  (A0 % A1)
;   A2H is used as a loop counter on the slow path and is trashed
;   there.  A2L is not written.
;
; Byte names used in the comments below:
;   N / n   high / low byte of the numerator
;   D / d   high / low byte of the denominator
;   Q / q   high / low byte of the quotient
;   M / m   high / low byte of the remainder
;
; The H8/300 divide instruction only divides a 16-bit value by an
; 8-bit one, so the work is split according to the size of the
; denominator.

	.global	___udivhi3
___udivhi3:
	sub.w	A3,A3			; A3 = 0
	or	A1H,A1H
	bne	udivhi3_wide		; D != 0: denominator needs 16 bits
	or	A0H,A0H
	beq	udivhi3_low		; N == 0: only the low byte to divide

	; D == 0 and N != 0: divide the high byte first.
	mov.b	A0H,A3L			; A3 = 0N
	divxu	A1L,A3			; A3L = Q, A3H = remainder so far
	mov.b	A3L,A0H			; store the quotient high byte
udivhi3_low:
	mov.b	A0L,A3L			; A3 = remainder so far : n
	divxu	A1L,A3			; A3L = q, A3H = m
	mov.b	A3L,A0L			; A0 = Qq
	mov.b	A3H,A3L			; move the remainder down
	mov.b	#0x0,A3H		; and clear its high byte: A3 = 0m
	rts

; D != 0: the quotient fits in eight bits, so run eight rounds of
; shift and subtract.  A3 collects the remainder.  A0L gives up one
; numerator bit per round at the top and takes one quotient bit at
; the bottom.
udivhi3_wide:
	mov.b	A0H,A3L			; A3 = 0N
	mov.b	#0x0,A0H		; high byte of the answer has to be zero
	mov.b	#0x8,A2H		; eight rounds
udivhi3_loop:
	add.b	A0L,A0L			; shift n left, top bit goes to carry
	rotxl	A3L			; shift the carry into the remainder
	rotxl	A3H
	sub.w	A1,A3			; trial subtract of the denominator
	bhs	udivhi3_setbit		; no borrow: it did fit
	add.w	A1,A3			; borrow: undo the subtract

	dec	A2H
	bne	udivhi3_loop		; next bit
	rts

udivhi3_setbit:
	inc	A0L			; record a 1 in the quotient
	dec	A2H
	bne	udivhi3_loop		; next bit
	rts

#endif /* __H8300__ */
#endif /* L_divhi3 */
|
|
302
|
|
#ifdef L_divsi3

;; SImode (4 byte) integer divides for the H8/300 family.
;;
;; One routine does all of the real work; a handful of small ones
;; prepare the arguments and fix up the sign of the result.
;; Everything is kept in one object file because the entry points
;; share several "supporting routines".

	.section .text
	.align 2

; divnorm / modnorm replace both operands by their absolute values and
; leave the sign the result needs in bit 3 of S2L.
; The original note says these helpers live here to keep branch
; displacements small.

#ifdef __H8300__

; divnorm (H8/300)
;
;   in:   A0:A1  numerator, A2:A3 denominator
;   out:  both negated when they were negative
;         S2L    copy of the condition codes; bit 3 is set when the
;                quotient has to be negated afterwards
divnorm:
	mov.b	A0H,A0H			; test the sign of the numerator
	stc	ccr,S2L			; keep the sign in bit 3 of S2L
	bge	divnorm_den

	; numerator = -numerator: complement all four bytes, then add 1
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
divnorm_den:
	mov.b	A2H,A2H			; test the sign of the denominator
	bge	divnorm_done
	; denominator = -denominator
	not	A2L
	not	A2H
	not	A3L
	not	A3H
	add.b	#1,A3L
	addx	#0,A3H
	addx	#0,A2L
	addx	#0,A2H
	xor.b	#0x08,S2L		; a negative denominator flips the sign
divnorm_done:
	rts

; modnorm (H8/300)
;
; Same as divnorm, except that bit 3 of S2L is never flipped: it
; reflects the sign of the numerator only, so the remainder takes the
; sign of the numerator.
modnorm:
	mov.b	A0H,A0H			; test the sign of the numerator
	stc	ccr,S2L			; keep the sign in bit 3 of S2L
	bge	modnorm_den

	; numerator = -numerator: complement all four bytes, then add 1
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
modnorm_den:
	mov.b	A2H,A2H			; test the sign of the denominator
	bge	modnorm_done
	; denominator = -denominator
	not	A2L
	not	A2H
	not	A3L
	not	A3H
	add.b	#1,A3L
	addx	#0,A3H
	addx	#0,A2L
	addx	#0,A2H
modnorm_done:
	rts

#else /* __H8300H__ */

; divnorm (H8/300H and later)
;
;   in:   A0P  numerator, A1P denominator
;   out:  both negated when they were negative
;         S2L  copy of the condition codes; bit 3 is set when the
;              quotient has to be negated afterwards
divnorm:
	mov.l	A0P,A0P			; test the sign of the numerator
	stc	ccr,S2L			; keep the sign in bit 3 of S2L
	bge	divnorm_den

	neg.l	A0P			; numerator = -numerator

divnorm_den:
	mov.l	A1P,A1P			; test the sign of the denominator
	bge	divnorm_done

	neg.l	A1P			; denominator = -denominator
	xor.b	#0x08,S2L		; a negative denominator flips the sign

divnorm_done:
	rts

; modnorm (H8/300H and later)
;
; Same as divnorm, except that bit 3 of S2L is never flipped: it
; reflects the sign of the numerator only, so the remainder takes the
; sign of the numerator.
modnorm:
	mov.l	A0P,A0P			; test the sign of the numerator
	stc	ccr,S2L			; keep the sign in bit 3 of S2L
	bge	modnorm_den

	neg.l	A0P			; numerator = -numerator

modnorm_den:
	mov.l	A1P,A1P			; test the sign of the denominator
	bge	modnorm_done

	neg.l	A1P			; denominator = -denominator

modnorm_done:
	rts

#endif
|
|
419
|
|
; SImode entry points.
;
;   H8/300:           numerator in A0/A1, denominator in A2/A3
;   H8/300H and up:   numerator in A0P,   denominator in A1P
;
; S2L holds the sign flag produced by divnorm or modnorm, so S2P is
; pushed before it is written.  The H8/300 versions also push S0P and
; S1P, which divmodsi4 uses for the remainder.  Every path that pushed
; registers leaves through reti, which pops them again.

; ___modsi3 - signed remainder.
	.global	___modsi3
___modsi3:
#ifdef __H8300__
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	modnorm
	bsr	divmodsi4
	mov	S0,A0			; the remainder comes back in S0/S1
	mov	S1,A1
	bra	exitdiv
#else
	PUSHP	S2P
	bsr	modnorm
	bsr	___udivsi3
	mov.l	er3,er0			; the remainder comes back in er3
	bra	exitdiv
#endif

;; ___udivsi3 - unsigned quotient, H8/300 version.
;; The H8/300H and H8S version of ___udivsi3 is defined later in
;; the file.
#ifdef __H8300__
	.global	___udivsi3
___udivsi3:
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	divmodsi4
	bra	reti
#endif

; ___umodsi3 - unsigned remainder.
	.global	___umodsi3
___umodsi3:
#ifdef __H8300__
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	bsr	divmodsi4
	mov	S0,A0			; the remainder comes back in S0/S1
	mov	S1,A1
	bra	reti
#else
	bsr	___udivsi3
	mov.l	er3,er0			; the remainder comes back in er3
	rts
#endif

; ___divsi3 - signed quotient.  Falls through into exitdiv.
	.global	___divsi3
___divsi3:
#ifdef __H8300__
	PUSHP	S2P
	PUSHP	S0P
	PUSHP	S1P
	jsr	divnorm
	jsr	divmodsi4
#else
	PUSHP	S2P
	jsr	divnorm
	bsr	___udivsi3
#endif

; exitdiv - negate the result when bit 3 of S2L is set, then fall
; through into reti.
exitdiv:
	btst	#3,S2L
	beq	reti

	; the result has to be negative
#ifdef __H8300__
	not	A0H
	not	A1H
	not	A0L
	not	A1L

	add	#1,A1L
	addx	#0,A1H
	addx	#0,A0L
	addx	#0,A0H
#else /* __H8300H__ */
	neg.l	A0P
#endif

; reti - pop the registers pushed on entry, in reverse order, and
; return to the caller.
reti:
#ifdef __H8300__
	POPP	S1P
	POPP	S0P
#endif
	POPP	S2P
	rts
|
|
510
|
|
; Core unsigned 32-bit divide.
;
;   takes    A0/A1 numerator     (A0P for H8/300H)
;            A2/A3 denominator   (A1P for H8/300H)
;   returns  A0/A1 quotient      (A0P for H8/300H)
;            S0/S1 remainder     (H8/300 divmodsi4; the H8/300H
;                                 ___udivsi3 returns it in er3)
;   trashes  S2H

#ifdef __H8300__

divmodsi4:
	sub.w	S0,S0			; S0:S1 = 0, the remainder work area
	mov.w	S0,S1
	mov.b	A2H,S2H			; are any of the upper three
	or	A2L,S2H			; denominator bytes non-zero?
	or	A3H,S2H
	bne	divmodsi4_wide		; yes: take the shift and subtract path

	; The denominator fits in A3L, so the 16 by 8 divide instruction
	; can be chained one numerator byte at a time.  Start at the
	; first non-zero numerator byte.
	mov.b	A0H,A0H
	bne	divmodsi4_byte0
	mov.b	A0L,A0L
	bne	divmodsi4_byte1
	mov.b	A1H,A1H
	bne	divmodsi4_byte2
	bra	divmodsi4_byte3
divmodsi4_byte0:
	mov.b	A0H,S1L			; S1 = remainder so far : next byte
	divxu	A3L,S1			; S1L = quotient byte, S1H = remainder
	mov.b	S1L,A0H
divmodsi4_byte1:
	mov.b	A0L,S1L
	divxu	A3L,S1
	mov.b	S1L,A0L
divmodsi4_byte2:
	mov.b	A1H,S1L
	divxu	A3L,S1
	mov.b	S1L,A1H
divmodsi4_byte3:
	mov.b	A1L,S1L
	divxu	A3L,S1
	mov.b	S1L,A1L

	mov.b	S1H,S1L			; move the final remainder down
	mov.b	#0x0,S1H		; and clear the byte above it
	rts

; The denominator needs more than eight bits: divide by shift and
; test.  The top numerator byte is moved straight into the remainder
; and the numerator is shifted up by one byte, which leaves 24 bits
; to process.  A0:A1 gives up numerator bits at the top and takes
; quotient bits at the bottom.
divmodsi4_wide:
	mov.b	A0H,S1L
	mov.b	A0L,A0H
	mov.b	A1H,A0L
	mov.b	A1L,A1H

	mov.b	#0,A1L
	mov.b	#24,S2H			; only do 24 iterations

divmodsi4_loop:
	add.w	A1,A1			; shift A0:A1 left by one
	rotxl	A0L
	rotxl	A0H

	rotxl	S1L			; shift the bit into the remainder
	rotxl	S1H
	rotxl	S0L
	rotxl	S0H
	sub.w	A3,S1			; trial subtract of the denominator
	subx	A2L,S0L
	subx	A2H,S0H
	bhs	divmodsi4_setbit	; no borrow: it did fit

	add.w	A3,S1			; borrow: undo the subtract
	addx	A2L,S0L
	addx	A2H,S0H

	dec	S2H
	bne	divmodsi4_loop
	rts

divmodsi4_setbit:
	inc	A1L			; record a 1 in the quotient
	dec	S2H
	bne	divmodsi4_loop
	rts
|
|
591
|
|
#else /* __H8300H__ */

;; ___udivsi3 - unsigned 32-bit divide for the H8/300H and later.
;;
;;   in:   er0 (A0P) numerator, er1 (A1P) denominator
;;   out:  er0 quotient
;;         er3 remainder
;;
;; This function also computes the remainder and stores it in er3.
	.global	___udivsi3
___udivsi3:
	mov.w	A1E,A1E			; denominator top word 0?
	bne	udivsi3_wide

	; Do it the easy way with two 32 by 16 divides (the original
	; note points at page 107 of the manual).
	mov.w	A0E,A2
	extu.l	A2P			; er2 = top word of the numerator
	divxu.w	A1,A2P			; r2 = quotient top word, e2 = remainder
	mov.w	A2E,A0E			; er0 = remainder : numerator low word
	divxu.w	A1,A0P			; r0 = quotient low word, e0 = remainder
	mov.w	A0E,A3			; r3 = remainder
	mov.w	A2,A0E			; e0 = quotient top word
	extu.l	A3P			; er3 = remainder, zero extended
	rts

; Denominator of 2^16 or more.
;
;   er0 = er0 / er1
;   er3 = er0 % er1
;   trashes er1 er2
;   expects er1 >= 2^16
;
; Numerator and denominator are shifted right together until the
; denominator fits in 16 bits, one divide yields an approximate
; quotient, and the quotient and remainder are then corrected.
udivsi3_wide:
	mov.l	er0,er3
	mov.l	er1,er2
#ifdef __H8300H__
	; Shift both values one bit at a time.
udivsi3_shift:
	shlr.l	er0
	shlr.l	er2			; make divisor < 2^16
	mov.w	e2,e2
	bne	udivsi3_shift
#else
	; Shift two bits at a time.  The divisor shift runs one step
	; ahead of the dividend shift; the code after the loop settles
	; the final dividend shift.
	shlr.l	#2,er2			; make divisor < 2^16
	mov.w	e2,e2
	beq	udivsi3_settle
udivsi3_shift:
	shlr.l	#2,er0
udivsi3_shift_den:
	shlr.l	#2,er2			; make divisor < 2^16
	mov.w	e2,e2
	bne	udivsi3_shift
udivsi3_settle:
	; The carry still holds the last bit shifted out of er2.  If
	; rotating it back in does not push a bit out of r2, the divisor
	; keeps that extra bit and the dividend is shifted by one only.
	rotxl.w	r2
	bcs	udivsi3_undo
	shlr.l	er0
	bra	udivsi3_shifted
udivsi3_undo:
	rotxr.w	r2			; the bit did not fit: restore r2
	shlr.l	#2,er0
udivsi3_shifted:
#endif
	;; At this point,
	;;  er0 contains shifted dividend
	;;  er1 contains divisor
	;;  er2 contains shifted divisor
	;;  er3 contains dividend, later remainder
	divxu.w	r2,er0			; r0 now contains the approximate quotient (AQ)
	extu.l	er0
	beq	udivsi3_fixup		; AQ == 0: er3 is already the tentative remainder
	subs	#1,er0			; er0 = AQ - 1
	mov.w	e1,r2
	mulxu.w	r0,er2			; er2 = (AQ - 1) * upper word of divisor
	sub.w	r2,e3			; dividend - 65536 * er2
	mov.w	r1,r2
	mulxu.w	r0,er2			; er2 = (AQ - 1) * lower word of divisor
	sub.l	er2,er3			; er3 = dividend - (AQ - 1) * divisor
udivsi3_fixup:
	cmp.l	er1,er3			; is the remainder still >= divisor?
	blo	udivsi3_done
	adds	#1,er0			; yes: one more fits into the quotient
	sub.l	er1,er3			; correct the remainder
udivsi3_done:
	rts

#endif
#endif /* L_divsi3 */
|
|
669
|
|
#ifdef L_mulhi3

;; ___mulhi3 - HImode (16-bit) multiply.
;;
;;   in:   A0, A1  the two factors
;;   out:  A0      low 16 bits of the product
;;   A2 and A3 are used as scratch.
;;
;; The H8/300 only has an 8*8->16 multiply.  With a = A0 and b = A1
;; the answer is the same as:
;;
;;   (a.l * b.l) + ((a.h * b.l) + (b.h * a.l)) * 256
;;
;; The a.h * b.h term can be ignored because all of it falls off the
;; top of a 16-bit result.

#ifdef __H8300__
	.section .text
	.align 2
	.global ___mulhi3
___mulhi3:
	mov.b	A1L,A2L
	mulxu	A0L,A2			; A2 = a.l * b.l, the first sub product

	mov.b	A0H,A3L
	mulxu	A1L,A3			; A3 = a.h * b.l, the second sub product

	add.b	A3L,A2H			; only its low byte reaches the result

	mov.b	A1H,A3L
	mulxu	A0L,A3			; A3 = b.h * a.l, the third sub product

	add.b	A3L,A2H			; again only the low byte matters
	mov.w	A2,A0			; hand back the sum
	rts

#endif
#endif /* L_mulhi3 */
|
|
704
|
|
#ifdef L_mulsi3

;; SImode (32-bit) multiply.
;;
;; The original author judged shift and add sufficient here: using
;; the supplied 8x8->16 multiply would need 10 ops of 14 cycles each
;; plus overhead, while this inner loop uses maybe 20 cycles plus
;; overhead and terminates quickly on small args.
;;
;;   A0/A1  src_a
;;   A2/A3  src_b
;;
;;   while (a)
;;     {
;;       if (a & 1)
;;         r += b;
;;       a >>= 1;
;;       b <<= 1;
;;     }

	.section .text
	.align 2

#ifdef __H8300__

; ___mulsi3 (H8/300)
;
;   in:   A0:A1  a, A2:A3  b
;   out:  A0:A1  low 32 bits of a * b
;   S0 and S1 hold the running sum r; they are pushed on entry and
;   popped again before returning.

	.global	___mulsi3
___mulsi3:
	PUSHP	S0P
	PUSHP	S1P

	sub.w	S0,S0			; r = 0
	sub.w	S1,S1

mulsi3_loop:
	; while (a)
	mov.w	A0,A0
	bne	mulsi3_step
	mov.w	A1,A1
	beq	mulsi3_done
mulsi3_step:
	; if (a & 1)
	bld	#0,A1L			; carry = lowest bit of a
	bcc	mulsi3_shift
	; r += b
	add.w	A3,S1
	addx	A2L,S0L
	addx	A2H,S0H
mulsi3_shift:
	; a >>= 1
	shlr	A0H
	rotxr	A0L
	rotxr	A1H
	rotxr	A1L

	; b <<= 1
	add.w	A3,A3
	addx	A2L,A2L
	addx	A2H,A2H
	bra	mulsi3_loop

mulsi3_done:
	mov.w	S0,A0			; return r in A0:A1
	mov.w	S1,A1
	POPP	S1P
	POPP	S0P
	rts
|
|
769
|
|
#else /* __H8300H__ */

;
; mulsi3 for H8/300H - based on Renesas SH implementation
;
; by Toshiyasu Morita
;
; State counts quoted by the author:
;
;   Old code:   16b * 16b = 372 states (worst case)
;               32b * 32b = 724 states (worst case)
;
;   New code:   16b * 16b =  48 states
;               16b * 32b =  72 states
;               32b * 32b =  92 states
;
;   in:   er0 = a:b  (e0 = a, r0 = b)
;         er1 = c:d  (e1 = c, r1 = d)
;   out:  er0 = b*d + ((a*d + c*b) << 16)
;   er2 and er3 are used as scratch.
;
; A cross product is skipped when its upper-word factor is zero.
;

	.global	___mulsi3
___mulsi3:
	mov.w	r1,r2			; ( 2 states) b * d
	mulxu	r0,er2			; (22 states)

	mov.w	e0,r3			; ( 2 states) a * d
	beq	mulsi3_skip_ad		; ( 4 states)
	mulxu	r1,er3			; (22 states)
	add.w	r3,e2			; ( 2 states)

mulsi3_skip_ad:
	mov.w	e1,r3			; ( 2 states) c * b
	beq	mulsi3_skip_cb		; ( 4 states)
	mulxu	r0,er3			; (22 states)
	add.w	r3,e2			; ( 2 states)

mulsi3_skip_cb:
	mov.l	er2,er0			; ( 2 states)
	rts				; (10 states)

#endif
#endif /* L_mulsi3 */
|
|
#ifdef L_fixunssfsi_asm
/* For the h8300 we use asm to save some bytes, to
   allow more programs to fit into the tiny address
   space.  For the H8/300H and H8S, the C version is good enough.  */
#ifdef __H8300__
/* ___fixunssfsi: convert the value in A0:A1 to an unsigned 32-bit
   integer returned in A0:A1.

   Only the top byte (A0H) and bit 7 of A0L are inspected here:

     - A0H below 0x4f as a signed byte: the call is handed over to
       ___fixsfsi unchanged.
     - A0H equal to 0x4f and bit 7 of A0L clear: bit 7 of A0L is set
       and the low three bytes are moved up by one byte, with zero
       shifted in at the bottom.
     - anything else: the result is 0xffffffff.

   NOTE(review): this presumably relies on the IEEE single layout
   (sign and upper exponent bits in A0H, lowest exponent bit in bit 7
   of A0L), which would make the middle case the range 2^31..2^32 -
   confirm against the float format in use.

   We still treat NANs different than libgcc2.c, but then, the
   behavior is undefined anyways.  */
	.global	___fixunssfsi
___fixunssfsi:
	cmp.b	#0x4f,A0H
	bge	fixunssfsi_large	; signed A0H >= 0x4f: handle it here
	jmp	@___fixsfsi		; otherwise let ___fixsfsi do the work
fixunssfsi_large:
	bhi	fixunssfsi_huge		; A0H above 0x4f: too big
	xor.b	#0x80,A0L		; flip bit 7 of A0L
	bmi	fixunssfsi_shift8	; it was clear and is now set
fixunssfsi_huge:
	mov.w	#65535,A0		; saturate to 0xffffffff
	mov.w	A0,A1
	rts
fixunssfsi_shift8:
	mov.b	A0L,A0H			; move the low three bytes up by one
	mov.b	A1H,A0L
	mov.b	A1L,A1H
	mov.b	#0,A1L			; and shift in a zero byte
	rts
#endif
#endif /* L_fixunssfsi_asm */
|