gcc/config/xtensa/ieee754-df.S @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
1 /* IEEE-754 double-precision functions for Xtensa
2 Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
16
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
20
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
25
26 #ifdef __XTENSA_EB__
27 #define xh a2
28 #define xl a3
29 #define yh a4
30 #define yl a5
31 #else
32 #define xh a3
33 #define xl a2
34 #define yh a5
35 #define yl a4
36 #endif
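/* A rough C model of the word layout above, illustrative sketch only
   (not part of the build): the double is handled as a pair of 32-bit
   words, with the sign, the 11-bit exponent and the top 20 fraction
   bits in xh and the low 32 fraction bits in xl; which argument
   register holds which word depends on the endianness selected by
   the #ifdef above.

     #include <stdint.h>
     #include <string.h>

     typedef struct { uint32_t hi, lo; } dwords;   // hi ~ xh, lo ~ xl

     static dwords split(double d)
     {
       uint64_t bits;
       memcpy(&bits, &d, sizeof bits);
       dwords w = { (uint32_t)(bits >> 32), (uint32_t)bits };
       return w;
     }

     // field extraction as used throughout this file
     #define SIGN(hi)    ((hi) >> 31)
     #define EXP(hi)     (((hi) >> 20) & 0x7ff)
     #define FRAC_HI(hi) ((hi) & 0xfffff)
*/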
37
38 /* Warning! The branch displacements for some Xtensa branch instructions
39 are quite small, and this code has been carefully laid out to keep
40 branch targets in range. If you change anything, be sure to check that
41 the assembler is not relaxing anything to branch over a jump. */
42
43 #ifdef L_negdf2
44
45 .align 4
46 .global __negdf2
47 .type __negdf2, @function
48 __negdf2:
49 leaf_entry sp, 16
50 movi a4, 0x80000000
51 xor xh, xh, a4
52 leaf_return
53
54 #endif /* L_negdf2 */
55
56 #ifdef L_addsubdf3
57
58 /* Addition */
59 __adddf3_aux:
60
61 /* Handle NaNs and Infinities. (This code is placed before the
62 start of the function just to keep it in range of the limited
63 branch displacements.) */
64
65 .Ladd_xnan_or_inf:
66 /* If y is neither Infinity nor NaN, return x. */
67 bnall yh, a6, 1f
68 /* If x is a NaN, return it. Otherwise, return y. */
69 slli a7, xh, 12
70 or a7, a7, xl
71 beqz a7, .Ladd_ynan_or_inf
72 1: leaf_return
73
74 .Ladd_ynan_or_inf:
75 /* Return y. */
76 mov xh, yh
77 mov xl, yl
78 leaf_return
79
80 .Ladd_opposite_signs:
81 /* Operand signs differ. Do a subtraction. */
82 slli a7, a6, 11
83 xor yh, yh, a7
84 j .Lsub_same_sign
85
86 .align 4
87 .global __adddf3
88 .type __adddf3, @function
89 __adddf3:
90 leaf_entry sp, 16
91 movi a6, 0x7ff00000
92
93 /* Check if the two operands have the same sign. */
94 xor a7, xh, yh
95 bltz a7, .Ladd_opposite_signs
96
97 .Ladd_same_sign:
98 /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
99 ball xh, a6, .Ladd_xnan_or_inf
100 ball yh, a6, .Ladd_ynan_or_inf
101
102 /* Compare the exponents. The smaller operand will be shifted
103 right by the exponent difference and added to the larger
104 one. */
105 extui a7, xh, 20, 12
106 extui a8, yh, 20, 12
107 bltu a7, a8, .Ladd_shiftx
108
109 .Ladd_shifty:
110 /* Check if the smaller (or equal) exponent is zero. */
111 bnone yh, a6, .Ladd_yexpzero
112
113 /* Replace yh sign/exponent with 0x001. */
114 or yh, yh, a6
115 slli yh, yh, 11
116 srli yh, yh, 11
117
118 .Ladd_yexpdiff:
119 /* Compute the exponent difference. Optimize for difference < 32. */
120 sub a10, a7, a8
121 bgeui a10, 32, .Ladd_bigshifty
122
123 /* Shift yh/yl right by the exponent difference. Any bits that are
124 shifted out of yl are saved in a9 for rounding the result. */
125 ssr a10
126 movi a9, 0
127 src a9, yl, a9
128 src yl, yh, yl
129 srl yh, yh
130
131 .Ladd_addy:
132 /* Do the 64-bit addition. */
133 add xl, xl, yl
134 add xh, xh, yh
135 bgeu xl, yl, 1f
136 addi xh, xh, 1
137 1:
138 /* Check if the add overflowed into the exponent. */
139 extui a10, xh, 20, 12
140 beq a10, a7, .Ladd_round
141 mov a8, a7
142 j .Ladd_carry
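/* The two-word addition above in rough C terms (illustrative sketch
   only): the carry out of the low word is detected by the unsigned
   compare against the unchanged addend, exactly as the bgeu does.

     #include <stdint.h>

     static void add64(uint32_t *xh, uint32_t *xl, uint32_t yh, uint32_t yl)
     {
       *xl += yl;
       *xh += yh;
       if (*xl < yl)       // low word wrapped, so propagate the carry
         *xh += 1;
     }
*/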
143
144 .Ladd_yexpzero:
145 /* y is a subnormal value. Replace its sign/exponent with zero,
146 i.e., no implicit "1.0", and increment the apparent exponent
147 because subnormals behave as if they had the minimum (nonzero)
148 exponent. Test for the case when both exponents are zero. */
149 slli yh, yh, 12
150 srli yh, yh, 12
151 bnone xh, a6, .Ladd_bothexpzero
152 addi a8, a8, 1
153 j .Ladd_yexpdiff
154
155 .Ladd_bothexpzero:
156 /* Both exponents are zero. Handle this as a special case. There
157 is no need to shift or round, and the normal code for handling
158 a carry into the exponent field will not work because it
159 assumes there is an implicit "1.0" that needs to be added. */
160 add xl, xl, yl
161 add xh, xh, yh
162 bgeu xl, yl, 1f
163 addi xh, xh, 1
164 1: leaf_return
165
166 .Ladd_bigshifty:
167 /* Exponent difference >= 64 -- just return the bigger value. */
168 bgeui a10, 64, 1b
169
170 /* Shift yh/yl right by the exponent difference. Any bits that are
171 shifted out are saved in a9 for rounding the result. */
172 ssr a10
173 sll a11, yl /* lost bits shifted out of yl */
174 src a9, yh, yl
175 srl yl, yh
176 movi yh, 0
177 beqz a11, .Ladd_addy
178 or a9, a9, a10 /* any positive, nonzero value will work */
179 j .Ladd_addy
180
181 .Ladd_xexpzero:
182 /* Same as "yexpzero" except skip handling the case when both
183 exponents are zero. */
184 slli xh, xh, 12
185 srli xh, xh, 12
186 addi a7, a7, 1
187 j .Ladd_xexpdiff
188
189 .Ladd_shiftx:
190 /* Same thing as the "shifty" code, but with x and y swapped. Also,
191 because the exponent difference is always nonzero in this version,
192 the shift sequence can use SLL and skip loading a constant zero. */
193 bnone xh, a6, .Ladd_xexpzero
194
195 or xh, xh, a6
196 slli xh, xh, 11
197 srli xh, xh, 11
198
199 .Ladd_xexpdiff:
200 sub a10, a8, a7
201 bgeui a10, 32, .Ladd_bigshiftx
202
203 ssr a10
204 sll a9, xl
205 src xl, xh, xl
206 srl xh, xh
207
208 .Ladd_addx:
209 add xl, xl, yl
210 add xh, xh, yh
211 bgeu xl, yl, 1f
212 addi xh, xh, 1
213 1:
214 /* Check if the add overflowed into the exponent. */
215 extui a10, xh, 20, 12
216 bne a10, a8, .Ladd_carry
217
218 .Ladd_round:
219 /* Round up if the leftover fraction is >= 1/2. */
220 bgez a9, 1f
221 addi xl, xl, 1
222 beqz xl, .Ladd_roundcarry
223
224 /* Check if the leftover fraction is exactly 1/2. */
225 slli a9, a9, 1
226 beqz a9, .Ladd_exactlyhalf
227 1: leaf_return
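/* The rounding above in rough C terms (illustrative sketch only):
   a9 holds the bits shifted out of the low word, left-aligned, so
   its msb is the guard bit.  Round to nearest, ties to even.

     #include <stdint.h>

     static void round_nearest_even(uint32_t *xh, uint32_t *xl,
                                    uint32_t leftover)
     {
       if (leftover & 0x80000000u) {   // leftover fraction >= 1/2 ulp
         *xl += 1;
         if (*xl == 0)
           *xh += 1;                   // .Ladd_roundcarry
         else if ((leftover << 1) == 0)
           *xl &= ~1u;                 // exactly 1/2: round down to even
       }
     }
*/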
228
229 .Ladd_bigshiftx:
230 /* Mostly the same thing as "bigshifty".... */
231 bgeui a10, 64, .Ladd_returny
232
233 ssr a10
234 sll a11, xl
235 src a9, xh, xl
236 srl xl, xh
237 movi xh, 0
238 beqz a11, .Ladd_addx
239 or a9, a9, a10
240 j .Ladd_addx
241
242 .Ladd_returny:
243 mov xh, yh
244 mov xl, yl
245 leaf_return
246
247 .Ladd_carry:
248 /* The addition has overflowed into the exponent field, so the
249 value needs to be renormalized. The mantissa of the result
250 can be recovered by subtracting the original exponent and
251 adding 0x100000 (which is the explicit "1.0" for the
252 mantissa of the non-shifted operand -- the "1.0" for the
253 shifted operand was already added). The mantissa can then
254 be shifted right by one bit. The explicit "1.0" of the
255 shifted mantissa then needs to be replaced by the exponent,
256 incremented by one to account for the normalizing shift.
257 It is faster to combine these operations: do the shift first
258 and combine the additions and subtractions. If x is the
259 original exponent, the result is:
260 shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
261 or:
262 shifted mantissa + ((x + 1) << 19)
263 Note that the exponent is incremented here by leaving the
264 explicit "1.0" of the mantissa in the exponent field. */
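/* The algebraic shortcut described above can be sanity-checked in C
   (illustrative sketch only): for any shifted mantissa m and original
   exponent x, the long-hand adjustment equals the combined one.

     #include <assert.h>
     #include <stdint.h>

     static void check(uint32_t m, uint32_t x)
     {
       uint32_t longhand = m - (x << 19) + (1u << 19) + (x << 20);
       uint32_t combined = m + ((x + 1u) << 19);
       assert(longhand == combined);   // holds for all m, x (mod 2^32)
     }
*/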
265
266 /* Shift xh/xl right by one bit. Save the lsb of xl. */
267 mov a10, xl
268 ssai 1
269 src xl, xh, xl
270 srl xh, xh
271
272 /* See explanation above. The original exponent is in a8. */
273 addi a8, a8, 1
274 slli a8, a8, 19
275 add xh, xh, a8
276
277 /* Return an Infinity if the exponent overflowed. */
278 ball xh, a6, .Ladd_infinity
279
280 /* Same thing as the "round" code except the msb of the leftover
281 fraction is bit 0 of a10, with the rest of the fraction in a9. */
282 bbci.l a10, 0, 1f
283 addi xl, xl, 1
284 beqz xl, .Ladd_roundcarry
285 beqz a9, .Ladd_exactlyhalf
286 1: leaf_return
287
288 .Ladd_infinity:
289 /* Clear the mantissa. */
290 movi xl, 0
291 srli xh, xh, 20
292 slli xh, xh, 20
293
294 /* The sign bit may have been lost in a carry-out. Put it back. */
295 slli a8, a8, 1
296 or xh, xh, a8
297 leaf_return
298
299 .Ladd_exactlyhalf:
300 /* Round down to the nearest even value. */
301 srli xl, xl, 1
302 slli xl, xl, 1
303 leaf_return
304
305 .Ladd_roundcarry:
306 /* xl is always zero when the rounding increment overflows, so
307 there's no need to round it to an even value. */
308 addi xh, xh, 1
309 /* Overflow to the exponent is OK. */
310 leaf_return
311
312
313 /* Subtraction */
314 __subdf3_aux:
315
316 /* Handle NaNs and Infinities. (This code is placed before the
317 start of the function just to keep it in range of the limited
318 branch displacements.) */
319
320 .Lsub_xnan_or_inf:
321 /* If y is neither Infinity nor NaN, return x. */
322 bnall yh, a6, 1f
323 /* Both x and y are either NaN or Inf, so the result is NaN. */
324 movi a4, 0x80000 /* make it a quiet NaN */
325 or xh, xh, a4
326 1: leaf_return
327
328 .Lsub_ynan_or_inf:
329 /* Negate y and return it. */
330 slli a7, a6, 11
331 xor xh, yh, a7
332 mov xl, yl
333 leaf_return
334
335 .Lsub_opposite_signs:
336 /* Operand signs differ. Do an addition. */
337 slli a7, a6, 11
338 xor yh, yh, a7
339 j .Ladd_same_sign
340
341 .align 4
342 .global __subdf3
343 .type __subdf3, @function
344 __subdf3:
345 leaf_entry sp, 16
346 movi a6, 0x7ff00000
347
348 /* Check if the two operands have the same sign. */
349 xor a7, xh, yh
350 bltz a7, .Lsub_opposite_signs
351
352 .Lsub_same_sign:
353 /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
354 ball xh, a6, .Lsub_xnan_or_inf
355 ball yh, a6, .Lsub_ynan_or_inf
356
357 /* Compare the operands. In contrast to addition, the entire
358 value matters here. */
359 extui a7, xh, 20, 11
360 extui a8, yh, 20, 11
361 bltu xh, yh, .Lsub_xsmaller
362 beq xh, yh, .Lsub_compare_low
363
364 .Lsub_ysmaller:
365 /* Check if the smaller (or equal) exponent is zero. */
366 bnone yh, a6, .Lsub_yexpzero
367
368 /* Replace yh sign/exponent with 0x001. */
369 or yh, yh, a6
370 slli yh, yh, 11
371 srli yh, yh, 11
372
373 .Lsub_yexpdiff:
374 /* Compute the exponent difference. Optimize for difference < 32. */
375 sub a10, a7, a8
376 bgeui a10, 32, .Lsub_bigshifty
377
378 /* Shift yh/yl right by the exponent difference. Any bits that are
379 shifted out of yl are saved in a9 for rounding the result. */
380 ssr a10
381 movi a9, 0
382 src a9, yl, a9
383 src yl, yh, yl
384 srl yh, yh
385
386 .Lsub_suby:
387 /* Do the 64-bit subtraction. */
388 sub xh, xh, yh
389 bgeu xl, yl, 1f
390 addi xh, xh, -1
391 1: sub xl, xl, yl
392
393 /* Subtract the leftover bits in a9 from zero and propagate any
394 borrow from xh/xl. */
395 neg a9, a9
396 beqz a9, 1f
397 addi a5, xh, -1
398 moveqz xh, a5, xl
399 addi xl, xl, -1
400 1:
401 /* Check if the subtract underflowed into the exponent. */
402 extui a10, xh, 20, 11
403 beq a10, a7, .Lsub_round
404 j .Lsub_borrow
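/* The sticky-bit borrow above in rough C terms (illustrative sketch
   only): the bits shifted out of y still have to be subtracted, so
   the leftover is negated and, when it is nonzero, one ulp is
   borrowed from xh/xl.

     #include <stdint.h>

     static void sub_sticky(uint32_t *xh, uint32_t *xl, uint32_t *a9)
     {
       *a9 = 0u - *a9;       // leftover fraction becomes its complement
       if (*a9 != 0) {
         if (*xl == 0)
           *xh -= 1;         // borrow ripples into the high word
         *xl -= 1;
       }
     }
*/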
405
406 .Lsub_compare_low:
407 /* The high words are equal. Compare the low words. */
408 bltu xl, yl, .Lsub_xsmaller
409 bltu yl, xl, .Lsub_ysmaller
410 /* The operands are equal. Return 0.0. */
411 movi xh, 0
412 movi xl, 0
413 1: leaf_return
414
415 .Lsub_yexpzero:
416 /* y is a subnormal value. Replace its sign/exponent with zero,
417 i.e., no implicit "1.0". Unless x is also a subnormal, increment
418 y's apparent exponent because subnormals behave as if they had
419 the minimum (nonzero) exponent. */
420 slli yh, yh, 12
421 srli yh, yh, 12
422 bnone xh, a6, .Lsub_yexpdiff
423 addi a8, a8, 1
424 j .Lsub_yexpdiff
425
426 .Lsub_bigshifty:
427 /* Exponent difference >= 64 -- just return the bigger value. */
428 bgeui a10, 64, 1b
429
430 /* Shift yh/yl right by the exponent difference. Any bits that are
431 shifted out are saved in a9 for rounding the result. */
432 ssr a10
433 sll a11, yl /* lost bits shifted out of yl */
434 src a9, yh, yl
435 srl yl, yh
436 movi yh, 0
437 beqz a11, .Lsub_suby
438 or a9, a9, a10 /* any positive, nonzero value will work */
439 j .Lsub_suby
440
441 .Lsub_xsmaller:
442 /* Same thing as the "ysmaller" code, but with x and y swapped and
443 with y negated. */
444 bnone xh, a6, .Lsub_xexpzero
445
446 or xh, xh, a6
447 slli xh, xh, 11
448 srli xh, xh, 11
449
450 .Lsub_xexpdiff:
451 sub a10, a8, a7
452 bgeui a10, 32, .Lsub_bigshiftx
453
454 ssr a10
455 movi a9, 0
456 src a9, xl, a9
457 src xl, xh, xl
458 srl xh, xh
459
460 /* Negate y. */
461 slli a11, a6, 11
462 xor yh, yh, a11
463
464 .Lsub_subx:
465 sub xl, yl, xl
466 sub xh, yh, xh
467 bgeu yl, xl, 1f
468 addi xh, xh, -1
469 1:
470 /* Subtract the leftover bits in a9 from zero and propagate any
471 borrow from xh/xl. */
472 neg a9, a9
473 beqz a9, 1f
474 addi a5, xh, -1
475 moveqz xh, a5, xl
476 addi xl, xl, -1
477 1:
478 /* Check if the subtract underflowed into the exponent. */
479 extui a10, xh, 20, 11
480 bne a10, a8, .Lsub_borrow
481
482 .Lsub_round:
483 /* Round up if the leftover fraction is >= 1/2. */
484 bgez a9, 1f
485 addi xl, xl, 1
486 beqz xl, .Lsub_roundcarry
487
488 /* Check if the leftover fraction is exactly 1/2. */
489 slli a9, a9, 1
490 beqz a9, .Lsub_exactlyhalf
491 1: leaf_return
492
493 .Lsub_xexpzero:
494 /* Same as "yexpzero". */
495 slli xh, xh, 12
496 srli xh, xh, 12
497 bnone yh, a6, .Lsub_xexpdiff
498 addi a7, a7, 1
499 j .Lsub_xexpdiff
500
501 .Lsub_bigshiftx:
502 /* Mostly the same thing as "bigshifty", but with the sign bit of the
503 shifted value set so that the subsequent subtraction flips the
504 sign of y. */
505 bgeui a10, 64, .Lsub_returny
506
507 ssr a10
508 sll a11, xl
509 src a9, xh, xl
510 srl xl, xh
511 slli xh, a6, 11 /* set sign bit of xh */
512 beqz a11, .Lsub_subx
513 or a9, a9, a10
514 j .Lsub_subx
515
516 .Lsub_returny:
517 /* Negate and return y. */
518 slli a7, a6, 11
519 xor xh, yh, a7
520 mov xl, yl
521 leaf_return
522
523 .Lsub_borrow:
524 /* The subtraction has underflowed into the exponent field, so the
525 value needs to be renormalized. Shift the mantissa left as
526 needed to remove any leading zeros and adjust the exponent
527 accordingly. If the exponent is not large enough to remove
528 all the leading zeros, the result will be a subnormal value. */
529
530 slli a8, xh, 12
531 beqz a8, .Lsub_xhzero
532 do_nsau a6, a8, a7, a11
533 srli a8, a8, 12
534 bge a6, a10, .Lsub_subnormal
535 addi a6, a6, 1
536
537 .Lsub_shift_lt32:
538 /* Shift the mantissa (a8/xl/a9) left by a6. */
539 ssl a6
540 src a8, a8, xl
541 src xl, xl, a9
542 sll a9, a9
543
544 /* Combine the shifted mantissa with the sign and exponent,
545 decrementing the exponent by a6. (The exponent has already
546 been decremented by one due to the borrow from the subtraction,
547 but adding the mantissa will increment the exponent by one.) */
548 srli xh, xh, 20
549 sub xh, xh, a6
550 slli xh, xh, 20
551 add xh, xh, a8
552 j .Lsub_round
553
554 .Lsub_exactlyhalf:
555 /* Round down to the nearest even value. */
556 srli xl, xl, 1
557 slli xl, xl, 1
558 leaf_return
559
560 .Lsub_roundcarry:
561 /* xl is always zero when the rounding increment overflows, so
562 there's no need to round it to an even value. */
563 addi xh, xh, 1
564 /* Overflow to the exponent is OK. */
565 leaf_return
566
567 .Lsub_xhzero:
568 /* When normalizing the result, all the mantissa bits in the high
569 word are zero. Shift by "20 + (leading zero count of xl) + 1". */
570 do_nsau a6, xl, a7, a11
571 addi a6, a6, 21
572 blt a10, a6, .Lsub_subnormal
573
574 .Lsub_normalize_shift:
575 bltui a6, 32, .Lsub_shift_lt32
576
577 ssl a6
578 src a8, xl, a9
579 sll xl, a9
580 movi a9, 0
581
582 srli xh, xh, 20
583 sub xh, xh, a6
584 slli xh, xh, 20
585 add xh, xh, a8
586 j .Lsub_round
587
588 .Lsub_subnormal:
589 /* The exponent is too small to shift away all the leading zeros.
590 Set a6 to the current exponent (which has already been
591 decremented by the borrow) so that the exponent of the result
592 will be zero. Do not add 1 to a6 in this case, because: (1)
593 adding the mantissa will not increment the exponent, so there is
594 no need to subtract anything extra from the exponent to
595 compensate, and (2) the effective exponent of a subnormal is 1
596 not 0 so the shift amount must be 1 smaller than normal. */
597 mov a6, a10
598 j .Lsub_normalize_shift
599
600 #endif /* L_addsubdf3 */
601
602 #ifdef L_muldf3
603
604 /* Multiplication */
605 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
606 #define XCHAL_NO_MUL 1
607 #endif
608
609 __muldf3_aux:
610
611 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
612 (This code is placed before the start of the function just to
613 keep it in range of the limited branch displacements.) */
614
615 .Lmul_xexpzero:
616 /* Clear the sign bit of x. */
617 slli xh, xh, 1
618 srli xh, xh, 1
619
620 /* If x is zero, return zero. */
621 or a10, xh, xl
622 beqz a10, .Lmul_return_zero
623
624 /* Normalize x. Adjust the exponent in a8. */
625 beqz xh, .Lmul_xh_zero
626 do_nsau a10, xh, a11, a12
627 addi a10, a10, -11
628 ssl a10
629 src xh, xh, xl
630 sll xl, xl
631 movi a8, 1
632 sub a8, a8, a10
633 j .Lmul_xnormalized
634 .Lmul_xh_zero:
635 do_nsau a10, xl, a11, a12
636 addi a10, a10, -11
637 movi a8, -31
638 sub a8, a8, a10
639 ssl a10
640 bltz a10, .Lmul_xl_srl
641 sll xh, xl
642 movi xl, 0
643 j .Lmul_xnormalized
644 .Lmul_xl_srl:
645 srl xh, xl
646 sll xl, xl
647 j .Lmul_xnormalized
648
649 .Lmul_yexpzero:
650 /* Clear the sign bit of y. */
651 slli yh, yh, 1
652 srli yh, yh, 1
653
654 /* If y is zero, return zero. */
655 or a10, yh, yl
656 beqz a10, .Lmul_return_zero
657
658 /* Normalize y. Adjust the exponent in a9. */
659 beqz yh, .Lmul_yh_zero
660 do_nsau a10, yh, a11, a12
661 addi a10, a10, -11
662 ssl a10
663 src yh, yh, yl
664 sll yl, yl
665 movi a9, 1
666 sub a9, a9, a10
667 j .Lmul_ynormalized
668 .Lmul_yh_zero:
669 do_nsau a10, yl, a11, a12
670 addi a10, a10, -11
671 movi a9, -31
672 sub a9, a9, a10
673 ssl a10
674 bltz a10, .Lmul_yl_srl
675 sll yh, yl
676 movi yl, 0
677 j .Lmul_ynormalized
678 .Lmul_yl_srl:
679 srl yh, yl
680 sll yl, yl
681 j .Lmul_ynormalized
682
683 .Lmul_return_zero:
684 /* Return zero with the appropriate sign bit. */
685 srli xh, a7, 31
686 slli xh, xh, 31
687 movi xl, 0
688 j .Lmul_done
689
690 .Lmul_xnan_or_inf:
691 /* If y is zero, return NaN. */
692 bnez yl, 1f
693 slli a8, yh, 1
694 bnez a8, 1f
695 movi a4, 0x80000 /* make it a quiet NaN */
696 or xh, xh, a4
697 j .Lmul_done
698 1:
699 /* If y is NaN, return y. */
700 bnall yh, a6, .Lmul_returnx
701 slli a8, yh, 12
702 or a8, a8, yl
703 beqz a8, .Lmul_returnx
704
705 .Lmul_returny:
706 mov xh, yh
707 mov xl, yl
708
709 .Lmul_returnx:
710 /* Set the sign bit and return. */
711 extui a7, a7, 31, 1
712 slli xh, xh, 1
713 ssai 1
714 src xh, a7, xh
715 j .Lmul_done
716
717 .Lmul_ynan_or_inf:
718 /* If x is zero, return NaN. */
719 bnez xl, .Lmul_returny
720 slli a8, xh, 1
721 bnez a8, .Lmul_returny
722 movi a7, 0x80000 /* make it a quiet NaN */
723 or xh, yh, a7
724 j .Lmul_done
725
726 .align 4
727 .global __muldf3
728 .type __muldf3, @function
729 __muldf3:
730 #if __XTENSA_CALL0_ABI__
731 leaf_entry sp, 32
732 addi sp, sp, -32
733 s32i a12, sp, 16
734 s32i a13, sp, 20
735 s32i a14, sp, 24
736 s32i a15, sp, 28
737 #elif XCHAL_NO_MUL
738 /* This is not really a leaf function; allocate enough stack space
739 to allow CALL12s to a helper function. */
740 leaf_entry sp, 64
741 #else
742 leaf_entry sp, 32
743 #endif
744 movi a6, 0x7ff00000
745
746 /* Get the sign of the result. */
747 xor a7, xh, yh
748
749 /* Check for NaN and infinity. */
750 ball xh, a6, .Lmul_xnan_or_inf
751 ball yh, a6, .Lmul_ynan_or_inf
752
753 /* Extract the exponents. */
754 extui a8, xh, 20, 11
755 extui a9, yh, 20, 11
756
757 beqz a8, .Lmul_xexpzero
758 .Lmul_xnormalized:
759 beqz a9, .Lmul_yexpzero
760 .Lmul_ynormalized:
761
762 /* Add the exponents. */
763 add a8, a8, a9
764
765 /* Replace sign/exponent fields with explicit "1.0". */
766 movi a10, 0x1fffff
767 or xh, xh, a6
768 and xh, xh, a10
769 or yh, yh, a6
770 and yh, yh, a10
771
772 /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6.
773 The least-significant word of the result is thrown away except
774 that if it is nonzero, the lsb of a6 is set to 1. */
775 #if XCHAL_HAVE_MUL32_HIGH
776
777 /* Compute a6 with any carry-outs in a10. */
778 movi a10, 0
779 mull a6, xl, yh
780 mull a11, xh, yl
781 add a6, a6, a11
782 bgeu a6, a11, 1f
783 addi a10, a10, 1
784 1:
785 muluh a11, xl, yl
786 add a6, a6, a11
787 bgeu a6, a11, 1f
788 addi a10, a10, 1
789 1:
790 /* If the low word of the result is nonzero, set the lsb of a6. */
791 mull a11, xl, yl
792 beqz a11, 1f
793 movi a9, 1
794 or a6, a6, a9
795 1:
796 /* Compute xl with any carry-outs in a9. */
797 movi a9, 0
798 mull a11, xh, yh
799 add a10, a10, a11
800 bgeu a10, a11, 1f
801 addi a9, a9, 1
802 1:
803 muluh a11, xh, yl
804 add a10, a10, a11
805 bgeu a10, a11, 1f
806 addi a9, a9, 1
807 1:
808 muluh xl, xl, yh
809 add xl, xl, a10
810 bgeu xl, a10, 1f
811 addi a9, a9, 1
812 1:
813 /* Compute xh. */
814 muluh xh, xh, yh
815 add xh, xh, a9
816
817 #else /* ! XCHAL_HAVE_MUL32_HIGH */
818
819 /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
820 products. These partial products are:
821
822 0 xll * yll
823
824 1 xll * ylh
825 2 xlh * yll
826
827 3 xll * yhl
828 4 xlh * ylh
829 5 xhl * yll
830
831 6 xll * yhh
832 7 xlh * yhl
833 8 xhl * ylh
834 9 xhh * yll
835
836 10 xlh * yhh
837 11 xhl * yhl
838 12 xhh * ylh
839
840 13 xhl * yhh
841 14 xhh * yhl
842
843 15 xhh * yhh
844
845 where the input chunks are (hh, hl, lh, ll). If using the Mul16
846 or Mul32 multiplier options, these input chunks must be stored in
847 separate registers. For Mac16, the UMUL.AA.* opcodes can specify
848 that the inputs come from either half of the registers, so there
849 is no need to shift them out ahead of time. If there is no
850 multiply hardware, the 16-bit chunks can be extracted when setting
851 up the arguments to the separate multiply function. */
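/* A rough C model of the chunked multiplication described above
   (illustrative sketch only): four 16-bit chunks per operand give
   the 16 partial products listed; the sketch also folds the
   discarded low 64 bits into a sticky flag, as the assembly folds
   them into the lsb of a6.

     #include <stdint.h>

     static uint64_t mul64x64_high(uint64_t x, uint64_t y, int *sticky)
     {
       uint32_t xc[4], yc[4];
       for (int i = 0; i < 4; i++) {
         xc[i] = (x >> (16 * i)) & 0xffff;
         yc[i] = (y >> (16 * i)) & 0xffff;
       }
       uint64_t acc[8] = { 0 };
       for (int i = 0; i < 4; i++)        // the 16 partial products
         for (int j = 0; j < 4; j++) {
           uint64_t pp = (uint64_t)xc[i] * yc[j];
           acc[i + j]     += pp & 0xffff;
           acc[i + j + 1] += pp >> 16;
         }
       uint64_t w[8], carry = 0;
       for (int i = 0; i < 8; i++) {      // propagate the carries
         carry += acc[i];
         w[i] = carry & 0xffff;
         carry >>= 16;
       }
       uint64_t lo = w[0] | (w[1] << 16) | (w[2] << 32) | (w[3] << 48);
       *sticky = (lo != 0);
       return w[4] | (w[5] << 16) | (w[6] << 32) | (w[7] << 48);
     }
*/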
852
853 /* Save a7 since it is needed to hold a temporary value. */
854 s32i a7, sp, 4
855 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
856 /* Calling a separate multiply function will clobber a0 and requires
857 use of a8 as a temporary, so save those values now. (The function
858 uses a custom ABI so nothing else needs to be saved.) */
859 s32i a0, sp, 0
860 s32i a8, sp, 8
861 #endif
862
863 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
864
865 #define xlh a12
866 #define ylh a13
867 #define xhh a14
868 #define yhh a15
869
870 /* Get the high halves of the inputs into registers. */
871 srli xlh, xl, 16
872 srli ylh, yl, 16
873 srli xhh, xh, 16
874 srli yhh, yh, 16
875
876 #define xll xl
877 #define yll yl
878 #define xhl xh
879 #define yhl yh
880
881 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
882 /* Clear the high halves of the inputs. This does not matter
883 for MUL16 because the high bits are ignored. */
884 extui xl, xl, 0, 16
885 extui xh, xh, 0, 16
886 extui yl, yl, 0, 16
887 extui yh, yh, 0, 16
888 #endif
889 #endif /* MUL16 || MUL32 */
890
891
892 #if XCHAL_HAVE_MUL16
893
894 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
895 mul16u dst, xreg ## xhalf, yreg ## yhalf
896
897 #elif XCHAL_HAVE_MUL32
898
899 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
900 mull dst, xreg ## xhalf, yreg ## yhalf
901
902 #elif XCHAL_HAVE_MAC16
903
904 /* The preprocessor insists on inserting a space when concatenating after
905 a period in the definition of do_mul below. These macros are a workaround
906 using underscores instead of periods when doing the concatenation. */
907 #define umul_aa_ll umul.aa.ll
908 #define umul_aa_lh umul.aa.lh
909 #define umul_aa_hl umul.aa.hl
910 #define umul_aa_hh umul.aa.hh
911
912 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
913 umul_aa_ ## xhalf ## yhalf xreg, yreg; \
914 rsr dst, ACCLO
915
916 #else /* no multiply hardware */
917
918 #define set_arg_l(dst, src) \
919 extui dst, src, 0, 16
920 #define set_arg_h(dst, src) \
921 srli dst, src, 16
922
923 #if __XTENSA_CALL0_ABI__
924 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
925 set_arg_ ## xhalf (a13, xreg); \
926 set_arg_ ## yhalf (a14, yreg); \
927 call0 .Lmul_mulsi3; \
928 mov dst, a12
929 #else
930 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
931 set_arg_ ## xhalf (a14, xreg); \
932 set_arg_ ## yhalf (a15, yreg); \
933 call12 .Lmul_mulsi3; \
934 mov dst, a14
935 #endif /* __XTENSA_CALL0_ABI__ */
936
937 #endif /* no multiply hardware */
938
939 /* Add pp1 and pp2 into a10 with carry-out in a9. */
940 do_mul(a10, xl, l, yl, h) /* pp 1 */
941 do_mul(a11, xl, h, yl, l) /* pp 2 */
942 movi a9, 0
943 add a10, a10, a11
944 bgeu a10, a11, 1f
945 addi a9, a9, 1
946 1:
947 /* Initialize a6 with a9/a10 shifted into position. Note that
948 this value can be safely incremented without any carry-outs. */
949 ssai 16
950 src a6, a9, a10
951
952 /* Compute the low word into a10. */
953 do_mul(a11, xl, l, yl, l) /* pp 0 */
954 sll a10, a10
955 add a10, a10, a11
956 bgeu a10, a11, 1f
957 addi a6, a6, 1
958 1:
959 /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
960 This is good enough to determine the low half of a6, so that any
961 nonzero bits from the low word of the result can be collapsed
962 into a6, freeing up a register. */
963 movi a9, 0
964 do_mul(a11, xl, l, yh, l) /* pp 3 */
965 add a6, a6, a11
966 bgeu a6, a11, 1f
967 addi a9, a9, 1
968 1:
969 do_mul(a11, xl, h, yl, h) /* pp 4 */
970 add a6, a6, a11
971 bgeu a6, a11, 1f
972 addi a9, a9, 1
973 1:
974 do_mul(a11, xh, l, yl, l) /* pp 5 */
975 add a6, a6, a11
976 bgeu a6, a11, 1f
977 addi a9, a9, 1
978 1:
979 /* Collapse any nonzero bits from the low word into a6. */
980 beqz a10, 1f
981 movi a11, 1
982 or a6, a6, a11
983 1:
984 /* Add pp6-9 into a11 with carry-outs in a10. */
985 do_mul(a7, xl, l, yh, h) /* pp 6 */
986 do_mul(a11, xh, h, yl, l) /* pp 9 */
987 movi a10, 0
988 add a11, a11, a7
989 bgeu a11, a7, 1f
990 addi a10, a10, 1
991 1:
992 do_mul(a7, xl, h, yh, l) /* pp 7 */
993 add a11, a11, a7
994 bgeu a11, a7, 1f
995 addi a10, a10, 1
996 1:
997 do_mul(a7, xh, l, yl, h) /* pp 8 */
998 add a11, a11, a7
999 bgeu a11, a7, 1f
1000 addi a10, a10, 1
1001 1:
1002 /* Shift a10/a11 into position, and add low half of a11 to a6. */
1003 src a10, a10, a11
1004 add a10, a10, a9
1005 sll a11, a11
1006 add a6, a6, a11
1007 bgeu a6, a11, 1f
1008 addi a10, a10, 1
1009 1:
1010 /* Add pp10-12 into xl with carry-outs in a9. */
1011 movi a9, 0
1012 do_mul(xl, xl, h, yh, h) /* pp 10 */
1013 add xl, xl, a10
1014 bgeu xl, a10, 1f
1015 addi a9, a9, 1
1016 1:
1017 do_mul(a10, xh, l, yh, l) /* pp 11 */
1018 add xl, xl, a10
1019 bgeu xl, a10, 1f
1020 addi a9, a9, 1
1021 1:
1022 do_mul(a10, xh, h, yl, h) /* pp 12 */
1023 add xl, xl, a10
1024 bgeu xl, a10, 1f
1025 addi a9, a9, 1
1026 1:
1027 /* Add pp13-14 into a11 with carry-outs in a10. */
1028 do_mul(a11, xh, l, yh, h) /* pp 13 */
1029 do_mul(a7, xh, h, yh, l) /* pp 14 */
1030 movi a10, 0
1031 add a11, a11, a7
1032 bgeu a11, a7, 1f
1033 addi a10, a10, 1
1034 1:
1035 /* Shift a10/a11 into position, and add the low half of a11 to xl. */
1036 src a10, a10, a11
1037 add a10, a10, a9
1038 sll a11, a11
1039 add xl, xl, a11
1040 bgeu xl, a11, 1f
1041 addi a10, a10, 1
1042 1:
1043 /* Compute xh. */
1044 do_mul(xh, xh, h, yh, h) /* pp 15 */
1045 add xh, xh, a10
1046
1047 /* Restore values saved on the stack during the multiplication. */
1048 l32i a7, sp, 4
1049 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
1050 l32i a0, sp, 0
1051 l32i a8, sp, 8
1052 #endif
1053 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
1054
1055 /* Shift left by 12 bits, unless there was a carry-out from the
1056 multiply, in which case, shift by 11 bits and increment the
1057 exponent. Note: It is convenient to use the constant 0x3ff
1058 instead of 0x400 when removing the extra exponent bias (so that
1059 it is easy to construct 0x7fe for the overflow check). Reverse
1060 the logic here to decrement the exponent sum by one unless there
1061 was a carry-out. */
1062 movi a4, 11
1063 srli a5, xh, 21 - 12
1064 bnez a5, 1f
1065 addi a4, a4, 1
1066 addi a8, a8, -1
1067 1: ssl a4
1068 src xh, xh, xl
1069 src xl, xl, a6
1070 sll a6, a6
1071
1072 /* Subtract the extra bias from the exponent sum (plus one to account
1073 for the explicit "1.0" of the mantissa that will be added to the
1074 exponent in the final result). */
1075 movi a4, 0x3ff
1076 sub a8, a8, a4
1077
1078 /* Check for over/underflow. The value in a8 is one less than the
1079 final exponent, so values in the range 0..7fd are OK here. */
1080 slli a4, a4, 1 /* 0x7fe */
1081 bgeu a8, a4, .Lmul_overflow
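/* The exponent bookkeeping above in rough C terms (illustrative
   sketch only): with biased exponents ex and ey, a8 ends up holding
   one less than the final biased exponent, and a single unsigned
   compare covers both overflow and underflow.

     #include <stdint.h>

     static int exponent_in_range(int32_t ex, int32_t ey)
     {
       int32_t a8 = ex + ey - 0x3ff;    // one less than the final exponent
       return (uint32_t)a8 < 0x7fe;     // final exponent must be 1..0x7fe
     }
*/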
1082
1083 .Lmul_round:
1084 /* Round. */
1085 bgez a6, .Lmul_rounded
1086 addi xl, xl, 1
1087 beqz xl, .Lmul_roundcarry
1088 slli a6, a6, 1
1089 beqz a6, .Lmul_exactlyhalf
1090
1091 .Lmul_rounded:
1092 /* Add the exponent to the mantissa. */
1093 slli a8, a8, 20
1094 add xh, xh, a8
1095
1096 .Lmul_addsign:
1097 /* Add the sign bit. */
1098 srli a7, a7, 31
1099 slli a7, a7, 31
1100 or xh, xh, a7
1101
1102 .Lmul_done:
1103 #if __XTENSA_CALL0_ABI__
1104 l32i a12, sp, 16
1105 l32i a13, sp, 20
1106 l32i a14, sp, 24
1107 l32i a15, sp, 28
1108 addi sp, sp, 32
1109 #endif
1110 leaf_return
1111
1112 .Lmul_exactlyhalf:
1113 /* Round down to the nearest even value. */
1114 srli xl, xl, 1
1115 slli xl, xl, 1
1116 j .Lmul_rounded
1117
1118 .Lmul_roundcarry:
1119 /* xl is always zero when the rounding increment overflows, so
1120 there's no need to round it to an even value. */
1121 addi xh, xh, 1
1122 /* Overflow is OK -- it will be added to the exponent. */
1123 j .Lmul_rounded
1124
1125 .Lmul_overflow:
1126 bltz a8, .Lmul_underflow
1127 /* Return +/- Infinity. */
1128 addi a8, a4, 1 /* 0x7ff */
1129 slli xh, a8, 20
1130 movi xl, 0
1131 j .Lmul_addsign
1132
1133 .Lmul_underflow:
1134 /* Create a subnormal value, where the exponent field contains zero,
1135 but the effective exponent is 1. The value of a8 is one less than
1136 the actual exponent, so just negate it to get the shift amount. */
1137 neg a8, a8
1138 mov a9, a6
1139 ssr a8
1140 bgeui a8, 32, .Lmul_bigshift
1141
1142 /* Shift xh/xl right. Any bits that are shifted out of xl are saved
1143 in a6 (combined with the shifted-out bits currently in a6) for
1144 rounding the result. */
1145 sll a6, xl
1146 src xl, xh, xl
1147 srl xh, xh
1148 j 1f
1149
1150 .Lmul_bigshift:
1151 bgeui a8, 64, .Lmul_flush_to_zero
1152 sll a10, xl /* lost bits shifted out of xl */
1153 src a6, xh, xl
1154 srl xl, xh
1155 movi xh, 0
1156 or a9, a9, a10
1157
1158 /* Set the exponent to zero. */
1159 1: movi a8, 0
1160
1161 /* Pack any nonzero bits shifted out into a6. */
1162 beqz a9, .Lmul_round
1163 movi a9, 1
1164 or a6, a6, a9
1165 j .Lmul_round
1166
1167 .Lmul_flush_to_zero:
1168 /* Return zero with the appropriate sign bit. */
1169 srli xh, a7, 31
1170 slli xh, xh, 31
1171 movi xl, 0
1172 j .Lmul_done
1173
1174 #if XCHAL_NO_MUL
1175
1176 /* For Xtensa processors with no multiply hardware, this simplified
1177 version of _mulsi3 is used for multiplying 16-bit chunks of
1178 the floating-point mantissas. When using CALL0, this function
1179 uses a custom ABI: the inputs are passed in a13 and a14, the
1180 result is returned in a12, and a8 and a15 are clobbered. */
1181 .align 4
1182 .Lmul_mulsi3:
1183 leaf_entry sp, 16
1184 .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
1185 movi \dst, 0
1186 1: add \tmp1, \src2, \dst
1187 extui \tmp2, \src1, 0, 1
1188 movnez \dst, \tmp1, \tmp2
1189
1190 do_addx2 \tmp1, \src2, \dst, \tmp1
1191 extui \tmp2, \src1, 1, 1
1192 movnez \dst, \tmp1, \tmp2
1193
1194 do_addx4 \tmp1, \src2, \dst, \tmp1
1195 extui \tmp2, \src1, 2, 1
1196 movnez \dst, \tmp1, \tmp2
1197
1198 do_addx8 \tmp1, \src2, \dst, \tmp1
1199 extui \tmp2, \src1, 3, 1
1200 movnez \dst, \tmp1, \tmp2
1201
1202 srli \src1, \src1, 4
1203 slli \src2, \src2, 4
1204 bnez \src1, 1b
1205 .endm
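/* A rough C equivalent of the shift-and-add loop above (illustrative
   sketch only): four multiplier bits are handled per iteration,
   matching the addx2/addx4/addx8 steps of the macro.

     #include <stdint.h>

     static uint32_t mulsi3(uint32_t src1, uint32_t src2)
     {
       uint32_t dst = 0;
       while (src1 != 0) {
         if (src1 & 1) dst += src2;        // bit 0
         if (src1 & 2) dst += src2 << 1;   // bit 1 (addx2)
         if (src1 & 4) dst += src2 << 2;   // bit 2 (addx4)
         if (src1 & 8) dst += src2 << 3;   // bit 3 (addx8)
         src1 >>= 4;
         src2 <<= 4;
       }
       return dst;
     }
*/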
1206 #if __XTENSA_CALL0_ABI__
1207 mul_mulsi3_body a12, a13, a14, a15, a8
1208 #else
1209 /* The result will be written into a2, so save that argument in a4. */
1210 mov a4, a2
1211 mul_mulsi3_body a2, a4, a3, a5, a6
1212 #endif
1213 leaf_return
1214 #endif /* XCHAL_NO_MUL */
1215 #endif /* L_muldf3 */
1216
1217 #ifdef L_divdf3
1218
1219 /* Division */
1220 __divdf3_aux:
1221
1222 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
1223 (This code is placed before the start of the function just to
1224 keep it in range of the limited branch displacements.) */
1225
1226 .Ldiv_yexpzero:
1227 /* Clear the sign bit of y. */
1228 slli yh, yh, 1
1229 srli yh, yh, 1
1230
1231 /* Check for division by zero. */
1232 or a10, yh, yl
1233 beqz a10, .Ldiv_yzero
1234
1235 /* Normalize y. Adjust the exponent in a9. */
1236 beqz yh, .Ldiv_yh_zero
1237 do_nsau a10, yh, a11, a9
1238 addi a10, a10, -11
1239 ssl a10
1240 src yh, yh, yl
1241 sll yl, yl
1242 movi a9, 1
1243 sub a9, a9, a10
1244 j .Ldiv_ynormalized
1245 .Ldiv_yh_zero:
1246 do_nsau a10, yl, a11, a9
1247 addi a10, a10, -11
1248 movi a9, -31
1249 sub a9, a9, a10
1250 ssl a10
1251 bltz a10, .Ldiv_yl_srl
1252 sll yh, yl
1253 movi yl, 0
1254 j .Ldiv_ynormalized
1255 .Ldiv_yl_srl:
1256 srl yh, yl
1257 sll yl, yl
1258 j .Ldiv_ynormalized
1259
1260 .Ldiv_yzero:
1261 /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
1262 slli xh, xh, 1
1263 srli xh, xh, 1
1264 or xl, xl, xh
1265 srli xh, a7, 31
1266 slli xh, xh, 31
1267 or xh, xh, a6
1268 bnez xl, 1f
1269 movi a4, 0x80000 /* make it a quiet NaN */
1270 or xh, xh, a4
1271 1: movi xl, 0
1272 leaf_return
1273
1274 .Ldiv_xexpzero:
1275 /* Clear the sign bit of x. */
1276 slli xh, xh, 1
1277 srli xh, xh, 1
1278
1279 /* If x is zero, return zero. */
1280 or a10, xh, xl
1281 beqz a10, .Ldiv_return_zero
1282
1283 /* Normalize x. Adjust the exponent in a8. */
1284 beqz xh, .Ldiv_xh_zero
1285 do_nsau a10, xh, a11, a8
1286 addi a10, a10, -11
1287 ssl a10
1288 src xh, xh, xl
1289 sll xl, xl
1290 movi a8, 1
1291 sub a8, a8, a10
1292 j .Ldiv_xnormalized
1293 .Ldiv_xh_zero:
1294 do_nsau a10, xl, a11, a8
1295 addi a10, a10, -11
1296 movi a8, -31
1297 sub a8, a8, a10
1298 ssl a10
1299 bltz a10, .Ldiv_xl_srl
1300 sll xh, xl
1301 movi xl, 0
1302 j .Ldiv_xnormalized
1303 .Ldiv_xl_srl:
1304 srl xh, xl
1305 sll xl, xl
1306 j .Ldiv_xnormalized
1307
1308 .Ldiv_return_zero:
1309 /* Return zero with the appropriate sign bit. */
1310 srli xh, a7, 31
1311 slli xh, xh, 31
1312 movi xl, 0
1313 leaf_return
1314
1315 .Ldiv_xnan_or_inf:
1316 /* Set the sign bit of the result. */
1317 srli a7, yh, 31
1318 slli a7, a7, 31
1319 xor xh, xh, a7
1320 /* If y is NaN or Inf, return NaN. */
1321 bnall yh, a6, 1f
1322 movi a4, 0x80000 /* make it a quiet NaN */
1323 or xh, xh, a4
1324 1: leaf_return
1325
1326 .Ldiv_ynan_or_inf:
1327 /* If y is Infinity, return zero. */
1328 slli a8, yh, 12
1329 or a8, a8, yl
1330 beqz a8, .Ldiv_return_zero
1331 /* y is NaN; return it. */
1332 mov xh, yh
1333 mov xl, yl
1334 leaf_return
1335
1336 .Ldiv_highequal1:
1337 bltu xl, yl, 2f
1338 j 3f
1339
1340 .align 4
1341 .global __divdf3
1342 .type __divdf3, @function
1343 __divdf3:
1344 leaf_entry sp, 16
1345 movi a6, 0x7ff00000
1346
1347 /* Get the sign of the result. */
1348 xor a7, xh, yh
1349
1350 /* Check for NaN and infinity. */
1351 ball xh, a6, .Ldiv_xnan_or_inf
1352 ball yh, a6, .Ldiv_ynan_or_inf
1353
1354 /* Extract the exponents. */
1355 extui a8, xh, 20, 11
1356 extui a9, yh, 20, 11
1357
1358 beqz a9, .Ldiv_yexpzero
1359 .Ldiv_ynormalized:
1360 beqz a8, .Ldiv_xexpzero
1361 .Ldiv_xnormalized:
1362
1363 /* Subtract the exponents. */
1364 sub a8, a8, a9
1365
1366 /* Replace sign/exponent fields with explicit "1.0". */
1367 movi a10, 0x1fffff
1368 or xh, xh, a6
1369 and xh, xh, a10
1370 or yh, yh, a6
1371 and yh, yh, a10
1372
1373 /* Set SAR for left shift by one. */
1374 ssai (32 - 1)
1375
1376 /* The first digit of the mantissa division must be a one.
1377 Shift x (and adjust the exponent) as needed to make this true. */
1378 bltu yh, xh, 3f
1379 beq yh, xh, .Ldiv_highequal1
1380 2: src xh, xh, xl
1381 sll xl, xl
1382 addi a8, a8, -1
1383 3:
1384 /* Do the first subtraction and shift. */
1385 sub xh, xh, yh
1386 bgeu xl, yl, 1f
1387 addi xh, xh, -1
1388 1: sub xl, xl, yl
1389 src xh, xh, xl
1390 sll xl, xl
1391
1392 /* Put the quotient into a10/a11. */
1393 movi a10, 0
1394 movi a11, 1
1395
1396 /* Divide one bit at a time for 52 bits. */
1397 movi a9, 52
1398 #if XCHAL_HAVE_LOOPS
1399 loop a9, .Ldiv_loopend
1400 #endif
1401 .Ldiv_loop:
1402 /* Shift the quotient << 1. */
1403 src a10, a10, a11
1404 sll a11, a11
1405
1406 /* Is this digit a 0 or 1? */
1407 bltu xh, yh, 3f
1408 beq xh, yh, .Ldiv_highequal2
1409
1410 /* Output a 1 and subtract. */
1411 2: addi a11, a11, 1
1412 sub xh, xh, yh
1413 bgeu xl, yl, 1f
1414 addi xh, xh, -1
1415 1: sub xl, xl, yl
1416
1417 /* Shift the dividend << 1. */
1418 3: src xh, xh, xl
1419 sll xl, xl
1420
1421 #if !XCHAL_HAVE_LOOPS
1422 addi a9, a9, -1
1423 bnez a9, .Ldiv_loop
1424 #endif
1425 .Ldiv_loopend:
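/* A rough C model of the quotient loop above (illustrative sketch
   only): classic restoring division, one quotient bit per iteration,
   with the mantissas pre-aligned so that y <= x < 2*y and the first
   quotient bit is always 1.

     #include <stdint.h>

     static uint64_t div_mant(uint64_t x, uint64_t y, uint64_t *rem)
     {
       uint64_t q = 1;                 // the forced leading 1
       x = (x - y) << 1;               // first subtraction and shift
       for (int i = 0; i < 52; i++) {
         q <<= 1;
         if (x >= y) {                 // output a 1 and subtract
           q += 1;
           x -= y;
         }
         x <<= 1;                      // shift the dividend
       }
       *rem = x;                       // remainder << 1, used for rounding
       return q;                       // 53-bit quotient
     }
*/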
1426
1427 /* Add the exponent bias (less one to account for the explicit "1.0"
1428 of the mantissa that will be added to the exponent in the final
1429 result). */
1430 movi a9, 0x3fe
1431 add a8, a8, a9
1432
1433 /* Check for over/underflow. The value in a8 is one less than the
1434 final exponent, so values in the range 0..7fd are OK here. */
1435 addmi a9, a9, 0x400 /* 0x7fe */
1436 bgeu a8, a9, .Ldiv_overflow
1437
1438 .Ldiv_round:
1439 /* Round. The remainder (<< 1) is in xh/xl. */
1440 bltu xh, yh, .Ldiv_rounded
1441 beq xh, yh, .Ldiv_highequal3
1442 .Ldiv_roundup:
1443 addi a11, a11, 1
1444 beqz a11, .Ldiv_roundcarry
1445
1446 .Ldiv_rounded:
1447 mov xl, a11
1448 /* Add the exponent to the mantissa. */
1449 slli a8, a8, 20
1450 add xh, a10, a8
1451
1452 .Ldiv_addsign:
1453 /* Add the sign bit. */
1454 srli a7, a7, 31
1455 slli a7, a7, 31
1456 or xh, xh, a7
1457 leaf_return
1458
1459 .Ldiv_highequal2:
1460 bgeu xl, yl, 2b
1461 j 3b
1462
1463 .Ldiv_highequal3:
1464 bltu xl, yl, .Ldiv_rounded
1465 bne xl, yl, .Ldiv_roundup
1466
1467 /* Remainder is exactly half the divisor. Round even. */
1468 addi a11, a11, 1
1469 beqz a11, .Ldiv_roundcarry
1470 srli a11, a11, 1
1471 slli a11, a11, 1
1472 j .Ldiv_rounded
1473
1474 .Ldiv_overflow:
1475 bltz a8, .Ldiv_underflow
1476 /* Return +/- Infinity. */
1477 addi a8, a9, 1 /* 0x7ff */
1478 slli xh, a8, 20
1479 movi xl, 0
1480 j .Ldiv_addsign
1481
1482 .Ldiv_underflow:
1483 /* Create a subnormal value, where the exponent field contains zero,
1484 but the effective exponent is 1. The value of a8 is one less than
1485 the actual exponent, so just negate it to get the shift amount. */
1486 neg a8, a8
1487 ssr a8
1488 bgeui a8, 32, .Ldiv_bigshift
1489
1490 /* Shift a10/a11 right. Any bits that are shifted out of a11 are
1491 saved in a6 for rounding the result. */
1492 sll a6, a11
1493 src a11, a10, a11
1494 srl a10, a10
1495 j 1f
1496
1497 .Ldiv_bigshift:
1498 bgeui a8, 64, .Ldiv_flush_to_zero
1499 sll a9, a11 /* lost bits shifted out of a11 */
1500 src a6, a10, a11
1501 srl a11, a10
1502 movi a10, 0
1503 or xl, xl, a9
1504
1505 /* Set the exponent to zero. */
1506 1: movi a8, 0
1507
1508 /* Pack any nonzero remainder (in xh/xl) into a6. */
1509 or xh, xh, xl
1510 beqz xh, 1f
1511 movi a9, 1
1512 or a6, a6, a9
1513
1514 /* Round a10/a11 based on the bits shifted out into a6. */
1515 1: bgez a6, .Ldiv_rounded
1516 addi a11, a11, 1
1517 beqz a11, .Ldiv_roundcarry
1518 slli a6, a6, 1
1519 bnez a6, .Ldiv_rounded
1520 srli a11, a11, 1
1521 slli a11, a11, 1
1522 j .Ldiv_rounded
1523
1524 .Ldiv_roundcarry:
1525 /* a11 is always zero when the rounding increment overflows, so
1526 there's no need to round it to an even value. */
1527 addi a10, a10, 1
1528 /* Overflow to the exponent field is OK. */
1529 j .Ldiv_rounded
1530
1531 .Ldiv_flush_to_zero:
1532 /* Return zero with the appropriate sign bit. */
1533 srli xh, a7, 31
1534 slli xh, xh, 31
1535 movi xl, 0
1536 leaf_return
1537
1538 #endif /* L_divdf3 */
1539
1540 #ifdef L_cmpdf2
1541
1542 /* Equal and Not Equal */
1543
1544 .align 4
1545 .global __eqdf2
1546 .global __nedf2
1547 .set __nedf2, __eqdf2
1548 .type __eqdf2, @function
1549 __eqdf2:
1550 leaf_entry sp, 16
1551 bne xl, yl, 2f
1552 bne xh, yh, 4f
1553
1554 /* The values are equal but NaN != NaN. Check the exponent. */
1555 movi a6, 0x7ff00000
1556 ball xh, a6, 3f
1557
1558 /* Equal. */
1559 movi a2, 0
1560 leaf_return
1561
1562 /* Not equal. */
1563 2: movi a2, 1
1564 leaf_return
1565
1566 /* Check if the mantissas are nonzero. */
1567 3: slli a7, xh, 12
1568 or a7, a7, xl
1569 j 5f
1570
1571 /* Check if x and y are zero with different signs. */
1572 4: or a7, xh, yh
1573 slli a7, a7, 1
1574 or a7, a7, xl /* xl == yl here */
1575
1576 /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
1577 of x when exponent(x) = 0x7ff and x == y. */
1578 5: movi a2, 0
1579 movi a3, 1
1580 movnez a2, a3, a7
1581 leaf_return
1582
1583
1584 /* Greater Than */
1585
1586 .align 4
1587 .global __gtdf2
1588 .type __gtdf2, @function
1589 __gtdf2:
1590 leaf_entry sp, 16
1591 movi a6, 0x7ff00000
1592 ball xh, a6, 2f
1593 1: bnall yh, a6, .Lle_cmp
1594
1595 /* Check if y is a NaN. */
1596 slli a7, yh, 12
1597 or a7, a7, yl
1598 beqz a7, .Lle_cmp
1599 movi a2, 0
1600 leaf_return
1601
1602 /* Check if x is a NaN. */
1603 2: slli a7, xh, 12
1604 or a7, a7, xl
1605 beqz a7, 1b
1606 movi a2, 0
1607 leaf_return
1608
1609
1610 /* Less Than or Equal */
1611
1612 .align 4
1613 .global __ledf2
1614 .type __ledf2, @function
1615 __ledf2:
1616 leaf_entry sp, 16
1617 movi a6, 0x7ff00000
1618 ball xh, a6, 2f
1619 1: bnall yh, a6, .Lle_cmp
1620
1621 /* Check if y is a NaN. */
1622 slli a7, yh, 12
1623 or a7, a7, yl
1624 beqz a7, .Lle_cmp
1625 movi a2, 1
1626 leaf_return
1627
1628 /* Check if x is a NaN. */
1629 2: slli a7, xh, 12
1630 or a7, a7, xl
1631 beqz a7, 1b
1632 movi a2, 1
1633 leaf_return
1634
1635 .Lle_cmp:
1636 /* Check if x and y have different signs. */
1637 xor a7, xh, yh
1638 bltz a7, .Lle_diff_signs
1639
1640 /* Check if x is negative. */
1641 bltz xh, .Lle_xneg
1642
1643 /* Check if x <= y. */
1644 bltu xh, yh, 4f
1645 bne xh, yh, 5f
1646 bltu yl, xl, 5f
1647 4: movi a2, 0
1648 leaf_return
1649
1650 .Lle_xneg:
1651 /* Check if y <= x. */
1652 bltu yh, xh, 4b
1653 bne yh, xh, 5f
1654 bgeu xl, yl, 4b
1655 5: movi a2, 1
1656 leaf_return
1657
1658 .Lle_diff_signs:
1659 bltz xh, 4b
1660
1661 /* Check if both x and y are zero. */
1662 or a7, xh, yh
1663 slli a7, a7, 1
1664 or a7, a7, xl
1665 or a7, a7, yl
1666 movi a2, 1
1667 movi a3, 0
1668 moveqz a2, a3, a7
1669 leaf_return
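/* The ordering tests above in rough C terms (illustrative sketch
   only): once NaNs have been filtered out, doubles compare like
   sign-magnitude integers, with the order reversed when both are
   negative and +0/-0 treated as equal.

     #include <stdint.h>

     static int le_bits(uint64_t x, uint64_t y)   // x <= y, raw bit patterns
     {
       int xs = (int)(x >> 63), ys = (int)(y >> 63);
       if (xs != ys)                        // different signs:
         return xs || ((x | y) << 1) == 0;  // true iff x < 0 or both are zero
       if (xs)                              // both negative: order reverses
         return y <= x;
       return x <= y;
     }
*/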
1670
1671
1672 /* Greater Than or Equal */
1673
1674 .align 4
1675 .global __gedf2
1676 .type __gedf2, @function
1677 __gedf2:
1678 leaf_entry sp, 16
1679 movi a6, 0x7ff00000
1680 ball xh, a6, 2f
1681 1: bnall yh, a6, .Llt_cmp
1682
1683 /* Check if y is a NaN. */
1684 slli a7, yh, 12
1685 or a7, a7, yl
1686 beqz a7, .Llt_cmp
1687 movi a2, -1
1688 leaf_return
1689
1690 /* Check if x is a NaN. */
1691 2: slli a7, xh, 12
1692 or a7, a7, xl
1693 beqz a7, 1b
1694 movi a2, -1
1695 leaf_return
1696
1697
1698 /* Less Than */
1699
1700 .align 4
1701 .global __ltdf2
1702 .type __ltdf2, @function
1703 __ltdf2:
1704 leaf_entry sp, 16
1705 movi a6, 0x7ff00000
1706 ball xh, a6, 2f
1707 1: bnall yh, a6, .Llt_cmp
1708
1709 /* Check if y is a NaN. */
1710 slli a7, yh, 12
1711 or a7, a7, yl
1712 beqz a7, .Llt_cmp
1713 movi a2, 0
1714 leaf_return
1715
1716 /* Check if x is a NaN. */
1717 2: slli a7, xh, 12
1718 or a7, a7, xl
1719 beqz a7, 1b
1720 movi a2, 0
1721 leaf_return
1722
1723 .Llt_cmp:
1724 /* Check if x and y have different signs. */
1725 xor a7, xh, yh
1726 bltz a7, .Llt_diff_signs
1727
1728 /* Check if x is negative. */
1729 bltz xh, .Llt_xneg
1730
1731 /* Check if x < y. */
1732 bltu xh, yh, 4f
1733 bne xh, yh, 5f
1734 bgeu xl, yl, 5f
1735 4: movi a2, -1
1736 leaf_return
1737
1738 .Llt_xneg:
1739 /* Check if y < x. */
1740 bltu yh, xh, 4b
1741 bne yh, xh, 5f
1742 bltu yl, xl, 4b
1743 5: movi a2, 0
1744 leaf_return
1745
1746 .Llt_diff_signs:
1747 bgez xh, 5b
1748
1749 /* Check if both x and y are zero. */
1750 or a7, xh, yh
1751 slli a7, a7, 1
1752 or a7, a7, xl
1753 or a7, a7, yl
1754 movi a2, 0
1755 movi a3, -1
1756 movnez a2, a3, a7
1757 leaf_return
1758
1759
1760 /* Unordered */
1761
1762 .align 4
1763 .global __unorddf2
1764 .type __unorddf2, @function
1765 __unorddf2:
1766 leaf_entry sp, 16
1767 movi a6, 0x7ff00000
1768 ball xh, a6, 3f
1769 1: ball yh, a6, 4f
1770 2: movi a2, 0
1771 leaf_return
1772
1773 3: slli a7, xh, 12
1774 or a7, a7, xl
1775 beqz a7, 1b
1776 movi a2, 1
1777 leaf_return
1778
1779 4: slli a7, yh, 12
1780 or a7, a7, yl
1781 beqz a7, 2b
1782 movi a2, 1
1783 leaf_return
1784
1785 #endif /* L_cmpdf2 */
1786
1787 #ifdef L_fixdfsi
1788
1789 .align 4
1790 .global __fixdfsi
1791 .type __fixdfsi, @function
1792 __fixdfsi:
1793 leaf_entry sp, 16
1794
1795 /* Check for NaN and Infinity. */
1796 movi a6, 0x7ff00000
1797 ball xh, a6, .Lfixdfsi_nan_or_inf
1798
1799 /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. */
1800 extui a4, xh, 20, 11
1801 extui a5, a6, 19, 10 /* 0x3fe */
1802 sub a4, a4, a5
1803 bgei a4, 32, .Lfixdfsi_maxint
1804 blti a4, 1, .Lfixdfsi_zero
1805
1806 /* Add explicit "1.0" and shift << 11. */
1807 or a7, xh, a6
1808 ssai (32 - 11)
1809 src a5, a7, xl
1810
1811 /* Shift back to the right, based on the exponent. */
1812 ssl a4 /* shift by 32 - a4 */
1813 srl a5, a5
1814
1815 /* Negate the result if sign != 0. */
1816 neg a2, a5
1817 movgez a2, a5, a7
1818 leaf_return
1819
1820 .Lfixdfsi_nan_or_inf:
1821 /* Handle Infinity and NaN. */
1822 slli a4, xh, 12
1823 or a4, a4, xl
1824 beqz a4, .Lfixdfsi_maxint
1825
1826 /* Translate NaN to +maxint. */
1827 movi xh, 0
1828
1829 .Lfixdfsi_maxint:
1830 slli a4, a6, 11 /* 0x80000000 */
1831 addi a5, a4, -1 /* 0x7fffffff */
1832 movgez a4, a5, xh
1833 mov a2, a4
1834 leaf_return
1835
1836 .Lfixdfsi_zero:
1837 movi a2, 0
1838 leaf_return
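/* A rough C model of the conversion above (illustrative sketch only;
   saturation on overflow and the NaN -> +maxint translation follow
   the code above rather than ISO C):

     #include <stdint.h>

     static int32_t fixdfsi(uint32_t xh, uint32_t xl)
     {
       uint32_t e = (xh >> 20) & 0x7ff;
       if (e == 0x7ff) {                          // Inf or NaN
         if (((xh << 12) | xl) != 0)
           return INT32_MAX;                      // NaN -> +maxint
         return (int32_t)xh < 0 ? INT32_MIN : INT32_MAX;
       }
       int32_t exp = (int32_t)e - 0x3fe;
       if (exp < 1)
         return 0;                                // |x| < 1
       if (exp >= 32)
         return (int32_t)xh < 0 ? INT32_MIN : INT32_MAX;
       // implicit 1.0, 20 fraction bits from xh, then 11 bits from xl
       uint32_t frac = (((xh & 0xfffff) | 0x100000) << 11) | (xl >> 21);
       uint32_t val = frac >> (32 - exp);
       return (int32_t)xh < 0 ? -(int32_t)val : (int32_t)val;
     }
*/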
1839
1840 #endif /* L_fixdfsi */
1841
1842 #ifdef L_fixdfdi
1843
1844 .align 4
1845 .global __fixdfdi
1846 .type __fixdfdi, @function
1847 __fixdfdi:
1848 leaf_entry sp, 16
1849
1850 /* Check for NaN and Infinity. */
1851 movi a6, 0x7ff00000
1852 ball xh, a6, .Lfixdfdi_nan_or_inf
1853
1854 /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */
1855 extui a4, xh, 20, 11
1856 extui a5, a6, 19, 10 /* 0x3fe */
1857 sub a4, a4, a5
1858 bgei a4, 64, .Lfixdfdi_maxint
1859 blti a4, 1, .Lfixdfdi_zero
1860
1861 /* Add explicit "1.0" and shift << 11. */
1862 or a7, xh, a6
1863 ssai (32 - 11)
1864 src xh, a7, xl
1865 sll xl, xl
1866
1867 /* Shift back to the right, based on the exponent. */
1868 ssl a4 /* shift by 64 - a4 */
1869 bgei a4, 32, .Lfixdfdi_smallshift
1870 srl xl, xh
1871 movi xh, 0
1872
1873 .Lfixdfdi_shifted:
1874 /* Negate the result if sign != 0. */
1875 bgez a7, 1f
1876 neg xl, xl
1877 neg xh, xh
1878 beqz xl, 1f
1879 addi xh, xh, -1
1880 1: leaf_return
1881
1882 .Lfixdfdi_smallshift:
1883 src xl, xh, xl
1884 srl xh, xh
1885 j .Lfixdfdi_shifted
1886
1887 .Lfixdfdi_nan_or_inf:
1888 /* Handle Infinity and NaN. */
1889 slli a4, xh, 12
1890 or a4, a4, xl
1891 beqz a4, .Lfixdfdi_maxint
1892
1893 /* Translate NaN to +maxint. */
1894 movi xh, 0
1895
1896 .Lfixdfdi_maxint:
1897 slli a7, a6, 11 /* 0x80000000 */
1898 bgez xh, 1f
1899 mov xh, a7
1900 movi xl, 0
1901 leaf_return
1902
1903 1: addi xh, a7, -1 /* 0x7fffffff */
1904 movi xl, -1
1905 leaf_return
1906
1907 .Lfixdfdi_zero:
1908 movi xh, 0
1909 movi xl, 0
1910 leaf_return
1911
1912 #endif /* L_fixdfdi */
1913
1914 #ifdef L_fixunsdfsi
1915
1916 .align 4
1917 .global __fixunsdfsi
1918 .type __fixunsdfsi, @function
1919 __fixunsdfsi:
1920 leaf_entry sp, 16
1921
1922 /* Check for NaN and Infinity. */
1923 movi a6, 0x7ff00000
1924 ball xh, a6, .Lfixunsdfsi_nan_or_inf
1925
1926 /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */
1927 extui a4, xh, 20, 11
1928 extui a5, a6, 20, 10 /* 0x3ff */
1929 sub a4, a4, a5
1930 bgei a4, 32, .Lfixunsdfsi_maxint
1931 bltz a4, .Lfixunsdfsi_zero
1932
1933 /* Add explicit "1.0" and shift << 11. */
1934 or a7, xh, a6
1935 ssai (32 - 11)
1936 src a5, a7, xl
1937
1938 /* Shift back to the right, based on the exponent. */
1939 addi a4, a4, 1
1940 beqi a4, 32, .Lfixunsdfsi_bigexp
1941 ssl a4 /* shift by 32 - a4 */
1942 srl a5, a5
1943
1944 /* Negate the result if sign != 0. */
1945 neg a2, a5
1946 movgez a2, a5, a7
1947 leaf_return
1948
1949 .Lfixunsdfsi_nan_or_inf:
1950 /* Handle Infinity and NaN. */
1951 slli a4, xh, 12
1952 or a4, a4, xl
1953 beqz a4, .Lfixunsdfsi_maxint
1954
1955 /* Translate NaN to 0xffffffff. */
1956 movi a2, -1
1957 leaf_return
1958
1959 .Lfixunsdfsi_maxint:
1960 slli a4, a6, 11 /* 0x80000000 */
1961 movi a5, -1 /* 0xffffffff */
1962 movgez a4, a5, xh
1963 mov a2, a4
1964 leaf_return
1965
1966 .Lfixunsdfsi_zero:
1967 movi a2, 0
1968 leaf_return
1969
1970 .Lfixunsdfsi_bigexp:
1971 /* Handle unsigned maximum exponent case. */
1972 bltz xh, 1f
1973 mov a2, a5 /* no shift needed */
1974 leaf_return
1975
1976 /* Return 0x80000000 if negative. */
1977 1: slli a2, a6, 11
1978 leaf_return
1979
1980 #endif /* L_fixunsdfsi */
1981
1982 #ifdef L_fixunsdfdi
1983
1984 .align 4
1985 .global __fixunsdfdi
1986 .type __fixunsdfdi, @function
1987 __fixunsdfdi:
1988 leaf_entry sp, 16
1989
1990 /* Check for NaN and Infinity. */
1991 movi a6, 0x7ff00000
1992 ball xh, a6, .Lfixunsdfdi_nan_or_inf
1993
1994 /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */
1995 extui a4, xh, 20, 11
1996 extui a5, a6, 20, 10 /* 0x3ff */
1997 sub a4, a4, a5
1998 bgei a4, 64, .Lfixunsdfdi_maxint
1999 bltz a4, .Lfixunsdfdi_zero
2000
2001 /* Add explicit "1.0" and shift << 11. */
2002 or a7, xh, a6
2003 ssai (32 - 11)
2004 src xh, a7, xl
2005 sll xl, xl
2006
2007 /* Shift back to the right, based on the exponent. */
2008 addi a4, a4, 1
2009 beqi a4, 64, .Lfixunsdfdi_bigexp
2010 ssl a4 /* shift by 64 - a4 */
2011 bgei a4, 32, .Lfixunsdfdi_smallshift
2012 srl xl, xh
2013 movi xh, 0
2014
2015 .Lfixunsdfdi_shifted:
2016 /* Negate the result if sign != 0. */
2017 bgez a7, 1f
2018 neg xl, xl
2019 neg xh, xh
2020 beqz xl, 1f
2021 addi xh, xh, -1
2022 1: leaf_return
2023
2024 .Lfixunsdfdi_smallshift:
2025 src xl, xh, xl
2026 srl xh, xh
2027 j .Lfixunsdfdi_shifted
2028
2029 .Lfixunsdfdi_nan_or_inf:
2030 /* Handle Infinity and NaN. */
2031 slli a4, xh, 12
2032 or a4, a4, xl
2033 beqz a4, .Lfixunsdfdi_maxint
2034
2035 /* Translate NaN to 0xffffffff.... */
2036 1: movi xh, -1
2037 movi xl, -1
2038 leaf_return
2039
2040 .Lfixunsdfdi_maxint:
2041 bgez xh, 1b
2042 2: slli xh, a6, 11 /* 0x80000000 */
2043 movi xl, 0
2044 leaf_return
2045
2046 .Lfixunsdfdi_zero:
2047 movi xh, 0
2048 movi xl, 0
2049 leaf_return
2050
2051 .Lfixunsdfdi_bigexp:
2052 /* Handle unsigned maximum exponent case. */
2053 bltz a7, 2b
2054 leaf_return /* no shift needed */
2055
2056 #endif /* L_fixunsdfdi */
2057
2058 #ifdef L_floatsidf
2059
2060 .align 4
2061 .global __floatunsidf
2062 .type __floatunsidf, @function
2063 __floatunsidf:
2064 leaf_entry sp, 16
2065 beqz a2, .Lfloatsidf_return_zero
2066
2067 /* Set the sign to zero and jump to the floatsidf code. */
2068 movi a7, 0
2069 j .Lfloatsidf_normalize
2070
2071 .align 4
2072 .global __floatsidf
2073 .type __floatsidf, @function
2074 __floatsidf:
2075 leaf_entry sp, 16
2076
2077 /* Check for zero. */
2078 beqz a2, .Lfloatsidf_return_zero
2079
2080 /* Save the sign. */
2081 extui a7, a2, 31, 1
2082
2083 /* Get the absolute value. */
2084 #if XCHAL_HAVE_ABS
2085 abs a2, a2
2086 #else
2087 neg a4, a2
2088 movltz a2, a4, a2
2089 #endif
2090
2091 .Lfloatsidf_normalize:
2092 /* Normalize with the first 1 bit in the msb. */
2093 do_nsau a4, a2, a5, a6
2094 ssl a4
2095 sll a5, a2
2096
2097 /* Shift the mantissa into position. */
2098 srli xh, a5, 11
2099 slli xl, a5, (32 - 11)
2100
2101 /* Set the exponent. */
2102 movi a5, 0x41d /* 0x3fe + 31 */
2103 sub a5, a5, a4
2104 slli a5, a5, 20
2105 add xh, xh, a5
2106
2107 /* Add the sign and return. */
2108 slli a7, a7, 31
2109 or xh, xh, a7
2110 leaf_return
2111
2112 .Lfloatsidf_return_zero:
2113 movi a3, 0
2114 leaf_return
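/* A rough C model of the int -> double conversion above (illustrative
   sketch only; __builtin_clz stands in for the do_nsau macro):

     #include <stdint.h>

     static void floatsidf(int32_t a, uint32_t *xh, uint32_t *xl)
     {
       if (a == 0) { *xh = 0; *xl = 0; return; }
       uint32_t sign = (uint32_t)a & 0x80000000u;
       uint32_t mag = (a < 0) ? 0u - (uint32_t)a : (uint32_t)a;
       int nz = __builtin_clz(mag);
       mag <<= nz;                              // leading 1 now in the msb
       // 0x41d = 0x3fe + 31; the leading 1 at bit 20 adds the final +1
       uint32_t hi = ((0x41du - (uint32_t)nz) << 20) + (mag >> 11);
       *xh = hi | sign;
       *xl = mag << 21;                         // remaining fraction bits
       // exact: a 32-bit integer always fits in the 53-bit mantissa
     }
*/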
2115
2116 #endif /* L_floatsidf */
2117
2118 #ifdef L_floatdidf
2119
2120 .align 4
2121 .global __floatundidf
2122 .type __floatundidf, @function
2123 __floatundidf:
2124 leaf_entry sp, 16
2125
2126 /* Check for zero. */
2127 or a4, xh, xl
2128 beqz a4, 2f
2129
2130 /* Set the sign to zero and jump to the floatdidf code. */
2131 movi a7, 0
2132 j .Lfloatdidf_normalize
2133
2134 .align 4
2135 .global __floatdidf
2136 .type __floatdidf, @function
2137 __floatdidf:
2138 leaf_entry sp, 16
2139
2140 /* Check for zero. */
2141 or a4, xh, xl
2142 beqz a4, 2f
2143
2144 /* Save the sign. */
2145 extui a7, xh, 31, 1
2146
2147 /* Get the absolute value. */
2148 bgez xh, .Lfloatdidf_normalize
2149 neg xl, xl
2150 neg xh, xh
2151 beqz xl, .Lfloatdidf_normalize
2152 addi xh, xh, -1
2153
2154 .Lfloatdidf_normalize:
2155 /* Normalize with the first 1 bit in the msb of xh. */
2156 beqz xh, .Lfloatdidf_bigshift
2157 do_nsau a4, xh, a5, a6
2158 ssl a4
2159 src xh, xh, xl
2160 sll xl, xl
2161
2162 .Lfloatdidf_shifted:
2163 /* Shift the mantissa into position, with rounding bits in a6. */
2164 ssai 11
2165 sll a6, xl
2166 src xl, xh, xl
2167 srl xh, xh
2168
2169 /* Set the exponent. */
2170 movi a5, 0x43d /* 0x3fe + 63 */
2171 sub a5, a5, a4
2172 slli a5, a5, 20
2173 add xh, xh, a5
2174
2175 /* Add the sign. */
2176 slli a7, a7, 31
2177 or xh, xh, a7
2178
2179 /* Round up if the leftover fraction is >= 1/2. */
2180 bgez a6, 2f
2181 addi xl, xl, 1
2182 beqz xl, .Lfloatdidf_roundcarry
2183
2184 /* Check if the leftover fraction is exactly 1/2. */
2185 slli a6, a6, 1
2186 beqz a6, .Lfloatdidf_exactlyhalf
2187 2: leaf_return
2188
2189 .Lfloatdidf_bigshift:
2190 /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */
2191 do_nsau a4, xl, a5, a6
2192 ssl a4
2193 sll xh, xl
2194 movi xl, 0
2195 addi a4, a4, 32
2196 j .Lfloatdidf_shifted
2197
2198 .Lfloatdidf_exactlyhalf:
2199 /* Round down to the nearest even value. */
2200 srli xl, xl, 1
2201 slli xl, xl, 1
2202 leaf_return
2203
2204 .Lfloatdidf_roundcarry:
2205 /* xl is always zero when the rounding increment overflows, so
2206 there's no need to round it to an even value. */
2207 addi xh, xh, 1
2208 /* Overflow to the exponent is OK. */
2209 leaf_return
2210
2211 #endif /* L_floatdidf */
2212
2213 #ifdef L_truncdfsf2
2214
2215 .align 4
2216 .global __truncdfsf2
2217 .type __truncdfsf2, @function
2218 __truncdfsf2:
2219 leaf_entry sp, 16
2220
2221 /* Adjust the exponent bias. */
2222 movi a4, (0x3ff - 0x7f) << 20
2223 sub a5, xh, a4
2224
2225 /* Check for underflow. */
2226 xor a6, xh, a5
2227 bltz a6, .Ltrunc_underflow
2228 extui a6, a5, 20, 11
2229 beqz a6, .Ltrunc_underflow
2230
2231 /* Check for overflow. */
2232 movi a4, 255
2233 bge a6, a4, .Ltrunc_overflow
2234
2235 /* Shift a5/xl << 3 into a5/a4. */
2236 ssai (32 - 3)
2237 src a5, a5, xl
2238 sll a4, xl
2239
2240 .Ltrunc_addsign:
2241 /* Add the sign bit. */
2242 extui a6, xh, 31, 1
2243 slli a6, a6, 31
2244 or a2, a6, a5
2245
2246 /* Round up if the leftover fraction is >= 1/2. */
2247 bgez a4, 1f
2248 addi a2, a2, 1
2249 /* Overflow to the exponent is OK. The answer will be correct. */
2250
2251 /* Check if the leftover fraction is exactly 1/2. */
2252 slli a4, a4, 1
2253 beqz a4, .Ltrunc_exactlyhalf
2254 1: leaf_return
2255
2256 .Ltrunc_exactlyhalf:
2257 /* Round down to the nearest even value. */
2258 srli a2, a2, 1
2259 slli a2, a2, 1
2260 leaf_return
2261
2262 .Ltrunc_overflow:
2263 /* Check if exponent == 0x7ff. */
2264 movi a4, 0x7ff00000
2265 bnall xh, a4, 1f
2266
2267 /* Check if mantissa is nonzero. */
2268 slli a5, xh, 12
2269 or a5, a5, xl
2270 beqz a5, 1f
2271
2272 /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */
2273 srli a4, a4, 1
2274
2275 1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */
2276 /* Add the sign bit. */
2277 extui a6, xh, 31, 1
2278 ssai 1
2279 src a2, a6, a4
2280 leaf_return
2281
2282 .Ltrunc_underflow:
2283 /* Find shift count for a subnormal. Flush to zero if >= 32. */
2284 extui a6, xh, 20, 11
2285 movi a5, 0x3ff - 0x7f
2286 sub a6, a5, a6
2287 addi a6, a6, 1
2288 bgeui a6, 32, 1f
2289
2290 /* Replace the exponent with an explicit "1.0". */
2291 slli a5, a5, 13 /* 0x700000 */
2292 or a5, a5, xh
2293 slli a5, a5, 11
2294 srli a5, a5, 11
2295
2296 /* Shift the mantissa left by 3 bits (into a5/a4). */
2297 ssai (32 - 3)
2298 src a5, a5, xl
2299 sll a4, xl
2300
2301 /* Shift right by a6. */
2302 ssr a6
2303 sll a7, a4
2304 src a4, a5, a4
2305 srl a5, a5
2306 beqz a7, .Ltrunc_addsign
2307 or a4, a4, a6 /* any positive, nonzero value will work */
2308 j .Ltrunc_addsign
2309
2310 /* Return +/- zero. */
2311 1: extui a2, xh, 31, 1
2312 slli a2, a2, 31
2313 leaf_return
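/* The normal-number path above in rough C terms (illustrative sketch
   only; the underflow, overflow and NaN branches follow the assembly
   above):

     #include <stdint.h>

     static uint32_t truncdfsf2_normal(uint32_t xh, uint32_t xl)
     {
       // re-bias: double bias 0x3ff -> single bias 0x7f
       uint32_t hi = xh - ((0x3ffu - 0x7f) << 20);
       // the single-precision fields sit 3 bits further left
       uint32_t res = (hi << 3) | (xl >> 29);
       uint32_t leftover = xl << 3;          // guard and sticky bits
       res |= xh & 0x80000000u;              // put the sign back
       if (leftover & 0x80000000u) {         // round to nearest ...
         res += 1;                           // carry into the exponent is OK
         if ((leftover << 1) == 0)
           res &= ~1u;                       // ... ties to even
       }
       return res;
     }
*/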
2314
2315 #endif /* L_truncdfsf2 */
2316
2317 #ifdef L_extendsfdf2
2318
2319 .align 4
2320 .global __extendsfdf2
2321 .type __extendsfdf2, @function
2322 __extendsfdf2:
2323 leaf_entry sp, 16
2324
2325 /* Save the sign bit and then shift it off. */
2326 extui a5, a2, 31, 1
2327 slli a5, a5, 31
2328 slli a4, a2, 1
2329
2330 /* Extract and check the exponent. */
2331 extui a6, a2, 23, 8
2332 beqz a6, .Lextend_expzero
2333 addi a6, a6, 1
2334 beqi a6, 256, .Lextend_nan_or_inf
2335
2336 /* Shift >> 3 into a4/xl. */
2337 srli a4, a4, 4
2338 slli xl, a2, (32 - 3)
2339
2340 /* Adjust the exponent bias. */
2341 movi a6, (0x3ff - 0x7f) << 20
2342 add a4, a4, a6
2343
2344 /* Add the sign bit. */
2345 or xh, a4, a5
2346 leaf_return
2347
2348 .Lextend_nan_or_inf:
2349 movi a4, 0x7ff00000
2350
2351 /* Check for NaN. */
2352 slli a7, a2, 9
2353 beqz a7, 1f
2354
2355 slli a6, a6, 11 /* 0x80000 */
2356 or a4, a4, a6
2357
2358 /* Add the sign and return. */
2359 1: or xh, a4, a5
2360 movi xl, 0
2361 leaf_return
2362
2363 .Lextend_expzero:
2364 beqz a4, 1b
2365
2366 /* Normalize it to have 8 zero bits before the first 1 bit. */
2367 do_nsau a7, a4, a2, a3
2368 addi a7, a7, -8
2369 ssl a7
2370 sll a4, a4
2371
2372 /* Shift >> 3 into a4/xl. */
2373 slli xl, a4, (32 - 3)
2374 srli a4, a4, 3
2375
2376 /* Set the exponent. */
2377 movi a6, 0x3fe - 0x7f
2378 sub a6, a6, a7
2379 slli a6, a6, 20
2380 add a4, a4, a6
2381
2382 /* Add the sign and return. */
2383 or xh, a4, a5
2384 leaf_return
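/* The normal-number path above in rough C terms (illustrative sketch
   only; zero, subnormal, Inf and NaN take the other branches):

     #include <stdint.h>

     static void extendsfdf2_normal(uint32_t f, uint32_t *xh, uint32_t *xl)
     {
       uint32_t sign = f & 0x80000000u;
       // exponent and fraction move down 3 bits into xh ...
       uint32_t hi = ((f & 0x7fffffffu) >> 3) + ((0x3ffu - 0x7f) << 20);
       *xh = hi | sign;
       *xl = f << 29;          // ... and the low 3 fraction bits into xl
     }
*/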
2385
2386 #endif /* L_extendsfdf2 */
2387
2388