gcc/config/xtensa/ieee754-sf.S @ 0:a06113de4d67

first commit
author: kent <kent@cr.ie.u-ryukyu.ac.jp>
date:   Fri, 17 Jul 2009 14:47:48 +0900
/* IEEE-754 single-precision functions for Xtensa
   Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif

/* Warning!  The branch displacements for some Xtensa branch instructions
   are quite small, and this code has been carefully laid out to keep
   branch targets in range.  If you change anything, be sure to check that
   the assembler is not relaxing anything to branch over a jump.  */

#ifdef L_negsf2

        .align  4
        .global __negsf2
        .type   __negsf2, @function
__negsf2:
        leaf_entry sp, 16
        movi    a4, 0x80000000
        xor     a2, a2, a4
        leaf_return

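        /* For illustration only: a minimal C model of __negsf2, operating
           on the raw bit pattern (hypothetical helper name).  Negation
           just flips the sign bit; it applies equally to zeros,
           subnormals, Infinities and NaNs:

               #include <stdint.h>

               static uint32_t negsf2_bits (uint32_t x)
               {
                 return x ^ 0x80000000u;   // flip bit 31, the sign bit
               }
        */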
#endif /* L_negsf2 */

#ifdef L_addsubsf3

/* Addition */
__addsf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Ladd_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   a3, a6, 1f
        /* If x is a NaN, return it.  Otherwise, return y.  */
        slli    a7, a2, 9
        beqz    a7, .Ladd_ynan_or_inf
1:      leaf_return

.Ladd_ynan_or_inf:
        /* Return y.  */
        mov     a2, a3
        leaf_return

.Ladd_opposite_signs:
        /* Operand signs differ.  Do a subtraction.  */
        slli    a7, a6, 8
        xor     a3, a3, a7
        j       .Lsub_same_sign

        .align  4
        .global __addsf3
        .type   __addsf3, @function
__addsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Check if the two operands have the same sign.  */
        xor     a7, a2, a3
        bltz    a7, .Ladd_opposite_signs

.Ladd_same_sign:
        /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
        ball    a2, a6, .Ladd_xnan_or_inf
        ball    a3, a6, .Ladd_ynan_or_inf

        /* Compare the exponents.  The smaller operand will be shifted
           right by the exponent difference and added to the larger
           one.  */
        extui   a7, a2, 23, 9
        extui   a8, a3, 23, 9
        bltu    a7, a8, .Ladd_shiftx

.Ladd_shifty:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   a3, a6, .Ladd_yexpzero

        /* Replace y sign/exponent with 0x008.  */
        or      a3, a3, a6
        slli    a3, a3, 8
        srli    a3, a3, 8

.Ladd_yexpdiff:
        /* Compute the exponent difference.  */
        sub     a10, a7, a8

        /* Exponent difference >= 32 -- just return the bigger value.  */
        bgeui   a10, 32, 1f

        /* Shift y right by the exponent difference.  Any bits that are
           shifted out of y are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, a3, a9
        srl     a3, a3

        /* Do the addition.  */
        add     a2, a2, a3

        /* Check if the add overflowed into the exponent.  */
        extui   a10, a2, 23, 9
        beq     a10, a7, .Ladd_round
        mov     a8, a7
        j       .Ladd_carry

.Ladd_yexpzero:
        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0", and increment the apparent exponent
           because subnormals behave as if they had the minimum (nonzero)
           exponent.  Test for the case when both exponents are zero.  */
        slli    a3, a3, 9
        srli    a3, a3, 9
        bnone   a2, a6, .Ladd_bothexpzero
        addi    a8, a8, 1
        j       .Ladd_yexpdiff

.Ladd_bothexpzero:
        /* Both exponents are zero.  Handle this as a special case.  There
           is no need to shift or round, and the normal code for handling
           a carry into the exponent field will not work because it
           assumes there is an implicit "1.0" that needs to be added.  */
        add     a2, a2, a3
1:      leaf_return

.Ladd_xexpzero:
        /* Same as "yexpzero" except skip handling the case when both
           exponents are zero.  */
        slli    a2, a2, 9
        srli    a2, a2, 9
        addi    a7, a7, 1
        j       .Ladd_xexpdiff

.Ladd_shiftx:
        /* Same thing as the "shifty" code, but with x and y swapped.  Also,
           because the exponent difference is always nonzero in this version,
           the shift sequence can use SLL and skip loading a constant zero.  */
        bnone   a2, a6, .Ladd_xexpzero

        or      a2, a2, a6
        slli    a2, a2, 8
        srli    a2, a2, 8

.Ladd_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Ladd_returny

        ssr     a10
        sll     a9, a2
        srl     a2, a2

        add     a2, a2, a3

        /* Check if the add overflowed into the exponent.  */
        extui   a10, a2, 23, 9
        bne     a10, a8, .Ladd_carry

.Ladd_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    a2, a2, 1

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_returny:
        mov     a2, a3
        leaf_return

.Ladd_carry:
        /* The addition has overflowed into the exponent field, so the
           value needs to be renormalized.  The mantissa of the result
           can be recovered by subtracting the original exponent and
           adding 0x800000 (which is the explicit "1.0" for the
           mantissa of the non-shifted operand -- the "1.0" for the
           shifted operand was already added).  The mantissa can then
           be shifted right by one bit.  The explicit "1.0" of the
           shifted mantissa then needs to be replaced by the exponent,
           incremented by one to account for the normalizing shift.
           It is faster to combine these operations: do the shift first
           and combine the additions and subtractions.  If x is the
           original exponent, the result is:
               shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
           or, since (x << 23) - (x << 22) = (x << 22):
               shifted mantissa + ((x + 1) << 22)
           Note that the exponent is incremented here by leaving the
           explicit "1.0" of the mantissa in the exponent field.  */

        /* Shift x right by one bit.  Save the lsb.  */
        mov     a10, a2
        srli    a2, a2, 1

        /* See explanation above.  The original exponent is in a8.  */
        addi    a8, a8, 1
        slli    a8, a8, 22
        add     a2, a2, a8

        /* Return an Infinity if the exponent overflowed.  */
        ball    a2, a6, .Ladd_infinity

        /* Same thing as the "round" code except the msb of the leftover
           fraction is bit 0 of a10, with the rest of the fraction in a9.  */
        bbci.l  a10, 0, 1f
        addi    a2, a2, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_infinity:
        /* Clear the mantissa.  */
        srli    a2, a2, 23
        slli    a2, a2, 23

        /* The sign bit may have been lost in a carry-out.  Put it back.  */
        slli    a8, a8, 1
        or      a2, a2, a8
        leaf_return

.Ladd_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

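        /* For illustration only: a C sketch of the rounding step shared
           by .Ladd_round and .Ladd_exactlyhalf.  "sum" is the packed
           result before rounding and "guard" holds the bits shifted out
           of the smaller operand, left-justified, so its msb is the
           half-ulp bit (names are hypothetical):

               #include <stdint.h>

               static uint32_t round_even (uint32_t sum, uint32_t guard)
               {
                 if (guard & 0x80000000u)    // leftover fraction >= 1/2
                   {
                     sum += 1;               // round up; a carry into the
                                             // exponent field is harmless
                     if ((guard << 1) == 0)  // exactly 1/2: a tie, so
                       sum &= ~1u;           // round to the nearest even
                   }
                 return sum;
               }
        */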

/* Subtraction */
__subsf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Lsub_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   a3, a6, 1f
        /* Both x and y are either NaN or Inf, so the result is NaN.  */
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Lsub_ynan_or_inf:
        /* Negate y and return it.  */
        slli    a7, a6, 8
        xor     a2, a3, a7
        leaf_return

.Lsub_opposite_signs:
        /* Operand signs differ.  Do an addition.  */
        slli    a7, a6, 8
        xor     a3, a3, a7
        j       .Ladd_same_sign

        .align  4
        .global __subsf3
        .type   __subsf3, @function
__subsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Check if the two operands have the same sign.  */
        xor     a7, a2, a3
        bltz    a7, .Lsub_opposite_signs

.Lsub_same_sign:
        /* Check if either exponent == 0x7f8 (i.e., NaN or Infinity).  */
        ball    a2, a6, .Lsub_xnan_or_inf
        ball    a3, a6, .Lsub_ynan_or_inf

        /* Compare the operands.  In contrast to addition, the entire
           value matters here.  */
        extui   a7, a2, 23, 8
        extui   a8, a3, 23, 8
        bltu    a2, a3, .Lsub_xsmaller

.Lsub_ysmaller:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   a3, a6, .Lsub_yexpzero

        /* Replace y sign/exponent with 0x008.  */
        or      a3, a3, a6
        slli    a3, a3, 8
        srli    a3, a3, 8

.Lsub_yexpdiff:
        /* Compute the exponent difference.  */
        sub     a10, a7, a8

        /* Exponent difference >= 32 -- just return the bigger value.  */
        bgeui   a10, 32, 1f

        /* Shift y right by the exponent difference.  Any bits that are
           shifted out of y are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, a3, a9
        srl     a3, a3

        sub     a2, a2, a3

        /* Subtract the leftover bits in a9 from zero and propagate any
           borrow from a2.  */
        neg     a9, a9
        addi    a10, a2, -1
        movnez  a2, a10, a9

        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, a2, 23, 8
        beq     a10, a7, .Lsub_round
        j       .Lsub_borrow

.Lsub_yexpzero:
        /* Return zero if the inputs are equal.  (For the non-subnormal
           case, subtracting the "1.0" will cause a borrow from the exponent
           and this case can be detected when handling the borrow.)  */
        beq     a2, a3, .Lsub_return_zero

        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0".  Unless x is also a subnormal, increment
           y's apparent exponent because subnormals behave as if they had
           the minimum (nonzero) exponent.  */
        slli    a3, a3, 9
        srli    a3, a3, 9
        bnone   a2, a6, .Lsub_yexpdiff
        addi    a8, a8, 1
        j       .Lsub_yexpdiff

.Lsub_returny:
        /* Negate and return y.  */
        slli    a7, a6, 8
        xor     a2, a3, a7
1:      leaf_return

.Lsub_xsmaller:
        /* Same thing as the "ysmaller" code, but with x and y swapped and
           with y negated.  */
        bnone   a2, a6, .Lsub_xexpzero

        or      a2, a2, a6
        slli    a2, a2, 8
        srli    a2, a2, 8

.Lsub_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Lsub_returny

        ssr     a10
        movi    a9, 0
        src     a9, a2, a9
        srl     a2, a2

        /* Negate y.  */
        slli    a11, a6, 8
        xor     a3, a3, a11

        sub     a2, a3, a2

        neg     a9, a9
        addi    a10, a2, -1
        movnez  a2, a10, a9

        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, a2, 23, 8
        bne     a10, a8, .Lsub_borrow

.Lsub_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    a2, a2, 1

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Lsub_exactlyhalf
1:      leaf_return

.Lsub_xexpzero:
        /* Same as "yexpzero".  */
        beq     a2, a3, .Lsub_return_zero
        slli    a2, a2, 9
        srli    a2, a2, 9
        bnone   a3, a6, .Lsub_xexpdiff
        addi    a7, a7, 1
        j       .Lsub_xexpdiff

.Lsub_return_zero:
        movi    a2, 0
        leaf_return

.Lsub_borrow:
        /* The subtraction has underflowed into the exponent field, so the
           value needs to be renormalized.  Shift the mantissa left as
           needed to remove any leading zeros and adjust the exponent
           accordingly.  If the exponent is not large enough to remove
           all the leading zeros, the result will be a subnormal value.  */

        slli    a8, a2, 9
        beqz    a8, .Lsub_xzero
        do_nsau a6, a8, a7, a11
        srli    a8, a8, 9
        bge     a6, a10, .Lsub_subnormal
        addi    a6, a6, 1

.Lsub_normalize_shift:
        /* Shift the mantissa (a8/a9) left by a6.  */
        ssl     a6
        src     a8, a8, a9
        sll     a9, a9

        /* Combine the shifted mantissa with the sign and exponent,
           decrementing the exponent by a6.  (The exponent has already
           been decremented by one due to the borrow from the subtraction,
           but adding the mantissa will increment the exponent by one.)  */
        srli    a2, a2, 23
        sub     a2, a2, a6
        slli    a2, a2, 23
        add     a2, a2, a8
        j       .Lsub_round

.Lsub_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

.Lsub_xzero:
        /* If there was a borrow from the exponent, and the mantissa and
           guard digits are all zero, then the inputs were equal and the
           result should be zero.  */
        beqz    a9, .Lsub_return_zero

        /* Only the guard digit is nonzero.  Shift by min(24, a10).  */
        addi    a11, a10, -24
        movi    a6, 24
        movltz  a6, a10, a11
        j       .Lsub_normalize_shift

.Lsub_subnormal:
        /* The exponent is too small to shift away all the leading zeros.
           Set a6 to the current exponent (which has already been
           decremented by the borrow) so that the exponent of the result
           will be zero.  Do not add 1 to a6 in this case, because: (1)
           adding the mantissa will not increment the exponent, so there is
           no need to subtract anything extra from the exponent to
           compensate, and (2) the effective exponent of a subnormal is 1
           not 0 so the shift amount must be 1 smaller than normal.  */
        mov     a6, a10
        j       .Lsub_normalize_shift

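        /* For illustration only: a C sketch of the renormalization at
           .Lsub_borrow, under the model value = f * 2^(e - 150) with
           0 < f < 2^24 and e >= 1; the guard bits that the assembly keeps
           in a9 are omitted for brevity (hypothetical names):

               #include <stdint.h>

               static uint32_t renorm (uint32_t f, uint32_t e)
               {
                 unsigned lz = __builtin_clz (f) - 8; // zeros in 24-bit field
                 if (lz < e)                          // can fully normalize
                   return ((e - lz - 1) << 23) + (f << lz);
                                                      // implicit bit re-adds
                                                      // one to the exponent
                 return f << (e - 1);                 // subnormal result:
                                                      // exponent field = 0
               }
        */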
#endif /* L_addsubsf3 */

#ifdef L_mulsf3

/* Multiplication */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

__mulsf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
        /* Clear the sign bit of x.  */
        slli    a2, a2, 1
        srli    a2, a2, 1

        /* If x is zero, return zero.  */
        beqz    a2, .Lmul_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        do_nsau a10, a2, a11, a12
        addi    a10, a10, -8
        ssl     a10
        sll     a2, a2
        movi    a8, 1
        sub     a8, a8, a10
        j       .Lmul_xnormalized

.Lmul_yexpzero:
        /* Clear the sign bit of y.  */
        slli    a3, a3, 1
        srli    a3, a3, 1

        /* If y is zero, return zero.  */
        beqz    a3, .Lmul_return_zero

        /* Normalize y.  Adjust the exponent in a9.  */
        do_nsau a10, a3, a11, a12
        addi    a10, a10, -8
        ssl     a10
        sll     a3, a3
        movi    a9, 1
        sub     a9, a9, a10
        j       .Lmul_ynormalized

.Lmul_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        j       .Lmul_done

.Lmul_xnan_or_inf:
        /* If y is zero, return NaN.  */
        slli    a8, a3, 1
        bnez    a8, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
        j       .Lmul_done
1:
        /* If y is NaN, return y.  */
        bnall   a3, a6, .Lmul_returnx
        slli    a8, a3, 9
        beqz    a8, .Lmul_returnx

.Lmul_returny:
        mov     a2, a3

.Lmul_returnx:
        /* Set the sign bit and return.  */
        extui   a7, a7, 31, 1
        slli    a2, a2, 1
        ssai    1
        src     a2, a7, a2
        j       .Lmul_done

.Lmul_ynan_or_inf:
        /* If x is zero, return NaN.  */
        slli    a8, a2, 1
        bnez    a8, .Lmul_returny
        movi    a7, 0x400000    /* make it a quiet NaN */
        or      a2, a3, a7
        j       .Lmul_done

        .align  4
        .global __mulsf3
        .type   __mulsf3, @function
__mulsf3:
#if __XTENSA_CALL0_ABI__
        leaf_entry sp, 32
        addi    sp, sp, -32
        s32i    a12, sp, 16
        s32i    a13, sp, 20
        s32i    a14, sp, 24
        s32i    a15, sp, 28
#elif XCHAL_NO_MUL
        /* This is not really a leaf function; allocate enough stack space
           to allow CALL12s to a helper function.  */
        leaf_entry sp, 64
#else
        leaf_entry sp, 32
#endif
        movi    a6, 0x7f800000

        /* Get the sign of the result.  */
        xor     a7, a2, a3

        /* Check for NaN and infinity.  */
        ball    a2, a6, .Lmul_xnan_or_inf
        ball    a3, a6, .Lmul_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, a2, 23, 8
        extui   a9, a3, 23, 8

        beqz    a8, .Lmul_xexpzero
.Lmul_xnormalized:
        beqz    a9, .Lmul_yexpzero
.Lmul_ynormalized:

        /* Add the exponents.  */
        add     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0xffffff
        or      a2, a2, a6
        and     a2, a2, a10
        or      a3, a3, a6
        and     a3, a3, a10

        /* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */

#if XCHAL_HAVE_MUL32_HIGH

        mull    a6, a2, a3
        muluh   a2, a2, a3

#else

        /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
           products.  These partial products are:

                0 xl * yl

                1 xl * yh
                2 xh * yl

                3 xh * yh

           If using the Mul16 or Mul32 multiplier options, these input
           chunks must be stored in separate registers.  For Mac16, the
           UMUL.AA.* opcodes can specify that the inputs come from either
           half of the registers, so there is no need to shift them out
           ahead of time.  If there is no multiply hardware, the 16-bit
           chunks can be extracted when setting up the arguments to the
           separate multiply function.  */

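        /* For illustration only: the same decomposition in C (hypothetical
           names).  Partial products 1 and 2 overlap the middle 32 bits of
           the 64-bit result, so their carries must be collected by hand,
           which is what the a9 bookkeeping below does:

               #include <stdint.h>

               static void mul32x32 (uint32_t x, uint32_t y,
                                     uint32_t *hi, uint32_t *lo)
               {
                 uint32_t xlo = x & 0xffff, xhi = x >> 16;
                 uint32_t ylo = y & 0xffff, yhi = y >> 16;
                 uint32_t pp0 = xlo * ylo, pp1 = xlo * yhi;
                 uint32_t pp2 = xhi * ylo, pp3 = xhi * yhi;
                 uint32_t mid = pp1 + pp2;    // may wrap around
                 uint32_t mc  = mid < pp1;    // carry into bit 48
                 *lo = pp0 + (mid << 16);
                 *hi = pp3 + (mid >> 16) + (mc << 16) + (*lo < pp0);
               }
        */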
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Calling a separate multiply function will clobber a0 and requires
           use of a8 as a temporary, so save those values now.  (The function
           uses a custom ABI so nothing else needs to be saved.)  */
        s32i    a0, sp, 0
        s32i    a8, sp, 4
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

        /* Get the high halves of the inputs into registers.  */
        srli    a2h, a2, 16
        srli    a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
        /* Clear the high halves of the inputs.  This does not matter
           for MUL16 because the high bits are ignored.  */
        extui   a2, a2, 0, 16
        extui   a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mul16u  dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mull    dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        umul_aa_ ## xhalf ## yhalf xreg, yreg; \
        rsr     dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
        extui   dst, src, 0, 16
#define set_arg_h(dst, src) \
        srli    dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a13, xreg); \
        set_arg_ ## yhalf (a14, yreg); \
        call0   .Lmul_mulsi3; \
        mov     dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a14, xreg); \
        set_arg_ ## yhalf (a15, yreg); \
        call12  .Lmul_mulsi3; \
        mov     dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

        /* Add pp1 and pp2 into a6 with carry-out in a9.  */
        do_mul(a6, a2, l, a3, h)        /* pp 1 */
        do_mul(a11, a2, h, a3, l)       /* pp 2 */
        movi    a9, 0
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Shift the high half of a9/a6 into position in a9.  Note that
           this value can be safely incremented without any carry-outs.  */
        ssai    16
        src     a9, a9, a6

        /* Compute the low word into a6.  */
        do_mul(a11, a2, l, a3, l)       /* pp 0 */
        sll     a6, a6
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Compute the high word into a2.  */
        do_mul(a2, a2, h, a3, h)        /* pp 3 */
        add     a2, a2, a9

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Restore values saved on the stack during the multiplication.  */
        l32i    a0, sp, 0
        l32i    a8, sp, 4
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */

        /* Shift left by 9 bits, unless there was a carry-out from the
           multiply, in which case, shift by 8 bits and increment the
           exponent.  */
        movi    a4, 9
        srli    a5, a2, 24 - 9
        beqz    a5, 1f
        addi    a4, a4, -1
        addi    a8, a8, 1
1:      ssl     a4
        src     a2, a2, a6
        sll     a6, a6

        /* Subtract the extra bias from the exponent sum (plus one to account
           for the explicit "1.0" of the mantissa that will be added to the
           exponent in the final result).  */
        movi    a4, 0x80
        sub     a8, a8, a4

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..fd are OK here.  */
        movi    a4, 0xfe
        bgeu    a8, a4, .Lmul_overflow

.Lmul_round:
        /* Round.  */
        bgez    a6, .Lmul_rounded
        addi    a2, a2, 1
        slli    a6, a6, 1
        beqz    a6, .Lmul_exactlyhalf

.Lmul_rounded:
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 23
        add     a2, a2, a8

.Lmul_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      a2, a2, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
        l32i    a12, sp, 16
        l32i    a13, sp, 20
        l32i    a14, sp, 24
        l32i    a15, sp, 28
        addi    sp, sp, 32
#endif
        leaf_return

.Lmul_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        j       .Lmul_rounded

.Lmul_overflow:
        bltz    a8, .Lmul_underflow
        /* Return +/- Infinity.  */
        movi    a8, 0xff
        slli    a2, a8, 23
        j       .Lmul_addsign

.Lmul_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        mov     a9, a6
        ssr     a8
        bgeui   a8, 32, .Lmul_flush_to_zero

        /* Shift a2 right.  Any bits that are shifted out of a2 are saved
           in a6 (combined with the shifted-out bits currently in a6) for
           rounding the result.  */
        sll     a6, a2
        srl     a2, a2

        /* Set the exponent to zero.  */
        movi    a8, 0

        /* Pack any nonzero bits shifted out into a6.  */
        beqz    a9, .Lmul_round
        movi    a9, 1
        or      a6, a6, a9
        j       .Lmul_round

.Lmul_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        j       .Lmul_done

#if XCHAL_NO_MUL

        /* For Xtensa processors with no multiply hardware, this simplified
           version of _mulsi3 is used for multiplying 16-bit chunks of
           the floating-point mantissas.  When using CALL0, this function
           uses a custom ABI: the inputs are passed in a13 and a14, the
           result is returned in a12, and a8 and a15 are clobbered.  */
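        /* For illustration only: the shift-add scheme of .Lmul_mulsi3 in C
           (hypothetical name).  Each pass consumes four multiplier bits,
           conditionally adding src2 scaled by 1, 2, 4 and 8 -- the
           add/addx2/addx4/addx8 steps below:

               #include <stdint.h>

               static uint32_t mulsi3 (uint32_t src1, uint32_t src2)
               {
                 uint32_t dst = 0;
                 while (src1 != 0)
                   {
                     if (src1 & 1) dst += src2;
                     if (src1 & 2) dst += src2 << 1;
                     if (src1 & 4) dst += src2 << 2;
                     if (src1 & 8) dst += src2 << 3;
                     src1 >>= 4;
                     src2 <<= 4;
                   }
                 return dst;
               }
        */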
        .align  4
.Lmul_mulsi3:
        leaf_entry sp, 16
        .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
        movi    \dst, 0
1:      add     \tmp1, \src2, \dst
        extui   \tmp2, \src1, 0, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx2 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 1, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx4 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 2, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx8 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 3, 1
        movnez  \dst, \tmp1, \tmp2

        srli    \src1, \src1, 4
        slli    \src2, \src2, 4
        bnez    \src1, 1b
        .endm
#if __XTENSA_CALL0_ABI__
        mul_mulsi3_body a12, a13, a14, a15, a8
#else
        /* The result will be written into a2, so save that argument in a4.  */
        mov     a4, a2
        mul_mulsi3_body a2, a4, a3, a5, a6
#endif
        leaf_return
#endif /* XCHAL_NO_MUL */
#endif /* L_mulsf3 */

#ifdef L_divsf3

/* Division */
__divsf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
        /* Clear the sign bit of y.  */
        slli    a3, a3, 1
        srli    a3, a3, 1

        /* Check for division by zero.  */
        beqz    a3, .Ldiv_yzero

        /* Normalize y.  Adjust the exponent in a9.  */
        do_nsau a10, a3, a4, a5
        addi    a10, a10, -8
        ssl     a10
        sll     a3, a3
        movi    a9, 1
        sub     a9, a9, a10
        j       .Ldiv_ynormalized

.Ldiv_yzero:
        /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
        slli    a4, a2, 1
        srli    a4, a4, 1
        srli    a2, a7, 31
        slli    a2, a2, 31
        or      a2, a2, a6
        bnez    a4, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Ldiv_xexpzero:
        /* Clear the sign bit of x.  */
        slli    a2, a2, 1
        srli    a2, a2, 1

        /* If x is zero, return zero.  */
        beqz    a2, .Ldiv_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        do_nsau a10, a2, a4, a5
        addi    a10, a10, -8
        ssl     a10
        sll     a2, a2
        movi    a8, 1
        sub     a8, a8, a10
        j       .Ldiv_xnormalized

.Ldiv_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        leaf_return

.Ldiv_xnan_or_inf:
        /* Set the sign bit of the result.  */
        srli    a7, a3, 31
        slli    a7, a7, 31
        xor     a2, a2, a7
        /* If y is NaN or Inf, return NaN.  */
        bnall   a3, a6, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Ldiv_ynan_or_inf:
        /* If y is Infinity, return zero.  */
        slli    a8, a3, 9
        beqz    a8, .Ldiv_return_zero
        /* y is NaN; return it.  */
        mov     a2, a3
        leaf_return

        .align  4
        .global __divsf3
        .type   __divsf3, @function
__divsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Get the sign of the result.  */
        xor     a7, a2, a3

        /* Check for NaN and infinity.  */
        ball    a2, a6, .Ldiv_xnan_or_inf
        ball    a3, a6, .Ldiv_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, a2, 23, 8
        extui   a9, a3, 23, 8

        beqz    a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
        beqz    a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

        /* Subtract the exponents.  */
        sub     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0xffffff
        or      a2, a2, a6
        and     a2, a2, a10
        or      a3, a3, a6
        and     a3, a3, a10

        /* The first digit of the mantissa division must be a one.
           Shift x (and adjust the exponent) as needed to make this true.  */
        bltu    a3, a2, 1f
        slli    a2, a2, 1
        addi    a8, a8, -1
1:
        /* Do the first subtraction and shift.  */
        sub     a2, a2, a3
        slli    a2, a2, 1

        /* Put the quotient into a10.  */
        movi    a10, 1

        /* Divide one bit at a time for 23 bits.  */
        movi    a9, 23
#if XCHAL_HAVE_LOOPS
        loop    a9, .Ldiv_loopend
#endif
.Ldiv_loop:
        /* Shift the quotient << 1.  */
        slli    a10, a10, 1

        /* Is this digit a 0 or 1?  */
        bltu    a2, a3, 1f

        /* Output a 1 and subtract.  */
        addi    a10, a10, 1
        sub     a2, a2, a3

        /* Shift the dividend << 1.  */
1:      slli    a2, a2, 1

#if !XCHAL_HAVE_LOOPS
        addi    a9, a9, -1
        bnez    a9, .Ldiv_loop
#endif
.Ldiv_loopend:

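        /* For illustration only: the restoring division above in C,
           together with the .Ldiv_round test that follows it.  "num" and
           "den" are 24-bit significands, pre-shifted so that num >= den
           (hypothetical names):

               #include <stdint.h>

               static uint32_t divide (uint32_t num, uint32_t den)
               {
                 uint32_t q = 1, rem = (num - den) << 1;
                 for (int i = 0; i < 23; i++)
                   {
                     q <<= 1;
                     if (rem >= den) { q += 1; rem -= den; }
                     rem <<= 1;
                   }
                 // round to nearest, ties to even
                 if (rem > den || (rem == den && (q & 1)))
                   q += 1;
                 return q;   // bit 23 holds the explicit "1.0"
               }
        */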
        /* Add the exponent bias (less one to account for the explicit "1.0"
           of the mantissa that will be added to the exponent in the final
           result).  */
        addi    a8, a8, 0x7e

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..fd are OK here.  */
        movi    a4, 0xfe
        bgeu    a8, a4, .Ldiv_overflow

.Ldiv_round:
        /* Round.  The remainder (<< 1) is in a2.  */
        bltu    a2, a3, .Ldiv_rounded
        addi    a10, a10, 1
        beq     a2, a3, .Ldiv_exactlyhalf

.Ldiv_rounded:
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 23
        add     a2, a10, a8

.Ldiv_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      a2, a2, a7
        leaf_return

.Ldiv_overflow:
        bltz    a8, .Ldiv_underflow
        /* Return +/- Infinity.  */
        addi    a8, a4, 1       /* 0xff */
        slli    a2, a8, 23
        j       .Ldiv_addsign

.Ldiv_exactlyhalf:
        /* Remainder is exactly half the divisor.  Round even.  */
        srli    a10, a10, 1
        slli    a10, a10, 1
        j       .Ldiv_rounded

.Ldiv_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        ssr     a8
        bgeui   a8, 32, .Ldiv_flush_to_zero

        /* Shift a10 right.  Any bits that are shifted out of a10 are
           saved in a6 for rounding the result.  */
        sll     a6, a10
        srl     a10, a10

        /* Set the exponent to zero.  */
        movi    a8, 0

        /* Pack any nonzero remainder (in a2) into a6.  */
        beqz    a2, 1f
        movi    a9, 1
        or      a6, a6, a9

        /* Round a10 based on the bits shifted out into a6.  */
1:      bgez    a6, .Ldiv_rounded
        addi    a10, a10, 1
        slli    a6, a6, 1
        bnez    a6, .Ldiv_rounded
        srli    a10, a10, 1
        slli    a10, a10, 1
        j       .Ldiv_rounded

.Ldiv_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        leaf_return

#endif /* L_divsf3 */

#ifdef L_cmpsf2

/* Equal and Not Equal */

        .align  4
        .global __eqsf2
        .global __nesf2
        .set    __nesf2, __eqsf2
        .type   __eqsf2, @function
__eqsf2:
        leaf_entry sp, 16
        bne     a2, a3, 4f

        /* The values are equal but NaN != NaN.  Check the exponent.  */
        movi    a6, 0x7f800000
        ball    a2, a6, 3f

        /* Equal.  */
        movi    a2, 0
        leaf_return

        /* Not equal.  */
2:      movi    a2, 1
        leaf_return

        /* Check if the mantissas are nonzero.  */
3:      slli    a7, a2, 9
        j       5f

        /* Check if x and y are zero with different signs.  */
4:      or      a7, a2, a3
        slli    a7, a7, 1

        /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
           of x when exponent(x) = 0x7f8 and x == y.  */
5:      movi    a2, 0
        movi    a3, 1
        movnez  a2, a3, a7
        leaf_return

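        /* For illustration only: the contract of __eqsf2 in C, on raw bit
           patterns (hypothetical helper).  Zero means equal; the two IEEE
           wrinkles handled above are that -0 equals +0 and that a NaN
           compares unequal to everything, including itself:

               #include <stdint.h>

               static int eqsf2_bits (uint32_t x, uint32_t y)
               {
                 uint32_t exp = 0x7f800000u;
                 if ((x & exp) == exp && (x << 9))   // x is a NaN
                   return 1;
                 if ((y & exp) == exp && (y << 9))   // y is a NaN
                   return 1;
                 if (x == y)
                   return 0;
                 return ((x | y) << 1) != 0;         // 0 iff x == -y == 0
               }
        */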

/* Greater Than */

        .align  4
        .global __gtsf2
        .type   __gtsf2, @function
__gtsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Lle_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 0
        leaf_return


/* Less Than or Equal */

        .align  4
        .global __lesf2
        .type   __lesf2, @function
__lesf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Lle_cmp
        movi    a2, 1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

.Lle_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, a2, a3
        bltz    a7, .Lle_diff_signs

        /* Check if x is negative.  */
        bltz    a2, .Lle_xneg

        /* Check if x <= y.  */
        bltu    a3, a2, 5f
4:      movi    a2, 0
        leaf_return

.Lle_xneg:
        /* Check if y <= x.  */
        bgeu    a2, a3, 4b
5:      movi    a2, 1
        leaf_return

.Lle_diff_signs:
        bltz    a2, 4b

        /* Check if both x and y are zero.  */
        or      a7, a2, a3
        slli    a7, a7, 1
        movi    a2, 1
        movi    a3, 0
        moveqz  a2, a3, a7
        leaf_return


/* Greater Than or Equal */

        .align  4
        .global __gesf2
        .type   __gesf2, @function
__gesf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Llt_cmp
        movi    a2, -1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, -1
        leaf_return


/* Less Than */

        .align  4
        .global __ltsf2
        .type   __ltsf2, @function
__ltsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Llt_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 0
        leaf_return

.Llt_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, a2, a3
        bltz    a7, .Llt_diff_signs

        /* Check if x is negative.  */
        bltz    a2, .Llt_xneg

        /* Check if x < y.  */
        bgeu    a2, a3, 5f
4:      movi    a2, -1
        leaf_return

.Llt_xneg:
        /* Check if y < x.  */
        bltu    a3, a2, 4b
5:      movi    a2, 0
        leaf_return

.Llt_diff_signs:
        bgez    a2, 5b

        /* Check if both x and y are nonzero.  */
        or      a7, a2, a3
        slli    a7, a7, 1
        movi    a2, 0
        movi    a3, -1
        movnez  a2, a3, a7
        leaf_return

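        /* For illustration only: the sign-magnitude ordering used by
           .Llt_cmp in C, with NaNs already filtered out by the entry
           points above (hypothetical helper; returns -1 when x < y,
           else 0, matching __ltsf2):

               #include <stdint.h>

               static int ltsf2_bits (uint32_t x, uint32_t y)
               {
                 if ((x ^ y) >> 31)            // signs differ
                   {
                     if (!(x >> 31))           // x >= +0, so not less
                       return 0;
                     return ((x | y) << 1)     // x < y unless both are
                            ? -1 : 0;          // zeros of opposite sign
                   }
                 if (x >> 31)                  // both negative: the bit
                   return y < x ? -1 : 0;      // patterns order reversed
                 return x < y ? -1 : 0;        // both positive
               }
        */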

/* Unordered */

        .align  4
        .global __unordsf2
        .type   __unordsf2, @function
__unordsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 3f
1:      ball    a3, a6, 4f
2:      movi    a2, 0
        leaf_return

3:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

4:      slli    a7, a3, 9
        beqz    a7, 2b
        movi    a2, 1
        leaf_return

#endif /* L_cmpsf2 */

#ifdef L_fixsfsi

        .align  4
        .global __fixsfsi
        .type   __fixsfsi, @function
__fixsfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixsfsi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7e
        bgei    a4, 32, .Lfixsfsi_maxint
        blti    a4, 1, .Lfixsfsi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    a5, a7, 8

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixsfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixsfsi_maxint

        /* Translate NaN to +maxint.  */
        movi    a2, 0

.Lfixsfsi_maxint:
        slli    a4, a6, 8       /* 0x80000000 */
        addi    a5, a4, -1      /* 0x7fffffff */
        movgez  a4, a5, a2
        mov     a2, a4
        leaf_return

.Lfixsfsi_zero:
        movi    a2, 0
        leaf_return

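        /* For illustration only: __fixsfsi's behavior in C (hypothetical
           helper on raw bits).  The conversion truncates toward zero,
           saturates on overflow, and maps NaN to +maxint, exactly as the
           .Lfixsfsi_maxint path above does:

               #include <stdint.h>

               static int32_t fixsfsi_bits (uint32_t x)
               {
                 int neg = (int32_t) x < 0;
                 uint32_t efield = (x >> 23) & 0xff;
                 int32_t e = (int32_t) efield - 0x7e;
                 if (efield == 0xff && (x << 9))
                   return INT32_MAX;                    // NaN -> +maxint
                 if (e >= 32)                           // Inf or too big
                   return neg ? INT32_MIN : INT32_MAX;
                 if (e < 1)                             // |x| < 1
                   return 0;
                 uint32_t v = ((x | 0x00800000u) << 8) >> (32 - e);
                 return neg ? -(int32_t) v : (int32_t) v;
               }
        */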
#endif /* L_fixsfsi */

#ifdef L_fixsfdi

        .align  4
        .global __fixsfdi
        .type   __fixsfdi, @function
__fixsfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixsfdi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7e
        bgei    a4, 64, .Lfixsfdi_maxint
        blti    a4, 1, .Lfixsfdi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    xh, a7, 8

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixsfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixsfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixsfdi_smallshift:
        movi    xl, 0
        sll     xl, xh
        srl     xh, xh
        j       .Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixsfdi_maxint

        /* Translate NaN to +maxint.  */
        movi    a2, 0

.Lfixsfdi_maxint:
        slli    a7, a6, 8       /* 0x80000000 */
        bgez    a2, 1f
        mov     xh, a7
        movi    xl, 0
        leaf_return

1:      addi    xh, a7, -1      /* 0x7fffffff */
        movi    xl, -1
        leaf_return

.Lfixsfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

#endif /* L_fixsfdi */

#ifdef L_fixunssfsi

        .align  4
        .global __fixunssfsi
        .type   __fixunssfsi, @function
__fixunssfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixunssfsi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7f
        bgei    a4, 32, .Lfixunssfsi_maxint
        bltz    a4, .Lfixunssfsi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    a5, a7, 8

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 32, .Lfixunssfsi_bigexp
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixunssfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixunssfsi_maxint

        /* Translate NaN to 0xffffffff.  */
        movi    a2, -1
        leaf_return

.Lfixunssfsi_maxint:
        slli    a4, a6, 8       /* 0x80000000 */
        movi    a5, -1          /* 0xffffffff */
        movgez  a4, a5, a2
        mov     a2, a4
        leaf_return

.Lfixunssfsi_zero:
        movi    a2, 0
        leaf_return

.Lfixunssfsi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    a2, 1f
        mov     a2, a5          /* no shift needed */
        leaf_return

        /* Return 0x80000000 if negative.  */
1:      slli    a2, a6, 8
        leaf_return

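        /* For illustration only: a C model of __fixunssfsi (hypothetical
           helper).  Out-of-range positive values, +Infinity and NaN all
           saturate to 0xffffffff; negative values mirror what the
           assembly happens to produce (the C standard leaves them
           undefined):

               #include <stdint.h>

               static uint32_t fixunssfsi_bits (uint32_t x)
               {
                 uint32_t efield = (x >> 23) & 0xff;
                 int32_t e = (int32_t) efield - 0x7f;
                 if (efield == 0xff && (x << 9))
                   return 0xffffffffu;                  // NaN
                 if (e >= 32)                           // Inf or too big
                   return (int32_t) x < 0 ? 0x80000000u : 0xffffffffu;
                 if (e < 0)                             // |x| < 1
                   return 0;
                 uint32_t v = ((x | 0x00800000u) << 8) >> (31 - e);
                 if ((int32_t) x < 0)                   // mimic the asm
                   return e == 31 ? 0x80000000u : -v;
                 return v;
               }
        */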
#endif /* L_fixunssfsi */

#ifdef L_fixunssfdi

        .align  4
        .global __fixunssfdi
        .type   __fixunssfdi, @function
__fixunssfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixunssfdi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7f
        bgei    a4, 64, .Lfixunssfdi_maxint
        bltz    a4, .Lfixunssfdi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    xh, a7, 8

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 64, .Lfixunssfdi_bigexp
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixunssfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixunssfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixunssfdi_smallshift:
        movi    xl, 0
        src     xl, xh, xl
        srl     xh, xh
        j       .Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixunssfdi_maxint

        /* Translate NaN to 0xffffffff....  */
1:      movi    xh, -1
        movi    xl, -1
        leaf_return

.Lfixunssfdi_maxint:
        bgez    a2, 1b
2:      slli    xh, a6, 8       /* 0x80000000 */
        movi    xl, 0
        leaf_return

.Lfixunssfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

.Lfixunssfdi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    a7, 2b
        movi    xl, 0
        leaf_return             /* no shift needed */

#endif /* L_fixunssfdi */

#ifdef L_floatsisf

        .align  4
        .global __floatunsisf
        .type   __floatunsisf, @function
__floatunsisf:
        leaf_entry sp, 16
        beqz    a2, .Lfloatsisf_return

        /* Set the sign to zero and jump to the floatsisf code.  */
        movi    a7, 0
        j       .Lfloatsisf_normalize

        .align  4
        .global __floatsisf
        .type   __floatsisf, @function
__floatsisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        beqz    a2, .Lfloatsisf_return

        /* Save the sign.  */
        extui   a7, a2, 31, 1

        /* Get the absolute value.  */
#if XCHAL_HAVE_ABS
        abs     a2, a2
#else
        neg     a4, a2
        movltz  a2, a4, a2
#endif

.Lfloatsisf_normalize:
        /* Normalize with the first 1 bit in the msb.  */
        do_nsau a4, a2, a5, a6
        ssl     a4
        sll     a5, a2

        /* Shift the mantissa into position, with rounding bits in a6.  */
        srli    a2, a5, 8
        slli    a6, a5, (32 - 8)

        /* Set the exponent.  */
        movi    a5, 0x9d        /* 0x7e + 31 */
        sub     a5, a5, a4
        slli    a5, a5, 23
        add     a2, a2, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      a2, a2, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, .Lfloatsisf_return
        addi    a2, a2, 1       /* Overflow to the exponent is OK.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
        leaf_return

.Lfloatsisf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

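        /* For illustration only: a C model of __floatsisf (hypothetical
           helper).  The exponent constant works out as 0x7f + 31 - clz,
           written above as 0x9d - nsau because adding the mantissa, whose
           bit 23 is the implicit "1.0", bumps the field by one more:

               #include <stdint.h>

               static uint32_t floatsisf_bits (int32_t i)
               {
                 if (i == 0)
                   return 0;
                 uint32_t sign = i < 0 ? 0x80000000u : 0;
                 uint32_t a = i < 0 ? -(uint32_t) i : (uint32_t) i;
                 unsigned clz = __builtin_clz (a);
                 uint32_t m = a << clz;         // leading 1 in the msb
                 uint32_t r = (((0x9du - clz) << 23) + (m >> 8)) | sign;
                 uint32_t guard = m << 24;      // the 8 bits shifted out
                 if (guard & 0x80000000u)       // >= half ulp: round up,
                   {                            // possibly carrying into
                     r += 1;                    // the exponent
                     if ((guard << 1) == 0)
                       r &= ~1u;                // ties to even
                   }
                 return r;
               }
        */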
#endif /* L_floatsisf */

#ifdef L_floatdisf

        .align  4
        .global __floatundisf
        .type   __floatundisf, @function
__floatundisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Set the sign to zero and jump to the floatdisf code.  */
        movi    a7, 0
        j       .Lfloatdisf_normalize

        .align  4
        .global __floatdisf
        .type   __floatdisf, @function
__floatdisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Save the sign.  */
        extui   a7, xh, 31, 1

        /* Get the absolute value.  */
        bgez    xh, .Lfloatdisf_normalize
        neg     xl, xl
        neg     xh, xh
        beqz    xl, .Lfloatdisf_normalize
        addi    xh, xh, -1

.Lfloatdisf_normalize:
        /* Normalize with the first 1 bit in the msb of xh.  */
        beqz    xh, .Lfloatdisf_bigshift
        do_nsau a4, xh, a5, a6
        ssl     a4
        src     xh, xh, xl
        sll     xl, xl

.Lfloatdisf_shifted:
        /* Shift the mantissa into position, with rounding bits in a6.  */
        ssai    8
        sll     a5, xl
        src     a6, xh, xl
        srl     xh, xh
        beqz    a5, 1f
        movi    a5, 1
        or      a6, a6, a5
1:
        /* Set the exponent.  */
        movi    a5, 0xbd        /* 0x7e + 63 */
        sub     a5, a5, a4
        slli    a5, a5, 23
        add     a2, xh, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      a2, a2, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, 2f
        addi    a2, a2, 1       /* Overflow to the exponent is OK.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatdisf_exactlyhalf
2:      leaf_return

.Lfloatdisf_bigshift:
        /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
        do_nsau a4, xl, a5, a6
        ssl     a4
        sll     xh, xl
        movi    xl, 0
        addi    a4, a4, 32
        j       .Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

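        /* For illustration only: why the sticky OR above is needed.  Up
           to 40 bits of a 64-bit integer cannot fit in the significand;
           the 32 guard bits after the significand feed the rounding
           decision directly, and any nonzero bits below them are folded
           into the guard word's lsb so that exact halves are still
           distinguished from "slightly more than half" (hypothetical
           name; "norm" has its leading 1 in bit 63):

               #include <stdint.h>

               static uint32_t guard_word (uint64_t norm)
               {
                 uint32_t guard = (uint32_t) (norm >> 8); // bits 39..8
                 if ((uint8_t) norm != 0)                 // bits 7..0
                   guard |= 1;                            // sticky bit
                 return guard;
               }
        */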
#endif /* L_floatdisf */