0
|
1 /* IEEE-754 single-precision functions for Xtensa
|
|
2 Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
|
|
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
|
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify it
|
|
8 under the terms of the GNU General Public License as published by
|
|
9 the Free Software Foundation; either version 3, or (at your option)
|
|
10 any later version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful, but WITHOUT
|
|
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
|
15 License for more details.
|
|
16
|
|
17 Under Section 7 of GPL version 3, you are granted additional
|
|
18 permissions described in the GCC Runtime Library Exception, version
|
|
19 3.1, as published by the Free Software Foundation.
|
|
20
|
|
21 You should have received a copy of the GNU General Public License and
|
|
22 a copy of the GCC Runtime Library Exception along with this program;
|
|
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
24 <http://www.gnu.org/licenses/>. */
|
|
25
|
|
26 #ifdef __XTENSA_EB__
|
|
27 #define xh a2
|
|
28 #define xl a3
|
|
29 #define yh a4
|
|
30 #define yl a5
|
|
31 #else
|
|
32 #define xh a3
|
|
33 #define xl a2
|
|
34 #define yh a5
|
|
35 #define yl a4
|
|
36 #endif
|
|
37
|
|
38 /* Warning! The branch displacements for some Xtensa branch instructions
|
|
39 are quite small, and this code has been carefully laid out to keep
|
|
40 branch targets in range. If you change anything, be sure to check that
|
|
41 the assembler is not relaxing anything to branch over a jump. */
|
|
42
|
|
#ifdef L_negsf2

        /* float __negsf2 (float x)
           Negate a single-precision value by flipping its sign bit.
           This is a pure bit operation: it applies equally to zeros,
           subnormals, Infinities and NaNs.
           In:       a2 = raw IEEE-754 bits of x
           Out:      a2 = raw bits of -x
           Clobbers: a4 (holds the sign-bit mask).  */
        .align  4
        .global __negsf2
        .type   __negsf2, @function
__negsf2:
        leaf_entry sp, 16
        movi    a4, 0x80000000          /* sign-bit mask */
        xor     a2, a2, a4              /* toggle the sign bit */
        leaf_return

#endif /* L_negsf2 */
|
|
55
|
|
56 #ifdef L_addsubsf3
|
|
57
|
|
/* Addition

   float __addsf3 (float x, float y)
   In:       a2 = raw IEEE-754 bits of x, a3 = raw bits of y
   Out:      a2 = raw bits of x + y (round-to-nearest-even)
   Scratch:  a6 (0x7f800000 exponent mask), a7-a10 temporaries.
   NOTE(review): a2/a3 in, result in a2 matches the usage throughout
   this file; confirm against the Xtensa softfloat ABI macros
   (leaf_entry/leaf_return) defined elsewhere.  */
__addsf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Ladd_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   a3, a6, 1f
        /* If x is a NaN (nonzero mantissa), return it.  Otherwise x is
           Inf and y is Inf or NaN, so return y.  */
        slli    a7, a2, 9               /* isolate x's mantissa bits */
        beqz    a7, .Ladd_ynan_or_inf
1:      leaf_return

.Ladd_ynan_or_inf:
        /* Return y.  */
        mov     a2, a3
        leaf_return

.Ladd_opposite_signs:
        /* Operand signs differ.  Flip y's sign and do a subtraction.  */
        slli    a7, a6, 8               /* 0x7f800000 << 8 = 0x80000000 */
        xor     a3, a3, a7
        j       .Lsub_same_sign

        .align  4
        .global __addsf3
        .type   __addsf3, @function
__addsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000          /* exponent-field mask */

        /* Check if the two operands have the same sign.  */
        xor     a7, a2, a3
        bltz    a7, .Ladd_opposite_signs

.Ladd_same_sign:
        /* Check if either exponent field is all ones, i.e. the value
           matches mask 0x7f800000 (NaN or Infinity).  */
        ball    a2, a6, .Ladd_xnan_or_inf
        ball    a3, a6, .Ladd_ynan_or_inf

        /* Compare the exponents.  The smaller operand will be shifted
           right by the exponent difference and added to the larger
           one.  (9-bit extract includes the sign bit, which is equal
           for both operands here.)  */
        extui   a7, a2, 23, 9
        extui   a8, a3, 23, 9
        bltu    a7, a8, .Ladd_shiftx

.Ladd_shifty:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   a3, a6, .Ladd_yexpzero

        /* Replace y's sign/exponent with the implicit "1.0" bit:
           clear the top 8 bits and set bit 23.  */
        or      a3, a3, a6
        slli    a3, a3, 8
        srli    a3, a3, 8

.Ladd_yexpdiff:
        /* Compute the exponent difference.  */
        sub     a10, a7, a8

        /* Exponent difference > 32 -- just return the bigger value.  */
        bgeui   a10, 32, 1f

        /* Shift y right by the exponent difference.  Any bits that are
           shifted out of y are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, a3, a9              /* a9 = guard bits shifted out */
        srl     a3, a3

        /* Do the addition.  */
        add     a2, a2, a3

        /* Check if the add overflowed into the exponent.  */
        extui   a10, a2, 23, 9
        beq     a10, a7, .Ladd_round
        mov     a8, a7                  /* carry path expects exponent in a8 */
        j       .Ladd_carry

.Ladd_yexpzero:
        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0", and increment the apparent exponent
           because subnormals behave as if they had the minimum (nonzero)
           exponent.  Test for the case when both exponents are zero.  */
        slli    a3, a3, 9
        srli    a3, a3, 9
        bnone   a2, a6, .Ladd_bothexpzero
        addi    a8, a8, 1
        j       .Ladd_yexpdiff

.Ladd_bothexpzero:
        /* Both exponents are zero.  Handle this as a special case.  There
           is no need to shift or round, and the normal code for handling
           a carry into the exponent field will not work because it
           assumes there is an implicit "1.0" that needs to be added.
           (A mantissa carry here lands in the exponent field, which is
           exactly the correct subnormal -> normal transition.)  */
        add     a2, a2, a3
1:      leaf_return

.Ladd_xexpzero:
        /* Same as "yexpzero" except skip handling the case when both
           exponents are zero.  (That case cannot reach here: x's
           exponent is strictly smaller than y's on this path.)  */
        slli    a2, a2, 9
        srli    a2, a2, 9
        addi    a7, a7, 1
        j       .Ladd_xexpdiff

.Ladd_shiftx:
        /* Same thing as the "shifty" code, but with x and y swapped.  Also,
           because the exponent difference is always nonzero in this version,
           the shift sequence can use SLL and skip loading a constant zero.  */
        bnone   a2, a6, .Ladd_xexpzero

        or      a2, a2, a6
        slli    a2, a2, 8
        srli    a2, a2, 8

.Ladd_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Ladd_returny

        ssr     a10
        sll     a9, a2                  /* guard bits shifted out of x */
        srl     a2, a2

        add     a2, a2, a3

        /* Check if the add overflowed into the exponent.  */
        extui   a10, a2, 23, 9
        bne     a10, a8, .Ladd_carry

.Ladd_round:
        /* Round up if the leftover fraction is >= 1/2 (msb of a9 set).  */
        bgez    a9, 1f
        addi    a2, a2, 1

        /* Check if the leftover fraction is exactly 1/2 (round to even).  */
        slli    a9, a9, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_returny:
        mov     a2, a3
        leaf_return

.Ladd_carry:
        /* The addition has overflowed into the exponent field, so the
           value needs to be renormalized.  The mantissa of the result
           can be recovered by subtracting the original exponent and
           adding 0x800000 (which is the explicit "1.0" for the
           mantissa of the non-shifted operand -- the "1.0" for the
           shifted operand was already added).  The mantissa can then
           be shifted right by one bit.  The explicit "1.0" of the
           shifted mantissa then needs to be replaced by the exponent,
           incremented by one to account for the normalizing shift.
           It is faster to combine these operations: do the shift first
           and combine the additions and subtractions.  If x is the
           original exponent, the result is:
               shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
           or:
               shifted mantissa + ((x + 1) << 22)
           Note that the exponent is incremented here by leaving the
           explicit "1.0" of the mantissa in the exponent field.  */

        /* Shift x right by one bit.  Save the lsb.  */
        mov     a10, a2
        srli    a2, a2, 1

        /* See explanation above.  The original exponent is in a8.  */
        addi    a8, a8, 1
        slli    a8, a8, 22
        add     a2, a2, a8

        /* Return an Infinity if the exponent overflowed.  */
        ball    a2, a6, .Ladd_infinity

        /* Same thing as the "round" code except the msb of the leftover
           fraction is bit 0 of a10, with the rest of the fraction in a9.  */
        bbci.l  a10, 0, 1f
        addi    a2, a2, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_infinity:
        /* Clear the mantissa.  */
        srli    a2, a2, 23
        slli    a2, a2, 23

        /* The sign bit may have been lost in a carry-out.  Put it back.  */
        slli    a8, a8, 1
        or      a2, a2, a8
        leaf_return

.Ladd_exactlyhalf:
        /* Round down to the nearest even value (clear the lsb).  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return
|
|
257
|
|
258
|
|
/* Subtraction

   float __subsf3 (float x, float y)
   In:       a2 = raw IEEE-754 bits of x, a3 = raw bits of y
   Out:      a2 = raw bits of x - y (round-to-nearest-even)
   Scratch:  a6 (0x7f800000 exponent mask), a7-a11 temporaries.
   Shares code with __addsf3 above: opposite-sign subtraction is
   turned into a same-sign addition (.Ladd_same_sign) and vice versa.  */
__subsf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Lsub_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   a3, a6, 1f
        /* Both x and y are either NaN or Inf, so the result is NaN.
           (Inf - Inf with equal signs is invalid, so NaN is correct
           on this path for infinities too.)  */
        movi    a4, 0x400000            /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Lsub_ynan_or_inf:
        /* Negate y and return it.  */
        slli    a7, a6, 8               /* sign-bit mask 0x80000000 */
        xor     a2, a3, a7
        leaf_return

.Lsub_opposite_signs:
        /* Operand signs differ.  Flip y's sign and do an addition.  */
        slli    a7, a6, 8
        xor     a3, a3, a7
        j       .Ladd_same_sign

        .align  4
        .global __subsf3
        .type   __subsf3, @function
__subsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000          /* exponent-field mask */

        /* Check if the two operands have the same sign.  */
        xor     a7, a2, a3
        bltz    a7, .Lsub_opposite_signs

.Lsub_same_sign:
        /* Check if either exponent field is all ones, i.e. the value
           matches mask 0x7f800000 (NaN or Infinity).  */
        ball    a2, a6, .Lsub_xnan_or_inf
        ball    a3, a6, .Lsub_ynan_or_inf

        /* Compare the operands.  In contrast to addition, the entire
           value matters here, since the smaller magnitude must be
           subtracted from the larger.  */
        extui   a7, a2, 23, 8
        extui   a8, a3, 23, 8
        bltu    a2, a3, .Lsub_xsmaller

.Lsub_ysmaller:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   a3, a6, .Lsub_yexpzero

        /* Replace y's sign/exponent with the implicit "1.0" bit.  */
        or      a3, a3, a6
        slli    a3, a3, 8
        srli    a3, a3, 8

.Lsub_yexpdiff:
        /* Compute the exponent difference.  */
        sub     a10, a7, a8

        /* Exponent difference > 32 -- just return the bigger value.  */
        bgeui   a10, 32, 1f

        /* Shift y right by the exponent difference.  Any bits that are
           shifted out of y are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, a3, a9
        srl     a3, a3

        sub     a2, a2, a3

        /* Subtract the leftover bits in a9 from zero and propagate any
           borrow from a2.  */
        neg     a9, a9
        addi    a10, a2, -1
        movnez  a2, a10, a9             /* a2 -= 1 iff guard bits nonzero */

        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, a2, 23, 8
        beq     a10, a7, .Lsub_round
        j       .Lsub_borrow

.Lsub_yexpzero:
        /* Return zero if the inputs are equal.  (For the non-subnormal
           case, subtracting the "1.0" will cause a borrow from the exponent
           and this case can be detected when handling the borrow.)  */
        beq     a2, a3, .Lsub_return_zero

        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0".  Unless x is also a subnormal, increment
           y's apparent exponent because subnormals behave as if they had
           the minimum (nonzero) exponent.  */
        slli    a3, a3, 9
        srli    a3, a3, 9
        bnone   a2, a6, .Lsub_yexpdiff
        addi    a8, a8, 1
        j       .Lsub_yexpdiff

.Lsub_returny:
        /* Negate and return y.  */
        slli    a7, a6, 8
        xor     a2, a3, a7
1:      leaf_return

.Lsub_xsmaller:
        /* Same thing as the "ysmaller" code, but with x and y swapped and
           with y negated.  */
        bnone   a2, a6, .Lsub_xexpzero

        or      a2, a2, a6
        slli    a2, a2, 8
        srli    a2, a2, 8

.Lsub_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Lsub_returny

        ssr     a10
        movi    a9, 0
        src     a9, a2, a9              /* guard bits shifted out of x */
        srl     a2, a2

        /* Negate y.  */
        slli    a11, a6, 8
        xor     a3, a3, a11

        sub     a2, a3, a2

        /* Propagate a borrow for any nonzero guard bits (see above).  */
        neg     a9, a9
        addi    a10, a2, -1
        movnez  a2, a10, a9

        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, a2, 23, 8
        bne     a10, a8, .Lsub_borrow

.Lsub_round:
        /* Round up if the leftover fraction is >= 1/2 (msb of a9 set).  */
        bgez    a9, 1f
        addi    a2, a2, 1

        /* Check if the leftover fraction is exactly 1/2 (round to even).  */
        slli    a9, a9, 1
        beqz    a9, .Lsub_exactlyhalf
1:      leaf_return

.Lsub_xexpzero:
        /* Same as "yexpzero".  */
        beq     a2, a3, .Lsub_return_zero
        slli    a2, a2, 9
        srli    a2, a2, 9
        bnone   a3, a6, .Lsub_xexpdiff
        addi    a7, a7, 1
        j       .Lsub_xexpdiff

.Lsub_return_zero:
        movi    a2, 0
        leaf_return

.Lsub_borrow:
        /* The subtraction has underflowed into the exponent field, so the
           value needs to be renormalized.  Shift the mantissa left as
           needed to remove any leading zeros and adjust the exponent
           accordingly.  If the exponent is not large enough to remove
           all the leading zeros, the result will be a subnormal value.
           Here a10 holds the result's (borrowed) exponent field.  */

        slli    a8, a2, 9               /* mantissa, left-justified */
        beqz    a8, .Lsub_xzero
        do_nsau a6, a8, a7, a11         /* a6 = leading-zero count */
        srli    a8, a8, 9
        bge     a6, a10, .Lsub_subnormal
        addi    a6, a6, 1               /* add 1 for the implicit "1.0" */

.Lsub_normalize_shift:
        /* Shift the mantissa (a8/a9) left by a6.  */
        ssl     a6
        src     a8, a8, a9
        sll     a9, a9

        /* Combine the shifted mantissa with the sign and exponent,
           decrementing the exponent by a6.  (The exponent has already
           been decremented by one due to the borrow from the subtraction,
           but adding the mantissa will increment the exponent by one.)  */
        srli    a2, a2, 23
        sub     a2, a2, a6
        slli    a2, a2, 23
        add     a2, a2, a8
        j       .Lsub_round

.Lsub_exactlyhalf:
        /* Round down to the nearest even value (clear the lsb).  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

.Lsub_xzero:
        /* If there was a borrow from the exponent, and the mantissa and
           guard digits are all zero, then the inputs were equal and the
           result should be zero.  */
        beqz    a9, .Lsub_return_zero

        /* Only the guard digit is nonzero.  Shift by min(24, a10).  */
        addi    a11, a10, -24
        movi    a6, 24
        movltz  a6, a10, a11            /* a6 = a10 if a10 < 24 */
        j       .Lsub_normalize_shift

.Lsub_subnormal:
        /* The exponent is too small to shift away all the leading zeros.
           Set a6 to the current exponent (which has already been
           decremented by the borrow) so that the exponent of the result
           will be zero.  Do not add 1 to a6 in this case, because: (1)
           adding the mantissa will not increment the exponent, so there is
           no need to subtract anything extra from the exponent to
           compensate, and (2) the effective exponent of a subnormal is 1
           not 0 so the shift amount must be 1 smaller than normal.  */
        mov     a6, a10
        j       .Lsub_normalize_shift
|
|
480
|
|
481 #endif /* L_addsubsf3 */
|
|
482
|
|
483 #ifdef L_mulsf3
|
|
484
|
|
/* Multiplication

   float __mulsf3 (float x, float y)
   In:       a2 = raw IEEE-754 bits of x, a3 = raw bits of y
   Out:      a2 = raw bits of x * y (round-to-nearest-even)
   The 32x32->64 product is formed with whichever multiply option the
   core provides (MUL32_HIGH, MUL16, MUL32, MAC16), or by calling the
   software helper .Lmul_mulsi3 when there is no multiply hardware.  */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

__mulsf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
        /* Clear the sign bit of x.  */
        slli    a2, a2, 1
        srli    a2, a2, 1

        /* If x is zero, return zero.  */
        beqz    a2, .Lmul_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        do_nsau a10, a2, a11, a12       /* a10 = leading-zero count */
        addi    a10, a10, -8            /* bias so the "1.0" lands at bit 23 */
        ssl     a10
        sll     a2, a2
        movi    a8, 1                   /* effective subnormal exponent */
        sub     a8, a8, a10
        j       .Lmul_xnormalized

.Lmul_yexpzero:
        /* Clear the sign bit of y.  */
        slli    a3, a3, 1
        srli    a3, a3, 1

        /* If y is zero, return zero.  */
        beqz    a3, .Lmul_return_zero

        /* Normalize y.  Adjust the exponent in a9.  */
        do_nsau a10, a3, a11, a12
        addi    a10, a10, -8
        ssl     a10
        sll     a3, a3
        movi    a9, 1
        sub     a9, a9, a10
        j       .Lmul_ynormalized

.Lmul_return_zero:
        /* Return zero with the appropriate sign bit (sign of the
           result is the msb of a7 = x ^ y).  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        j       .Lmul_done

.Lmul_xnan_or_inf:
        /* If y is zero, return NaN (Inf * 0 is invalid).  */
        slli    a8, a3, 1
        bnez    a8, 1f
        movi    a4, 0x400000            /* make it a quiet NaN */
        or      a2, a2, a4
        j       .Lmul_done
1:
        /* If y is NaN, return y.  */
        bnall   a3, a6, .Lmul_returnx
        slli    a8, a3, 9               /* mantissa zero => Inf, not NaN */
        beqz    a8, .Lmul_returnx

.Lmul_returny:
        mov     a2, a3

.Lmul_returnx:
        /* Set the sign bit and return.  */
        extui   a7, a7, 31, 1
        slli    a2, a2, 1
        ssai    1
        src     a2, a7, a2              /* splice result sign into bit 31 */
        j       .Lmul_done

.Lmul_ynan_or_inf:
        /* If x is zero, return NaN (0 * Inf is invalid).  */
        slli    a8, a2, 1
        bnez    a8, .Lmul_returny
        movi    a7, 0x400000            /* make it a quiet NaN */
        or      a2, a3, a7
        j       .Lmul_done

        .align  4
        .global __mulsf3
        .type   __mulsf3, @function
__mulsf3:
#if __XTENSA_CALL0_ABI__
        leaf_entry sp, 32
        addi    sp, sp, -32
        s32i    a12, sp, 16             /* save callee-saved regs (CALL0 ABI) */
        s32i    a13, sp, 20
        s32i    a14, sp, 24
        s32i    a15, sp, 28
#elif XCHAL_NO_MUL
        /* This is not really a leaf function; allocate enough stack space
           to allow CALL12s to a helper function.  */
        leaf_entry sp, 64
#else
        leaf_entry sp, 32
#endif
        movi    a6, 0x7f800000          /* exponent-field mask */

        /* Get the sign of the result.  */
        xor     a7, a2, a3

        /* Check for NaN and infinity.  */
        ball    a2, a6, .Lmul_xnan_or_inf
        ball    a3, a6, .Lmul_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, a2, 23, 8
        extui   a9, a3, 23, 8

        beqz    a8, .Lmul_xexpzero
.Lmul_xnormalized:
        beqz    a9, .Lmul_yexpzero
.Lmul_ynormalized:

        /* Add the exponents.  */
        add     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0xffffff
        or      a2, a2, a6
        and     a2, a2, a10
        or      a3, a3, a6
        and     a3, a3, a10

        /* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */

#if XCHAL_HAVE_MUL32_HIGH

        mull    a6, a2, a3
        muluh   a2, a2, a3

#else

        /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
           products.  These partial products are:

                0 xl * yl

                1 xl * yh
                2 xh * yl

                3 xh * yh

           If using the Mul16 or Mul32 multiplier options, these input
           chunks must be stored in separate registers.  For Mac16, the
           UMUL.AA.* opcodes can specify that the inputs come from either
           half of the registers, so there is no need to shift them out
           ahead of time.  If there is no multiply hardware, the 16-bit
           chunks can be extracted when setting up the arguments to the
           separate multiply function.  */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Calling a separate multiply function will clobber a0 and requires
           use of a8 as a temporary, so save those values now.  (The function
           uses a custom ABI so nothing else needs to be saved.)  */
        s32i    a0, sp, 0
        s32i    a8, sp, 4
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

        /* Get the high halves of the inputs into registers.  */
        srli    a2h, a2, 16
        srli    a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
        /* Clear the high halves of the inputs.  This does not matter
           for MUL16 because the high bits are ignored.  */
        extui   a2, a2, 0, 16
        extui   a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mul16u  dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mull    dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        umul_aa_ ## xhalf ## yhalf xreg, yreg; \
        rsr     dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
        extui   dst, src, 0, 16
#define set_arg_h(dst, src) \
        srli    dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a13, xreg); \
        set_arg_ ## yhalf (a14, yreg); \
        call0   .Lmul_mulsi3; \
        mov     dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a14, xreg); \
        set_arg_ ## yhalf (a15, yreg); \
        call12  .Lmul_mulsi3; \
        mov     dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

        /* Add pp1 and pp2 into a6 with carry-out in a9.  */
        do_mul(a6, a2, l, a3, h)        /* pp 1 */
        do_mul(a11, a2, h, a3, l)       /* pp 2 */
        movi    a9, 0
        add     a6, a6, a11
        bgeu    a6, a11, 1f             /* unsigned carry check */
        addi    a9, a9, 1
1:
        /* Shift the high half of a9/a6 into position in a9.  Note that
           this value can be safely incremented without any carry-outs.  */
        ssai    16
        src     a9, a9, a6

        /* Compute the low word into a6.  */
        do_mul(a11, a2, l, a3, l)       /* pp 0 */
        sll     a6, a6
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Compute the high word into a2.  */
        do_mul(a2, a2, h, a3, h)        /* pp 3 */
        add     a2, a2, a9

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Restore values saved on the stack during the multiplication.  */
        l32i    a0, sp, 0
        l32i    a8, sp, 4
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */

        /* Shift left by 9 bits, unless there was a carry-out from the
           multiply, in which case, shift by 8 bits and increment the
           exponent.  */
        movi    a4, 9
        srli    a5, a2, 24 - 9          /* nonzero iff product >= 2.0 */
        beqz    a5, 1f
        addi    a4, a4, -1
        addi    a8, a8, 1
1:      ssl     a4
        src     a2, a2, a6              /* a2 = normalized mantissa */
        sll     a6, a6                  /* a6 = leftover fraction bits */

        /* Subtract the extra bias from the exponent sum (plus one to account
           for the explicit "1.0" of the mantissa that will be added to the
           exponent in the final result).  */
        movi    a4, 0x80
        sub     a8, a8, a4

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..fd are OK here.  */
        movi    a4, 0xfe
        bgeu    a8, a4, .Lmul_overflow

.Lmul_round:
        /* Round up if the leftover fraction is >= 1/2 (msb of a6 set).  */
        bgez    a6, .Lmul_rounded
        addi    a2, a2, 1
        slli    a6, a6, 1
        beqz    a6, .Lmul_exactlyhalf

.Lmul_rounded:
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 23
        add     a2, a2, a8

.Lmul_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      a2, a2, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
        l32i    a12, sp, 16
        l32i    a13, sp, 20
        l32i    a14, sp, 24
        l32i    a15, sp, 28
        addi    sp, sp, 32
#endif
        leaf_return

.Lmul_exactlyhalf:
        /* Round down to the nearest even value (clear the lsb).  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        j       .Lmul_rounded

.Lmul_overflow:
        bltz    a8, .Lmul_underflow
        /* Return +/- Infinity.  */
        movi    a8, 0xff
        slli    a2, a8, 23
        j       .Lmul_addsign

.Lmul_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        mov     a9, a6                  /* remember previous guard bits */
        ssr     a8
        bgeui   a8, 32, .Lmul_flush_to_zero

        /* Shift a2 right.  Any bits that are shifted out of a2 are saved
           in a6 (combined with the shifted-out bits currently in a6) for
           rounding the result.  */
        sll     a6, a2
        srl     a2, a2

        /* Set the exponent to zero.  */
        movi    a8, 0

        /* Pack any nonzero bits shifted out into a6 (sticky bit).  */
        beqz    a9, .Lmul_round
        movi    a9, 1
        or      a6, a6, a9
        j       .Lmul_round

.Lmul_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        j       .Lmul_done
|
|
841
|
|
#if XCHAL_NO_MUL

        /* For Xtensa processors with no multiply hardware, this simplified
           version of _mulsi3 is used for multiplying 16-bit chunks of
           the floating-point mantissas.  When using CALL0, this function
           uses a custom ABI: the inputs are passed in a13 and a14, the
           result is returned in a12, and a8 and a15 are clobbered.
           With the windowed ABI it is reached via CALL12 with the
           standard a2/a3 argument registers.

           The loop below is a shift-and-add multiply that consumes 4 bits
           of src1 per iteration: each conditional add (via MOVNEZ on one
           extracted bit) accumulates src2 scaled by 1, 2, 4 or 8, then
           src1 is shifted right by 4 and src2 left by 4.  */
        .align  4
.Lmul_mulsi3:
        leaf_entry sp, 16
        .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
        movi    \dst, 0
1:      add     \tmp1, \src2, \dst
        extui   \tmp2, \src1, 0, 1      /* bit 0: add src2 * 1 */
        movnez  \dst, \tmp1, \tmp2

        do_addx2 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 1, 1      /* bit 1: add src2 * 2 */
        movnez  \dst, \tmp1, \tmp2

        do_addx4 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 2, 1      /* bit 2: add src2 * 4 */
        movnez  \dst, \tmp1, \tmp2

        do_addx8 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 3, 1      /* bit 3: add src2 * 8 */
        movnez  \dst, \tmp1, \tmp2

        srli    \src1, \src1, 4
        slli    \src2, \src2, 4
        bnez    \src1, 1b               /* loop until src1 is consumed */
        .endm
#if __XTENSA_CALL0_ABI__
        mul_mulsi3_body a12, a13, a14, a15, a8
#else
        /* The result will be written into a2, so save that argument in a4.  */
        mov     a4, a2
        mul_mulsi3_body a2, a4, a3, a5, a6
#endif
        leaf_return
#endif /* XCHAL_NO_MUL */
|
|
883 #endif /* L_mulsf3 */
|
|
884
|
|
885 #ifdef L_divsf3
|
|
886
|
|
887 /* Division */
|
|
888 __divsf3_aux:
|
|
889
|
|
890 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
|
|
891 (This code is placed before the start of the function just to
|
|
892 keep it in range of the limited branch displacements.) */
|
|
893
|
|
894 .Ldiv_yexpzero:
|
|
895 /* Clear the sign bit of y. */
|
|
896 slli a3, a3, 1
|
|
897 srli a3, a3, 1
|
|
898
|
|
899 /* Check for division by zero. */
|
|
900 beqz a3, .Ldiv_yzero
|
|
901
|
|
902 /* Normalize y. Adjust the exponent in a9. */
|
|
903 do_nsau a10, a3, a4, a5
|
|
904 addi a10, a10, -8
|
|
905 ssl a10
|
|
906 sll a3, a3
|
|
907 movi a9, 1
|
|
908 sub a9, a9, a10
|
|
909 j .Ldiv_ynormalized
|
|
910
|
|
911 .Ldiv_yzero:
|
|
912 /* y is zero. Return NaN if x is also zero; otherwise, infinity. */
|
|
913 slli a4, a2, 1
|
|
914 srli a4, a4, 1
|
|
915 srli a2, a7, 31
|
|
916 slli a2, a2, 31
|
|
917 or a2, a2, a6
|
|
918 bnez a4, 1f
|
|
919 movi a4, 0x400000 /* make it a quiet NaN */
|
|
920 or a2, a2, a4
|
|
921 1: leaf_return
|
|
922
|
|
923 .Ldiv_xexpzero:
|
|
924 /* Clear the sign bit of x. */
|
|
925 slli a2, a2, 1
|
|
926 srli a2, a2, 1
|
|
927
|
|
928 /* If x is zero, return zero. */
|
|
929 beqz a2, .Ldiv_return_zero
|
|
930
|
|
931 /* Normalize x. Adjust the exponent in a8. */
|
|
932 do_nsau a10, a2, a4, a5
|
|
933 addi a10, a10, -8
|
|
934 ssl a10
|
|
935 sll a2, a2
|
|
936 movi a8, 1
|
|
937 sub a8, a8, a10
|
|
938 j .Ldiv_xnormalized
|
|
939
|
|
940 .Ldiv_return_zero:
|
|
941 /* Return zero with the appropriate sign bit. */
|
|
942 srli a2, a7, 31
|
|
943 slli a2, a2, 31
|
|
944 leaf_return
|
|
945
|
|
946 .Ldiv_xnan_or_inf:
|
|
947 /* Set the sign bit of the result. */
|
|
948 srli a7, a3, 31
|
|
949 slli a7, a7, 31
|
|
950 xor a2, a2, a7
|
|
951 /* If y is NaN or Inf, return NaN. */
|
|
952 bnall a3, a6, 1f
|
|
953 movi a4, 0x400000 /* make it a quiet NaN */
|
|
954 or a2, a2, a4
|
|
955 1: leaf_return
|
|
956
|
|
957 .Ldiv_ynan_or_inf:
|
|
958 /* If y is Infinity, return zero. */
|
|
959 slli a8, a3, 9
|
|
960 beqz a8, .Ldiv_return_zero
|
|
961 /* y is NaN; return it. */
|
|
962 mov a2, a3
|
|
963 leaf_return
|
|
964
|
|
965 .align 4
|
|
966 .global __divsf3
|
|
967 .type __divsf3, @function
|
|
968 __divsf3:
|
|
969 leaf_entry sp, 16
|
|
970 movi a6, 0x7f800000
|
|
971
|
|
972 /* Get the sign of the result. */
|
|
973 xor a7, a2, a3
|
|
974
|
|
975 /* Check for NaN and infinity. */
|
|
976 ball a2, a6, .Ldiv_xnan_or_inf
|
|
977 ball a3, a6, .Ldiv_ynan_or_inf
|
|
978
|
|
979 /* Extract the exponents. */
|
|
980 extui a8, a2, 23, 8
|
|
981 extui a9, a3, 23, 8
|
|
982
|
|
983 beqz a9, .Ldiv_yexpzero
|
|
984 .Ldiv_ynormalized:
|
|
985 beqz a8, .Ldiv_xexpzero
|
|
986 .Ldiv_xnormalized:
|
|
987
|
|
988 /* Subtract the exponents. */
|
|
989 sub a8, a8, a9
|
|
990
|
|
991 /* Replace sign/exponent fields with explicit "1.0". */
|
|
992 movi a10, 0xffffff
|
|
993 or a2, a2, a6
|
|
994 and a2, a2, a10
|
|
995 or a3, a3, a6
|
|
996 and a3, a3, a10
|
|
997
|
|
998 /* The first digit of the mantissa division must be a one.
|
|
999 Shift x (and adjust the exponent) as needed to make this true. */
|
|
1000 bltu a3, a2, 1f
|
|
1001 slli a2, a2, 1
|
|
1002 addi a8, a8, -1
|
|
1003 1:
|
|
1004 /* Do the first subtraction and shift. */
|
|
1005 sub a2, a2, a3
|
|
1006 slli a2, a2, 1
|
|
1007
|
|
1008 /* Put the quotient into a10. */
|
|
1009 movi a10, 1
|
|
1010
|
|
1011 /* Divide one bit at a time for 23 bits. */
|
|
1012 movi a9, 23
|
|
1013 #if XCHAL_HAVE_LOOPS
|
|
1014 loop a9, .Ldiv_loopend
|
|
1015 #endif
|
|
1016 .Ldiv_loop:
|
|
1017 /* Shift the quotient << 1. */
|
|
1018 slli a10, a10, 1
|
|
1019
|
|
1020 /* Is this digit a 0 or 1? */
|
|
1021 bltu a2, a3, 1f
|
|
1022
|
|
1023 /* Output a 1 and subtract. */
|
|
1024 addi a10, a10, 1
|
|
1025 sub a2, a2, a3
|
|
1026
|
|
1027 /* Shift the dividend << 1. */
|
|
1028 1: slli a2, a2, 1
|
|
1029
|
|
1030 #if !XCHAL_HAVE_LOOPS
|
|
1031 addi a9, a9, -1
|
|
1032 bnez a9, .Ldiv_loop
|
|
1033 #endif
|
|
1034 .Ldiv_loopend:
|
|
1035
|
|
1036 /* Add the exponent bias (less one to account for the explicit "1.0"
|
|
1037 of the mantissa that will be added to the exponent in the final
|
|
1038 result). */
|
|
1039 addi a8, a8, 0x7e
|
|
1040
|
|
1041 /* Check for over/underflow. The value in a8 is one less than the
|
|
1042 final exponent, so values in the range 0..fd are OK here. */
|
|
1043 movi a4, 0xfe
|
|
1044 bgeu a8, a4, .Ldiv_overflow
|
|
1045
|
|
1046 .Ldiv_round:
|
|
1047 /* Round. The remainder (<< 1) is in a2. */
|
|
1048 bltu a2, a3, .Ldiv_rounded
|
|
1049 addi a10, a10, 1
|
|
1050 beq a2, a3, .Ldiv_exactlyhalf
|
|
1051
|
|
1052 .Ldiv_rounded:
|
|
1053 /* Add the exponent to the mantissa. */
|
|
1054 slli a8, a8, 23
|
|
1055 add a2, a10, a8
|
|
1056
|
|
1057 .Ldiv_addsign:
|
|
1058 /* Add the sign bit. */
|
|
1059 srli a7, a7, 31
|
|
1060 slli a7, a7, 31
|
|
1061 or a2, a2, a7
|
|
1062 leaf_return
|
|
1063
|
|
1064 .Ldiv_overflow:
|
|
1065 bltz a8, .Ldiv_underflow
|
|
1066 /* Return +/- Infinity. */
|
|
1067 addi a8, a4, 1 /* 0xff */
|
|
1068 slli a2, a8, 23
|
|
1069 j .Ldiv_addsign
|
|
1070
|
|
1071 .Ldiv_exactlyhalf:
|
|
1072 /* Remainder is exactly half the divisor. Round even. */
|
|
1073 srli a10, a10, 1
|
|
1074 slli a10, a10, 1
|
|
1075 j .Ldiv_rounded
|
|
1076
|
|
1077 .Ldiv_underflow:
|
|
1078 /* Create a subnormal value, where the exponent field contains zero,
|
|
1079 but the effective exponent is 1. The value of a8 is one less than
|
|
1080 the actual exponent, so just negate it to get the shift amount. */
|
|
1081 neg a8, a8
|
|
1082 ssr a8
|
|
1083 bgeui a8, 32, .Ldiv_flush_to_zero
|
|
1084
|
|
1085 /* Shift a10 right. Any bits that are shifted out of a10 are
|
|
1086 saved in a6 for rounding the result. */
|
|
1087 sll a6, a10
|
|
1088 srl a10, a10
|
|
1089
|
|
1090 /* Set the exponent to zero. */
|
|
1091 movi a8, 0
|
|
1092
|
|
1093 /* Pack any nonzero remainder (in a2) into a6. */
|
|
1094 beqz a2, 1f
|
|
1095 movi a9, 1
|
|
1096 or a6, a6, a9
|
|
1097
|
|
1098 /* Round a10 based on the bits shifted out into a6. */
|
|
1099 1: bgez a6, .Ldiv_rounded
|
|
1100 addi a10, a10, 1
|
|
1101 slli a6, a6, 1
|
|
1102 bnez a6, .Ldiv_rounded
|
|
1103 srli a10, a10, 1
|
|
1104 slli a10, a10, 1
|
|
1105 j .Ldiv_rounded
|
|
1106
|
|
1107 .Ldiv_flush_to_zero:
|
|
1108 /* Return zero with the appropriate sign bit. */
|
|
1109 srli a2, a7, 31
|
|
1110 slli a2, a2, 31
|
|
1111 leaf_return
|
|
1112
|
|
1113 #endif /* L_divsf3 */
|
|
1114
|
|
1115 #ifdef L_cmpsf2
|
|
1116
|
|
1117 /* Equal and Not Equal */
|
|
1118
|
|
/* int __eqsf2 (float x, float y) -- also aliased as __nesf2.
   In:  a2 = x, a3 = y (IEEE-754 single-precision bit patterns).
   Out: a2 = 0 if x == y, nonzero (1) otherwise.
   NaN compares unequal to everything, including itself, and
   +0.0 == -0.0 despite the differing bit patterns.  */

	.align	4
	.global	__eqsf2
	.global	__nesf2
	.set	__nesf2, __eqsf2
	.type	__eqsf2, @function
__eqsf2:
	leaf_entry sp, 16
	bne	a2, a3, 4f

	/* The bit patterns are equal, but NaN != NaN.  Check whether the
	   exponent field is all ones (Infinity or NaN).  */
	movi	a6, 0x7f800000
	ball	a2, a6, 3f

	/* Equal.  */
	movi	a2, 0
	leaf_return

	/* Not equal.  (NOTE(review): no branch targets this label; it
	   looks unreachable but is preserved byte-for-byte.)  */
2:	movi	a2, 1
	leaf_return

	/* Exponent is all ones: x is NaN iff the mantissa is nonzero.  */
3:	slli	a7, a2, 9
	j	5f

	/* Bit patterns differ: still equal if x and y are zeros with
	   different signs, i.e., (x | y) << 1 == 0.  */
4:	or	a7, a2, a3
	slli	a7, a7, 1

	/* Equal iff a7 == 0, where a7 is either (x | y) << 1, or the
	   mantissa of x when exponent(x) = 0x7f800000 and x == y.  */
5:	movi	a2, 0
	movi	a3, 1
	movnez	a2, a3, a7
	leaf_return
|
|
1154
|
|
1155
|
|
1156 /* Greater Than */
|
|
1157
|
|
/* int __gtsf2 (float x, float y)
   In:  a2 = x, a3 = y.  Out: result > 0 iff x > y.
   Returns 0 ("not greater") if either operand is NaN.
   The ordered-compare tail is shared: control falls through to
   .Lle_cmp, which is defined in __lesf2 below.  */

	.align	4
	.global	__gtsf2
	.type	__gtsf2, @function
__gtsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f		/* x has an all-ones exponent */
1:	bnall	a3, a6, .Lle_cmp	/* y finite: ordered compare */

	/* Check if y is a NaN (nonzero mantissa).  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN (nonzero mantissa).  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return
|
|
1178
|
|
1179
|
|
1180 /* Less Than or Equal */
|
|
1181
|
|
/* int __lesf2 (float x, float y)
   In:  a2 = x, a3 = y.  Out: result <= 0 iff x <= y.
   Returns 1 ("not less-or-equal") if either operand is NaN.
   .Lle_cmp, the ordered-compare tail, is also entered from
   __gtsf2 above.  */

	.align	4
	.global	__lesf2
	.type	__lesf2, @function
__lesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f		/* x has an all-ones exponent */
1:	bnall	a3, a6, .Lle_cmp	/* y finite: ordered compare */

	/* Check if y is a NaN (nonzero mantissa).  */
	slli	a7, a3, 9
	beqz	a7, .Lle_cmp
	movi	a2, 1
	leaf_return

	/* Check if x is a NaN (nonzero mantissa).  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 1
	leaf_return

.Lle_cmp:
	/* Both operands are ordered (no NaNs).  Check if x and y have
	   different sign bits.  */
	xor	a7, a2, a3
	bltz	a7, .Lle_diff_signs

	/* Same sign.  Check if x is negative.  */
	bltz	a2, .Lle_xneg

	/* Both nonnegative: bit patterns order the same way as unsigned
	   integers, so check x <= y directly.  */
	bltu	a3, a2, 5f
4:	movi	a2, 0			/* x <= y */
	leaf_return

.Lle_xneg:
	/* Both negative: the unsigned bit-pattern order is reversed,
	   so check y <= x.  */
	bgeu	a2, a3, 4b
5:	movi	a2, 1			/* x > y */
	leaf_return

.Lle_diff_signs:
	/* Signs differ.  If x is the negative one, then x <= y.  */
	bltz	a2, 4b

	/* x >= 0 and y < 0: x <= y only if both are zero (+0 <= -0).  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 1
	movi	a3, 0
	moveqz	a2, a3, a7		/* 0 iff (x | y) << 1 == 0 */
	leaf_return
|
|
1232
|
|
1233
|
|
1234 /* Greater Than or Equal */
|
|
1235
|
|
/* int __gesf2 (float x, float y)
   In:  a2 = x, a3 = y.  Out: result >= 0 iff x >= y.
   Returns -1 ("not greater-or-equal") if either operand is NaN.
   The ordered-compare tail is shared: control falls through to
   .Llt_cmp, which is defined in __ltsf2 below.  */

	.align	4
	.global	__gesf2
	.type	__gesf2, @function
__gesf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f		/* x has an all-ones exponent */
1:	bnall	a3, a6, .Llt_cmp	/* y finite: ordered compare */

	/* Check if y is a NaN (nonzero mantissa).  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, -1
	leaf_return

	/* Check if x is a NaN (nonzero mantissa).  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, -1
	leaf_return
|
|
1256
|
|
1257
|
|
1258 /* Less Than */
|
|
1259
|
|
/* int __ltsf2 (float x, float y)
   In:  a2 = x, a3 = y.  Out: result < 0 iff x < y.
   Returns 0 ("not less") if either operand is NaN.
   .Llt_cmp, the ordered-compare tail, is also entered from
   __gesf2 above.  */

	.align	4
	.global	__ltsf2
	.type	__ltsf2, @function
__ltsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 2f		/* x has an all-ones exponent */
1:	bnall	a3, a6, .Llt_cmp	/* y finite: ordered compare */

	/* Check if y is a NaN (nonzero mantissa).  */
	slli	a7, a3, 9
	beqz	a7, .Llt_cmp
	movi	a2, 0
	leaf_return

	/* Check if x is a NaN (nonzero mantissa).  */
2:	slli	a7, a2, 9
	beqz	a7, 1b
	movi	a2, 0
	leaf_return

.Llt_cmp:
	/* Both operands are ordered (no NaNs).  Check if x and y have
	   different sign bits.  */
	xor	a7, a2, a3
	bltz	a7, .Llt_diff_signs

	/* Same sign.  Check if x is negative.  */
	bltz	a2, .Llt_xneg

	/* Both nonnegative: bit patterns order the same way as unsigned
	   integers, so check x < y directly.  */
	bgeu	a2, a3, 5f
4:	movi	a2, -1			/* x < y */
	leaf_return

.Llt_xneg:
	/* Both negative: the unsigned bit-pattern order is reversed,
	   so check y < x.  */
	bltu	a3, a2, 4b
5:	movi	a2, 0			/* x >= y */
	leaf_return

.Llt_diff_signs:
	/* Signs differ.  If x is the nonnegative one, x is not < y.  */
	bgez	a2, 5b

	/* x < 0 and y >= 0: x < y unless both are zero (-0 == +0).  */
	or	a7, a2, a3
	slli	a7, a7, 1
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7		/* -1 iff (x | y) << 1 != 0 */
	leaf_return
|
|
1310
|
|
1311
|
|
1312 /* Unordered */
|
|
1313
|
|
/* int __unordsf2 (float x, float y)
   In:  a2 = x, a3 = y.
   Out: a2 = 1 if either x or y is NaN, else 0.  */

	.align	4
	.global	__unordsf2
	.type	__unordsf2, @function
__unordsf2:
	leaf_entry sp, 16
	movi	a6, 0x7f800000
	ball	a2, a6, 3f		/* x has an all-ones exponent */
1:	ball	a3, a6, 4f		/* y has an all-ones exponent */
2:	movi	a2, 0			/* ordered */
	leaf_return

	/* x may be NaN: check for a nonzero mantissa.  */
3:	slli	a7, a2, 9
	beqz	a7, 1b			/* x is Infinity, not NaN */
	movi	a2, 1
	leaf_return

	/* y may be NaN: check for a nonzero mantissa.  */
4:	slli	a7, a3, 9
	beqz	a7, 2b			/* y is Infinity, not NaN */
	movi	a2, 1
	leaf_return
|
|
1334
|
|
1335 #endif /* L_cmpsf2 */
|
|
1336
|
|
1337 #ifdef L_fixsfsi
|
|
1338
|
|
/* int __fixsfsi (float a)
   Convert to a signed 32-bit integer, truncating toward zero.
   In:  a2 = a.  Out: a2 = (int) a.
   Out-of-range values and +/-Infinity saturate to INT_MAX/INT_MIN;
   NaN converts to INT_MAX.  */

	.align	4
	.global	__fixsfsi
	.type	__fixsfsi, @function
__fixsfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity (all-ones exponent).  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 32, .Lfixsfsi_maxint	/* |a| >= 2^31: saturate */
	blti	a4, 1, .Lfixsfsi_zero		/* |a| < 1: truncates to 0 */

	/* Add the explicit "1.0" bit and shift << 8 so the mantissa
	   occupies the full word.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if the sign bit (still in a7 bit 31) is set.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixsfsi_nan_or_inf:
	/* Handle Infinity and NaN.  A zero mantissa means Infinity.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfsi_maxint

	/* Translate NaN to +maxint by clearing the sign.  */
	movi	a2, 0

.Lfixsfsi_maxint:
	/* Saturate: 0x7fffffff if nonnegative, 0x80000000 if negative.  */
	slli	a4, a6, 8	/* 0x80000000 */
	addi	a5, a4, -1	/* 0x7fffffff */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixsfsi_zero:
	movi	a2, 0
	leaf_return
|
|
1386
|
|
1387 #endif /* L_fixsfsi */
|
|
1388
|
|
1389 #ifdef L_fixsfdi
|
|
1390
|
|
/* long long __fixsfdi (float a)
   Convert to a signed 64-bit integer, truncating toward zero.
   In:  a2 = a.  Out: xh:xl = (long long) a, where xh/xl map onto
   a2/a3 according to the endianness #defines at the top of this file.
   Out-of-range values and +/-Infinity saturate to LLONG_MAX/LLONG_MIN;
   NaN converts to LLONG_MAX.  */

	.align	4
	.global	__fixsfdi
	.type	__fixsfdi, @function
__fixsfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity (all-ones exponent).  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixsfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7e
	bgei	a4, 64, .Lfixsfdi_maxint	/* |a| >= 2^63: saturate */
	blti	a4, 1, .Lfixsfdi_zero		/* |a| < 1: truncates to 0 */

	/* Add the explicit "1.0" bit and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixsfdi_smallshift
	srl	xl, xh		/* large shift: result fits in low word */
	movi	xh, 0

.Lfixsfdi_shifted:
	/* Negate the 64-bit result if the sign (in a7 bit 31) is set.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f		/* propagate borrow into high word */
	addi	xh, xh, -1
1:	leaf_return

.Lfixsfdi_smallshift:
	/* Shift of less than 32: result bits split across xh and xl.  */
	movi	xl, 0
	sll	xl, xh
	srl	xh, xh
	j	.Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
	/* Handle Infinity and NaN.  A zero mantissa means Infinity.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixsfdi_maxint

	/* Translate NaN to +maxint by clearing the sign.  */
	movi	a2, 0

.Lfixsfdi_maxint:
	slli	a7, a6, 8	/* 0x80000000 */
	bgez	a2, 1f
	mov	xh, a7		/* negative: 0x8000000000000000 */
	movi	xl, 0
	leaf_return

1:	addi	xh, a7, -1	/* nonnegative: 0x7fffffffffffffff */
	movi	xl, -1
	leaf_return

.Lfixsfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return
|
|
1455
|
|
1456 #endif /* L_fixsfdi */
|
|
1457
|
|
1458 #ifdef L_fixunssfsi
|
|
1459
|
|
/* unsigned __fixunssfsi (float a)
   Convert to an unsigned 32-bit integer, truncating toward zero.
   In:  a2 = a.  Out: a2 = (unsigned) a.
   Values >= 2^32 and +Infinity saturate to 0xffffffff; negative
   overflow and -Infinity yield 0x80000000; NaN converts to
   0xffffffff.  (NOTE(review): negative finite inputs are undefined
   behavior in C; this code negates them like the signed version.)  */

	.align	4
	.global	__fixunssfsi
	.type	__fixunssfsi, @function
__fixunssfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity (all-ones exponent).  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 32, .Lfixunssfsi_maxint	/* |a| >= 2^32: saturate */
	bltz	a4, .Lfixunssfsi_zero		/* |a| < 1: truncates to 0 */

	/* Add the explicit "1.0" bit and shift << 8.  */
	or	a7, a2, a6
	slli	a5, a7, 8

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 32, .Lfixunssfsi_bigexp	/* 2^31 <= |a| < 2^32 */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if the sign (in a7 bit 31) is set.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixunssfsi_nan_or_inf:
	/* Handle Infinity and NaN.  A zero mantissa means Infinity.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfsi_maxint

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunssfsi_maxint:
	slli	a4, a6, 8	/* 0x80000000 (negative saturation) */
	movi	a5, -1		/* 0xffffffff (positive saturation) */
	movgez	a4, a5, a2
	mov	a2, a4
	leaf_return

.Lfixunssfsi_zero:
	movi	a2, 0
	leaf_return

.Lfixunssfsi_bigexp:
	/* Exponent is exactly 31: the shifted mantissa in a5 is already
	   the final unsigned result.  */
	bltz	a2, 1f
	mov	a2, a5		/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 8
	leaf_return
|
|
1520
|
|
1521 #endif /* L_fixunssfsi */
|
|
1522
|
|
1523 #ifdef L_fixunssfdi
|
|
1524
|
|
/* unsigned long long __fixunssfdi (float a)
   Convert to an unsigned 64-bit integer, truncating toward zero.
   In:  a2 = a.  Out: xh:xl = (unsigned long long) a (endian macros
   at the top of the file).  Values >= 2^64 and +Infinity saturate to
   0xffffffffffffffff; negative overflow and -Infinity yield
   0x8000000000000000; NaN converts to all-ones.  (NOTE(review):
   negative finite inputs are undefined behavior in C; this code
   negates them like the signed version.)  */

	.align	4
	.global	__fixunssfdi
	.type	__fixunssfdi, @function
__fixunssfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity (all-ones exponent).  */
	movi	a6, 0x7f800000
	ball	a2, a6, .Lfixunssfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
	extui	a4, a2, 23, 8
	addi	a4, a4, -0x7f
	bgei	a4, 64, .Lfixunssfdi_maxint	/* |a| >= 2^64: saturate */
	bltz	a4, .Lfixunssfdi_zero		/* |a| < 1: truncates to 0 */

	/* Add the explicit "1.0" bit and shift << 8.  */
	or	a7, a2, a6
	slli	xh, a7, 8

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 64, .Lfixunssfdi_bigexp	/* 2^63 <= |a| < 2^64 */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunssfdi_smallshift
	srl	xl, xh		/* large shift: result fits in low word */
	movi	xh, 0

.Lfixunssfdi_shifted:
	/* Negate the 64-bit result if the sign (in a7 bit 31) is set.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f		/* propagate borrow into high word */
	addi	xh, xh, -1
1:	leaf_return

.Lfixunssfdi_smallshift:
	/* Shift of less than 32: result bits split across xh and xl.  */
	movi	xl, 0
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
	/* Handle Infinity and NaN.  A zero mantissa means Infinity.  */
	slli	a4, a2, 9
	beqz	a4, .Lfixunssfdi_maxint

	/* Translate NaN to 0xffffffff....  */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunssfdi_maxint:
	bgez	a2, 1b		/* positive overflow: all-ones */
2:	slli	xh, a6, 8	/* 0x80000000 */
	movi	xl, 0
	leaf_return

.Lfixunssfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunssfdi_bigexp:
	/* Exponent is exactly 63: xh already holds the final high word.  */
	bltz	a7, 2b		/* negative: return 0x8000000000000000 */
	movi	xl, 0
	leaf_return		/* no shift needed */
|
|
1594
|
|
1595 #endif /* L_fixunssfdi */
|
|
1596
|
|
1597 #ifdef L_floatsisf
|
|
1598
|
|
/* float __floatunsisf (unsigned a)  and  float __floatsisf (int a)
   Convert a 32-bit integer to single precision with round-to-
   nearest-even.  In: a2 = a.  Out: a2 = (float) a.
   __floatunsisf forces the sign to zero and shares the tail of
   __floatsisf starting at .Lfloatsisf_normalize.  */

	.align	4
	.global	__floatunsisf
	.type	__floatunsisf, @function
__floatunsisf:
	leaf_entry sp, 16
	beqz	a2, .Lfloatsisf_return	/* 0 converts to +0.0 */

	/* Set the sign to zero and jump to the floatsisf code.  */
	movi	a7, 0
	j	.Lfloatsisf_normalize

	.align	4
	.global	__floatsisf
	.type	__floatsisf, @function
__floatsisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	beqz	a2, .Lfloatsisf_return

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsisf_normalize:
	/* Normalize with the first 1 bit in the msb.  a4 gets the
	   leading-zero count (do_nsau is a helper macro defined earlier
	   in this file).  */
	do_nsau	a4, a2, a5, a6
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position, with rounding bits in a6.  */
	srli	a2, a5, 8
	slli	a6, a5, (32 - 8)

	/* Set the exponent: bias + 31 minus the normalization shift.  */
	movi	a5, 0x9d	/* 0x7e + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, a2, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2 (a6 bit 31 set).  */
	bgez	a6, .Lfloatsisf_return
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
	leaf_return

.Lfloatsisf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return
|
|
1666
|
|
1667 #endif /* L_floatsisf */
|
|
1668
|
|
1669 #ifdef L_floatdisf
|
|
1670
|
|
/* float __floatundisf (unsigned long long a)  and
   float __floatdisf (long long a)
   Convert a 64-bit integer (in xh:xl per the endianness macros) to
   single precision with round-to-nearest-even.  Out: a2 = (float) a.
   __floatundisf forces the sign to zero and shares the tail of
   __floatdisf starting at .Lfloatdisf_normalize.  */

	.align	4
	.global	__floatundisf
	.type	__floatundisf, @function
__floatundisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f			/* 0 converts to +0.0 */

	/* Set the sign to zero and jump to the floatdisf code.  */
	movi	a7, 0
	j	.Lfloatdisf_normalize

	.align	4
	.global	__floatdisf
	.type	__floatdisf, @function
__floatdisf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Save the sign.  */
	extui	a7, xh, 31, 1

	/* Get the absolute value (64-bit negate with borrow).  */
	bgez	xh, .Lfloatdisf_normalize
	neg	xl, xl
	neg	xh, xh
	beqz	xl, .Lfloatdisf_normalize
	addi	xh, xh, -1

.Lfloatdisf_normalize:
	/* Normalize with the first 1 bit in the msb of xh.  a4 gets the
	   leading-zero count (do_nsau is a helper macro defined earlier
	   in this file).  */
	beqz	xh, .Lfloatdisf_bigshift
	do_nsau	a4, xh, a5, a6
	ssl	a4
	src	xh, xh, xl
	sll	xl, xl

.Lfloatdisf_shifted:
	/* Shift the mantissa into position, with rounding bits in a6.
	   Any bits still left in xl are ORed into a6's low bit so that
	   rounding sees them (sticky bit).  */
	ssai	8
	sll	a5, xl
	src	a6, xh, xl
	srl	xh, xh
	beqz	a5, 1f
	movi	a5, 1
	or	a6, a6, a5
1:
	/* Set the exponent: bias + 63 minus the normalization shift.  */
	movi	a5, 0xbd	/* 0x7e + 63 */
	sub	a5, a5, a4
	slli	a5, a5, 23
	add	a2, xh, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	a2, a2, a7

	/* Round up if the leftover fraction is >= 1/2 (a6 bit 31 set).  */
	bgez	a6, 2f
	addi	a2, a2, 1	/* Overflow to the exponent is OK.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatdisf_exactlyhalf
2:	leaf_return

.Lfloatdisf_bigshift:
	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
	do_nsau	a4, xl, a5, a6
	ssl	a4
	sll	xh, xl
	movi	xl, 0
	addi	a4, a4, 32
	j	.Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return
|
|
1756
|
|
1757 #endif /* L_floatdisf */
|