0
|
1 /* IEEE-754 double-precision functions for Xtensa
|
|
2 Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
|
|
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
|
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify it
|
|
8 under the terms of the GNU General Public License as published by
|
|
9 the Free Software Foundation; either version 3, or (at your option)
|
|
10 any later version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful, but WITHOUT
|
|
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
|
|
15 License for more details.
|
|
16
|
|
17 Under Section 7 of GPL version 3, you are granted additional
|
|
18 permissions described in the GCC Runtime Library Exception, version
|
|
19 3.1, as published by the Free Software Foundation.
|
|
20
|
|
21 You should have received a copy of the GNU General Public License and
|
|
22 a copy of the GCC Runtime Library Exception along with this program;
|
|
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
24 <http://www.gnu.org/licenses/>. */
|
|
25
|
|
26 #ifdef __XTENSA_EB__
|
|
27 #define xh a2
|
|
28 #define xl a3
|
|
29 #define yh a4
|
|
30 #define yl a5
|
|
31 #else
|
|
32 #define xh a3
|
|
33 #define xl a2
|
|
34 #define yh a5
|
|
35 #define yl a4
|
|
36 #endif
|
|
37
|
|
/* Warning!  The branch displacements for some Xtensa branch instructions
   are quite small, and this code has been carefully laid out to keep
   branch targets in range.  If you change anything, be sure to check that
   the assembler is not relaxing anything to branch over a jump.  */
|
|
42
|
|
43 #ifdef L_negdf2
|
|
44
|
|
45 .align 4
|
|
46 .global __negdf2
|
|
47 .type __negdf2, @function
|
|
48 __negdf2:
|
|
49 leaf_entry sp, 16
|
|
50 movi a4, 0x80000000
|
|
51 xor xh, xh, a4
|
|
52 leaf_return
|
|
53
|
|
54 #endif /* L_negdf2 */
|
|
55
|
|
56 #ifdef L_addsubdf3
|
|
57
|
|
58 /* Addition */
|
|
59 __adddf3_aux:
|
|
60
|
|
61 /* Handle NaNs and Infinities. (This code is placed before the
|
|
62 start of the function just to keep it in range of the limited
|
|
63 branch displacements.) */
|
|
64
|
|
65 .Ladd_xnan_or_inf:
|
|
66 /* If y is neither Infinity nor NaN, return x. */
|
|
67 bnall yh, a6, 1f
|
|
68 /* If x is a NaN, return it. Otherwise, return y. */
|
|
69 slli a7, xh, 12
|
|
70 or a7, a7, xl
|
|
71 beqz a7, .Ladd_ynan_or_inf
|
|
72 1: leaf_return
|
|
73
|
|
74 .Ladd_ynan_or_inf:
|
|
75 /* Return y. */
|
|
76 mov xh, yh
|
|
77 mov xl, yl
|
|
78 leaf_return
|
|
79
|
|
80 .Ladd_opposite_signs:
|
|
81 /* Operand signs differ. Do a subtraction. */
|
|
82 slli a7, a6, 11
|
|
83 xor yh, yh, a7
|
|
84 j .Lsub_same_sign
|
|
85
|
|
86 .align 4
|
|
87 .global __adddf3
|
|
88 .type __adddf3, @function
|
|
89 __adddf3:
|
|
90 leaf_entry sp, 16
|
|
91 movi a6, 0x7ff00000
|
|
92
|
|
93 /* Check if the two operands have the same sign. */
|
|
94 xor a7, xh, yh
|
|
95 bltz a7, .Ladd_opposite_signs
|
|
96
|
|
97 .Ladd_same_sign:
|
|
98 /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
|
|
99 ball xh, a6, .Ladd_xnan_or_inf
|
|
100 ball yh, a6, .Ladd_ynan_or_inf
|
|
101
|
|
102 /* Compare the exponents. The smaller operand will be shifted
|
|
103 right by the exponent difference and added to the larger
|
|
104 one. */
|
|
105 extui a7, xh, 20, 12
|
|
106 extui a8, yh, 20, 12
|
|
107 bltu a7, a8, .Ladd_shiftx
|
|
108
|
|
109 .Ladd_shifty:
|
|
110 /* Check if the smaller (or equal) exponent is zero. */
|
|
111 bnone yh, a6, .Ladd_yexpzero
|
|
112
|
|
113 /* Replace yh sign/exponent with 0x001. */
|
|
114 or yh, yh, a6
|
|
115 slli yh, yh, 11
|
|
116 srli yh, yh, 11
|
|
117
|
|
118 .Ladd_yexpdiff:
|
|
119 /* Compute the exponent difference. Optimize for difference < 32. */
|
|
120 sub a10, a7, a8
|
|
121 bgeui a10, 32, .Ladd_bigshifty
|
|
122
|
|
123 /* Shift yh/yl right by the exponent difference. Any bits that are
|
|
124 shifted out of yl are saved in a9 for rounding the result. */
|
|
125 ssr a10
|
|
126 movi a9, 0
|
|
127 src a9, yl, a9
|
|
128 src yl, yh, yl
|
|
129 srl yh, yh
|
|
130
|
|
131 .Ladd_addy:
|
|
132 /* Do the 64-bit addition. */
|
|
133 add xl, xl, yl
|
|
134 add xh, xh, yh
|
|
135 bgeu xl, yl, 1f
|
|
136 addi xh, xh, 1
|
|
137 1:
|
|
138 /* Check if the add overflowed into the exponent. */
|
|
139 extui a10, xh, 20, 12
|
|
140 beq a10, a7, .Ladd_round
|
|
141 mov a8, a7
|
|
142 j .Ladd_carry
|
|
143
|
|
144 .Ladd_yexpzero:
|
|
145 /* y is a subnormal value. Replace its sign/exponent with zero,
|
|
146 i.e., no implicit "1.0", and increment the apparent exponent
|
|
147 because subnormals behave as if they had the minimum (nonzero)
|
|
148 exponent. Test for the case when both exponents are zero. */
|
|
149 slli yh, yh, 12
|
|
150 srli yh, yh, 12
|
|
151 bnone xh, a6, .Ladd_bothexpzero
|
|
152 addi a8, a8, 1
|
|
153 j .Ladd_yexpdiff
|
|
154
|
|
155 .Ladd_bothexpzero:
|
|
156 /* Both exponents are zero. Handle this as a special case. There
|
|
157 is no need to shift or round, and the normal code for handling
|
|
158 a carry into the exponent field will not work because it
|
|
159 assumes there is an implicit "1.0" that needs to be added. */
|
|
160 add xl, xl, yl
|
|
161 add xh, xh, yh
|
|
162 bgeu xl, yl, 1f
|
|
163 addi xh, xh, 1
|
|
164 1: leaf_return
|
|
165
|
|
166 .Ladd_bigshifty:
|
|
167 /* Exponent difference > 64 -- just return the bigger value. */
|
|
168 bgeui a10, 64, 1b
|
|
169
|
|
170 /* Shift yh/yl right by the exponent difference. Any bits that are
|
|
171 shifted out are saved in a9 for rounding the result. */
|
|
172 ssr a10
|
|
173 sll a11, yl /* lost bits shifted out of yl */
|
|
174 src a9, yh, yl
|
|
175 srl yl, yh
|
|
176 movi yh, 0
|
|
177 beqz a11, .Ladd_addy
|
|
178 or a9, a9, a10 /* any positive, nonzero value will work */
|
|
179 j .Ladd_addy
|
|
180
|
|
181 .Ladd_xexpzero:
|
|
182 /* Same as "yexpzero" except skip handling the case when both
|
|
183 exponents are zero. */
|
|
184 slli xh, xh, 12
|
|
185 srli xh, xh, 12
|
|
186 addi a7, a7, 1
|
|
187 j .Ladd_xexpdiff
|
|
188
|
|
189 .Ladd_shiftx:
|
|
190 /* Same thing as the "shifty" code, but with x and y swapped. Also,
|
|
191 because the exponent difference is always nonzero in this version,
|
|
192 the shift sequence can use SLL and skip loading a constant zero. */
|
|
193 bnone xh, a6, .Ladd_xexpzero
|
|
194
|
|
195 or xh, xh, a6
|
|
196 slli xh, xh, 11
|
|
197 srli xh, xh, 11
|
|
198
|
|
199 .Ladd_xexpdiff:
|
|
200 sub a10, a8, a7
|
|
201 bgeui a10, 32, .Ladd_bigshiftx
|
|
202
|
|
203 ssr a10
|
|
204 sll a9, xl
|
|
205 src xl, xh, xl
|
|
206 srl xh, xh
|
|
207
|
|
208 .Ladd_addx:
|
|
209 add xl, xl, yl
|
|
210 add xh, xh, yh
|
|
211 bgeu xl, yl, 1f
|
|
212 addi xh, xh, 1
|
|
213 1:
|
|
214 /* Check if the add overflowed into the exponent. */
|
|
215 extui a10, xh, 20, 12
|
|
216 bne a10, a8, .Ladd_carry
|
|
217
|
|
218 .Ladd_round:
|
|
219 /* Round up if the leftover fraction is >= 1/2. */
|
|
220 bgez a9, 1f
|
|
221 addi xl, xl, 1
|
|
222 beqz xl, .Ladd_roundcarry
|
|
223
|
|
224 /* Check if the leftover fraction is exactly 1/2. */
|
|
225 slli a9, a9, 1
|
|
226 beqz a9, .Ladd_exactlyhalf
|
|
227 1: leaf_return
|
|
228
|
|
229 .Ladd_bigshiftx:
|
|
230 /* Mostly the same thing as "bigshifty".... */
|
|
231 bgeui a10, 64, .Ladd_returny
|
|
232
|
|
233 ssr a10
|
|
234 sll a11, xl
|
|
235 src a9, xh, xl
|
|
236 srl xl, xh
|
|
237 movi xh, 0
|
|
238 beqz a11, .Ladd_addx
|
|
239 or a9, a9, a10
|
|
240 j .Ladd_addx
|
|
241
|
|
242 .Ladd_returny:
|
|
243 mov xh, yh
|
|
244 mov xl, yl
|
|
245 leaf_return
|
|
246
|
|
247 .Ladd_carry:
|
|
248 /* The addition has overflowed into the exponent field, so the
|
|
249 value needs to be renormalized. The mantissa of the result
|
|
250 can be recovered by subtracting the original exponent and
|
|
251 adding 0x100000 (which is the explicit "1.0" for the
|
|
252 mantissa of the non-shifted operand -- the "1.0" for the
|
|
253 shifted operand was already added). The mantissa can then
|
|
254 be shifted right by one bit. The explicit "1.0" of the
|
|
255 shifted mantissa then needs to be replaced by the exponent,
|
|
256 incremented by one to account for the normalizing shift.
|
|
257 It is faster to combine these operations: do the shift first
|
|
258 and combine the additions and subtractions. If x is the
|
|
259 original exponent, the result is:
|
|
260 shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
|
|
261 or:
|
|
262 shifted mantissa + ((x + 1) << 19)
|
|
263 Note that the exponent is incremented here by leaving the
|
|
264 explicit "1.0" of the mantissa in the exponent field. */
|
|
265
|
|
266 /* Shift xh/xl right by one bit. Save the lsb of xl. */
|
|
267 mov a10, xl
|
|
268 ssai 1
|
|
269 src xl, xh, xl
|
|
270 srl xh, xh
|
|
271
|
|
272 /* See explanation above. The original exponent is in a8. */
|
|
273 addi a8, a8, 1
|
|
274 slli a8, a8, 19
|
|
275 add xh, xh, a8
|
|
276
|
|
277 /* Return an Infinity if the exponent overflowed. */
|
|
278 ball xh, a6, .Ladd_infinity
|
|
279
|
|
280 /* Same thing as the "round" code except the msb of the leftover
|
|
281 fraction is bit 0 of a10, with the rest of the fraction in a9. */
|
|
282 bbci.l a10, 0, 1f
|
|
283 addi xl, xl, 1
|
|
284 beqz xl, .Ladd_roundcarry
|
|
285 beqz a9, .Ladd_exactlyhalf
|
|
286 1: leaf_return
|
|
287
|
|
288 .Ladd_infinity:
|
|
289 /* Clear the mantissa. */
|
|
290 movi xl, 0
|
|
291 srli xh, xh, 20
|
|
292 slli xh, xh, 20
|
|
293
|
|
294 /* The sign bit may have been lost in a carry-out. Put it back. */
|
|
295 slli a8, a8, 1
|
|
296 or xh, xh, a8
|
|
297 leaf_return
|
|
298
|
|
299 .Ladd_exactlyhalf:
|
|
300 /* Round down to the nearest even value. */
|
|
301 srli xl, xl, 1
|
|
302 slli xl, xl, 1
|
|
303 leaf_return
|
|
304
|
|
305 .Ladd_roundcarry:
|
|
306 /* xl is always zero when the rounding increment overflows, so
|
|
307 there's no need to round it to an even value. */
|
|
308 addi xh, xh, 1
|
|
309 /* Overflow to the exponent is OK. */
|
|
310 leaf_return
|
|
311
|
|
312
|
|
313 /* Subtraction */
|
|
314 __subdf3_aux:
|
|
315
|
|
316 /* Handle NaNs and Infinities. (This code is placed before the
|
|
317 start of the function just to keep it in range of the limited
|
|
318 branch displacements.) */
|
|
319
|
|
320 .Lsub_xnan_or_inf:
|
|
321 /* If y is neither Infinity nor NaN, return x. */
|
|
322 bnall yh, a6, 1f
|
|
323 /* Both x and y are either NaN or Inf, so the result is NaN. */
|
|
324 movi a4, 0x80000 /* make it a quiet NaN */
|
|
325 or xh, xh, a4
|
|
326 1: leaf_return
|
|
327
|
|
328 .Lsub_ynan_or_inf:
|
|
329 /* Negate y and return it. */
|
|
330 slli a7, a6, 11
|
|
331 xor xh, yh, a7
|
|
332 mov xl, yl
|
|
333 leaf_return
|
|
334
|
|
335 .Lsub_opposite_signs:
|
|
336 /* Operand signs differ. Do an addition. */
|
|
337 slli a7, a6, 11
|
|
338 xor yh, yh, a7
|
|
339 j .Ladd_same_sign
|
|
340
|
|
341 .align 4
|
|
342 .global __subdf3
|
|
343 .type __subdf3, @function
|
|
344 __subdf3:
|
|
345 leaf_entry sp, 16
|
|
346 movi a6, 0x7ff00000
|
|
347
|
|
348 /* Check if the two operands have the same sign. */
|
|
349 xor a7, xh, yh
|
|
350 bltz a7, .Lsub_opposite_signs
|
|
351
|
|
352 .Lsub_same_sign:
|
|
353 /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */
|
|
354 ball xh, a6, .Lsub_xnan_or_inf
|
|
355 ball yh, a6, .Lsub_ynan_or_inf
|
|
356
|
|
357 /* Compare the operands. In contrast to addition, the entire
|
|
358 value matters here. */
|
|
359 extui a7, xh, 20, 11
|
|
360 extui a8, yh, 20, 11
|
|
361 bltu xh, yh, .Lsub_xsmaller
|
|
362 beq xh, yh, .Lsub_compare_low
|
|
363
|
|
364 .Lsub_ysmaller:
|
|
365 /* Check if the smaller (or equal) exponent is zero. */
|
|
366 bnone yh, a6, .Lsub_yexpzero
|
|
367
|
|
368 /* Replace yh sign/exponent with 0x001. */
|
|
369 or yh, yh, a6
|
|
370 slli yh, yh, 11
|
|
371 srli yh, yh, 11
|
|
372
|
|
373 .Lsub_yexpdiff:
|
|
374 /* Compute the exponent difference. Optimize for difference < 32. */
|
|
375 sub a10, a7, a8
|
|
376 bgeui a10, 32, .Lsub_bigshifty
|
|
377
|
|
378 /* Shift yh/yl right by the exponent difference. Any bits that are
|
|
379 shifted out of yl are saved in a9 for rounding the result. */
|
|
380 ssr a10
|
|
381 movi a9, 0
|
|
382 src a9, yl, a9
|
|
383 src yl, yh, yl
|
|
384 srl yh, yh
|
|
385
|
|
386 .Lsub_suby:
|
|
387 /* Do the 64-bit subtraction. */
|
|
388 sub xh, xh, yh
|
|
389 bgeu xl, yl, 1f
|
|
390 addi xh, xh, -1
|
|
391 1: sub xl, xl, yl
|
|
392
|
|
393 /* Subtract the leftover bits in a9 from zero and propagate any
|
|
394 borrow from xh/xl. */
|
|
395 neg a9, a9
|
|
396 beqz a9, 1f
|
|
397 addi a5, xh, -1
|
|
398 moveqz xh, a5, xl
|
|
399 addi xl, xl, -1
|
|
400 1:
|
|
401 /* Check if the subtract underflowed into the exponent. */
|
|
402 extui a10, xh, 20, 11
|
|
403 beq a10, a7, .Lsub_round
|
|
404 j .Lsub_borrow
|
|
405
|
|
406 .Lsub_compare_low:
|
|
407 /* The high words are equal. Compare the low words. */
|
|
408 bltu xl, yl, .Lsub_xsmaller
|
|
409 bltu yl, xl, .Lsub_ysmaller
|
|
410 /* The operands are equal. Return 0.0. */
|
|
411 movi xh, 0
|
|
412 movi xl, 0
|
|
413 1: leaf_return
|
|
414
|
|
415 .Lsub_yexpzero:
|
|
416 /* y is a subnormal value. Replace its sign/exponent with zero,
|
|
417 i.e., no implicit "1.0". Unless x is also a subnormal, increment
|
|
418 y's apparent exponent because subnormals behave as if they had
|
|
419 the minimum (nonzero) exponent. */
|
|
420 slli yh, yh, 12
|
|
421 srli yh, yh, 12
|
|
422 bnone xh, a6, .Lsub_yexpdiff
|
|
423 addi a8, a8, 1
|
|
424 j .Lsub_yexpdiff
|
|
425
|
|
426 .Lsub_bigshifty:
|
|
427 /* Exponent difference > 64 -- just return the bigger value. */
|
|
428 bgeui a10, 64, 1b
|
|
429
|
|
430 /* Shift yh/yl right by the exponent difference. Any bits that are
|
|
431 shifted out are saved in a9 for rounding the result. */
|
|
432 ssr a10
|
|
433 sll a11, yl /* lost bits shifted out of yl */
|
|
434 src a9, yh, yl
|
|
435 srl yl, yh
|
|
436 movi yh, 0
|
|
437 beqz a11, .Lsub_suby
|
|
438 or a9, a9, a10 /* any positive, nonzero value will work */
|
|
439 j .Lsub_suby
|
|
440
|
|
441 .Lsub_xsmaller:
|
|
442 /* Same thing as the "ysmaller" code, but with x and y swapped and
|
|
443 with y negated. */
|
|
444 bnone xh, a6, .Lsub_xexpzero
|
|
445
|
|
446 or xh, xh, a6
|
|
447 slli xh, xh, 11
|
|
448 srli xh, xh, 11
|
|
449
|
|
450 .Lsub_xexpdiff:
|
|
451 sub a10, a8, a7
|
|
452 bgeui a10, 32, .Lsub_bigshiftx
|
|
453
|
|
454 ssr a10
|
|
455 movi a9, 0
|
|
456 src a9, xl, a9
|
|
457 src xl, xh, xl
|
|
458 srl xh, xh
|
|
459
|
|
460 /* Negate y. */
|
|
461 slli a11, a6, 11
|
|
462 xor yh, yh, a11
|
|
463
|
|
464 .Lsub_subx:
|
|
465 sub xl, yl, xl
|
|
466 sub xh, yh, xh
|
|
467 bgeu yl, xl, 1f
|
|
468 addi xh, xh, -1
|
|
469 1:
|
|
470 /* Subtract the leftover bits in a9 from zero and propagate any
|
|
471 borrow from xh/xl. */
|
|
472 neg a9, a9
|
|
473 beqz a9, 1f
|
|
474 addi a5, xh, -1
|
|
475 moveqz xh, a5, xl
|
|
476 addi xl, xl, -1
|
|
477 1:
|
|
478 /* Check if the subtract underflowed into the exponent. */
|
|
479 extui a10, xh, 20, 11
|
|
480 bne a10, a8, .Lsub_borrow
|
|
481
|
|
482 .Lsub_round:
|
|
483 /* Round up if the leftover fraction is >= 1/2. */
|
|
484 bgez a9, 1f
|
|
485 addi xl, xl, 1
|
|
486 beqz xl, .Lsub_roundcarry
|
|
487
|
|
488 /* Check if the leftover fraction is exactly 1/2. */
|
|
489 slli a9, a9, 1
|
|
490 beqz a9, .Lsub_exactlyhalf
|
|
491 1: leaf_return
|
|
492
|
|
493 .Lsub_xexpzero:
|
|
494 /* Same as "yexpzero". */
|
|
495 slli xh, xh, 12
|
|
496 srli xh, xh, 12
|
|
497 bnone yh, a6, .Lsub_xexpdiff
|
|
498 addi a7, a7, 1
|
|
499 j .Lsub_xexpdiff
|
|
500
|
|
501 .Lsub_bigshiftx:
|
|
502 /* Mostly the same thing as "bigshifty", but with the sign bit of the
|
|
503 shifted value set so that the subsequent subtraction flips the
|
|
504 sign of y. */
|
|
505 bgeui a10, 64, .Lsub_returny
|
|
506
|
|
507 ssr a10
|
|
508 sll a11, xl
|
|
509 src a9, xh, xl
|
|
510 srl xl, xh
|
|
511 slli xh, a6, 11 /* set sign bit of xh */
|
|
512 beqz a11, .Lsub_subx
|
|
513 or a9, a9, a10
|
|
514 j .Lsub_subx
|
|
515
|
|
516 .Lsub_returny:
|
|
517 /* Negate and return y. */
|
|
518 slli a7, a6, 11
|
|
519 xor xh, yh, a7
|
|
520 mov xl, yl
|
|
521 leaf_return
|
|
522
|
|
523 .Lsub_borrow:
|
|
524 /* The subtraction has underflowed into the exponent field, so the
|
|
525 value needs to be renormalized. Shift the mantissa left as
|
|
526 needed to remove any leading zeros and adjust the exponent
|
|
527 accordingly. If the exponent is not large enough to remove
|
|
528 all the leading zeros, the result will be a subnormal value. */
|
|
529
|
|
530 slli a8, xh, 12
|
|
531 beqz a8, .Lsub_xhzero
|
|
532 do_nsau a6, a8, a7, a11
|
|
533 srli a8, a8, 12
|
|
534 bge a6, a10, .Lsub_subnormal
|
|
535 addi a6, a6, 1
|
|
536
|
|
537 .Lsub_shift_lt32:
|
|
538 /* Shift the mantissa (a8/xl/a9) left by a6. */
|
|
539 ssl a6
|
|
540 src a8, a8, xl
|
|
541 src xl, xl, a9
|
|
542 sll a9, a9
|
|
543
|
|
544 /* Combine the shifted mantissa with the sign and exponent,
|
|
545 decrementing the exponent by a6. (The exponent has already
|
|
546 been decremented by one due to the borrow from the subtraction,
|
|
547 but adding the mantissa will increment the exponent by one.) */
|
|
548 srli xh, xh, 20
|
|
549 sub xh, xh, a6
|
|
550 slli xh, xh, 20
|
|
551 add xh, xh, a8
|
|
552 j .Lsub_round
|
|
553
|
|
554 .Lsub_exactlyhalf:
|
|
555 /* Round down to the nearest even value. */
|
|
556 srli xl, xl, 1
|
|
557 slli xl, xl, 1
|
|
558 leaf_return
|
|
559
|
|
560 .Lsub_roundcarry:
|
|
561 /* xl is always zero when the rounding increment overflows, so
|
|
562 there's no need to round it to an even value. */
|
|
563 addi xh, xh, 1
|
|
564 /* Overflow to the exponent is OK. */
|
|
565 leaf_return
|
|
566
|
|
567 .Lsub_xhzero:
|
|
568 /* When normalizing the result, all the mantissa bits in the high
|
|
569 word are zero. Shift by "20 + (leading zero count of xl) + 1". */
|
|
570 do_nsau a6, xl, a7, a11
|
|
571 addi a6, a6, 21
|
|
572 blt a10, a6, .Lsub_subnormal
|
|
573
|
|
574 .Lsub_normalize_shift:
|
|
575 bltui a6, 32, .Lsub_shift_lt32
|
|
576
|
|
577 ssl a6
|
|
578 src a8, xl, a9
|
|
579 sll xl, a9
|
|
580 movi a9, 0
|
|
581
|
|
582 srli xh, xh, 20
|
|
583 sub xh, xh, a6
|
|
584 slli xh, xh, 20
|
|
585 add xh, xh, a8
|
|
586 j .Lsub_round
|
|
587
|
|
588 .Lsub_subnormal:
|
|
589 /* The exponent is too small to shift away all the leading zeros.
|
|
590 Set a6 to the current exponent (which has already been
|
|
591 decremented by the borrow) so that the exponent of the result
|
|
592 will be zero. Do not add 1 to a6 in this case, because: (1)
|
|
593 adding the mantissa will not increment the exponent, so there is
|
|
594 no need to subtract anything extra from the exponent to
|
|
595 compensate, and (2) the effective exponent of a subnormal is 1
|
|
596 not 0 so the shift amount must be 1 smaller than normal. */
|
|
597 mov a6, a10
|
|
598 j .Lsub_normalize_shift
|
|
599
|
|
600 #endif /* L_addsubdf3 */
|
|
601
|
|
602 #ifdef L_muldf3
|
|
603
|
|
604 /* Multiplication */
|
|
605 #if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
|
|
606 #define XCHAL_NO_MUL 1
|
|
607 #endif
|
|
608
|
|
609 __muldf3_aux:
|
|
610
|
|
611 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
|
|
612 (This code is placed before the start of the function just to
|
|
613 keep it in range of the limited branch displacements.) */
|
|
614
|
|
615 .Lmul_xexpzero:
|
|
616 /* Clear the sign bit of x. */
|
|
617 slli xh, xh, 1
|
|
618 srli xh, xh, 1
|
|
619
|
|
620 /* If x is zero, return zero. */
|
|
621 or a10, xh, xl
|
|
622 beqz a10, .Lmul_return_zero
|
|
623
|
|
624 /* Normalize x. Adjust the exponent in a8. */
|
|
625 beqz xh, .Lmul_xh_zero
|
|
626 do_nsau a10, xh, a11, a12
|
|
627 addi a10, a10, -11
|
|
628 ssl a10
|
|
629 src xh, xh, xl
|
|
630 sll xl, xl
|
|
631 movi a8, 1
|
|
632 sub a8, a8, a10
|
|
633 j .Lmul_xnormalized
|
|
634 .Lmul_xh_zero:
|
|
635 do_nsau a10, xl, a11, a12
|
|
636 addi a10, a10, -11
|
|
637 movi a8, -31
|
|
638 sub a8, a8, a10
|
|
639 ssl a10
|
|
640 bltz a10, .Lmul_xl_srl
|
|
641 sll xh, xl
|
|
642 movi xl, 0
|
|
643 j .Lmul_xnormalized
|
|
644 .Lmul_xl_srl:
|
|
645 srl xh, xl
|
|
646 sll xl, xl
|
|
647 j .Lmul_xnormalized
|
|
648
|
|
649 .Lmul_yexpzero:
|
|
650 /* Clear the sign bit of y. */
|
|
651 slli yh, yh, 1
|
|
652 srli yh, yh, 1
|
|
653
|
|
654 /* If y is zero, return zero. */
|
|
655 or a10, yh, yl
|
|
656 beqz a10, .Lmul_return_zero
|
|
657
|
|
658 /* Normalize y. Adjust the exponent in a9. */
|
|
659 beqz yh, .Lmul_yh_zero
|
|
660 do_nsau a10, yh, a11, a12
|
|
661 addi a10, a10, -11
|
|
662 ssl a10
|
|
663 src yh, yh, yl
|
|
664 sll yl, yl
|
|
665 movi a9, 1
|
|
666 sub a9, a9, a10
|
|
667 j .Lmul_ynormalized
|
|
668 .Lmul_yh_zero:
|
|
669 do_nsau a10, yl, a11, a12
|
|
670 addi a10, a10, -11
|
|
671 movi a9, -31
|
|
672 sub a9, a9, a10
|
|
673 ssl a10
|
|
674 bltz a10, .Lmul_yl_srl
|
|
675 sll yh, yl
|
|
676 movi yl, 0
|
|
677 j .Lmul_ynormalized
|
|
678 .Lmul_yl_srl:
|
|
679 srl yh, yl
|
|
680 sll yl, yl
|
|
681 j .Lmul_ynormalized
|
|
682
|
|
683 .Lmul_return_zero:
|
|
684 /* Return zero with the appropriate sign bit. */
|
|
685 srli xh, a7, 31
|
|
686 slli xh, xh, 31
|
|
687 movi xl, 0
|
|
688 j .Lmul_done
|
|
689
|
|
690 .Lmul_xnan_or_inf:
|
|
691 /* If y is zero, return NaN. */
|
|
692 bnez yl, 1f
|
|
693 slli a8, yh, 1
|
|
694 bnez a8, 1f
|
|
695 movi a4, 0x80000 /* make it a quiet NaN */
|
|
696 or xh, xh, a4
|
|
697 j .Lmul_done
|
|
698 1:
|
|
699 /* If y is NaN, return y. */
|
|
700 bnall yh, a6, .Lmul_returnx
|
|
701 slli a8, yh, 12
|
|
702 or a8, a8, yl
|
|
703 beqz a8, .Lmul_returnx
|
|
704
|
|
705 .Lmul_returny:
|
|
706 mov xh, yh
|
|
707 mov xl, yl
|
|
708
|
|
709 .Lmul_returnx:
|
|
710 /* Set the sign bit and return. */
|
|
711 extui a7, a7, 31, 1
|
|
712 slli xh, xh, 1
|
|
713 ssai 1
|
|
714 src xh, a7, xh
|
|
715 j .Lmul_done
|
|
716
|
|
717 .Lmul_ynan_or_inf:
|
|
718 /* If x is zero, return NaN. */
|
|
719 bnez xl, .Lmul_returny
|
|
720 slli a8, xh, 1
|
|
721 bnez a8, .Lmul_returny
|
|
722 movi a7, 0x80000 /* make it a quiet NaN */
|
|
723 or xh, yh, a7
|
|
724 j .Lmul_done
|
|
725
|
|
726 .align 4
|
|
727 .global __muldf3
|
|
728 .type __muldf3, @function
|
|
729 __muldf3:
|
|
730 #if __XTENSA_CALL0_ABI__
|
|
731 leaf_entry sp, 32
|
|
732 addi sp, sp, -32
|
|
733 s32i a12, sp, 16
|
|
734 s32i a13, sp, 20
|
|
735 s32i a14, sp, 24
|
|
736 s32i a15, sp, 28
|
|
737 #elif XCHAL_NO_MUL
|
|
738 /* This is not really a leaf function; allocate enough stack space
|
|
739 to allow CALL12s to a helper function. */
|
|
740 leaf_entry sp, 64
|
|
741 #else
|
|
742 leaf_entry sp, 32
|
|
743 #endif
|
|
744 movi a6, 0x7ff00000
|
|
745
|
|
746 /* Get the sign of the result. */
|
|
747 xor a7, xh, yh
|
|
748
|
|
749 /* Check for NaN and infinity. */
|
|
750 ball xh, a6, .Lmul_xnan_or_inf
|
|
751 ball yh, a6, .Lmul_ynan_or_inf
|
|
752
|
|
753 /* Extract the exponents. */
|
|
754 extui a8, xh, 20, 11
|
|
755 extui a9, yh, 20, 11
|
|
756
|
|
757 beqz a8, .Lmul_xexpzero
|
|
758 .Lmul_xnormalized:
|
|
759 beqz a9, .Lmul_yexpzero
|
|
760 .Lmul_ynormalized:
|
|
761
|
|
762 /* Add the exponents. */
|
|
763 add a8, a8, a9
|
|
764
|
|
765 /* Replace sign/exponent fields with explicit "1.0". */
|
|
766 movi a10, 0x1fffff
|
|
767 or xh, xh, a6
|
|
768 and xh, xh, a10
|
|
769 or yh, yh, a6
|
|
770 and yh, yh, a10
|
|
771
|
|
772 /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6.
|
|
773 The least-significant word of the result is thrown away except
|
|
774 that if it is nonzero, the lsb of a6 is set to 1. */
|
|
775 #if XCHAL_HAVE_MUL32_HIGH
|
|
776
|
|
777 /* Compute a6 with any carry-outs in a10. */
|
|
778 movi a10, 0
|
|
779 mull a6, xl, yh
|
|
780 mull a11, xh, yl
|
|
781 add a6, a6, a11
|
|
782 bgeu a6, a11, 1f
|
|
783 addi a10, a10, 1
|
|
784 1:
|
|
785 muluh a11, xl, yl
|
|
786 add a6, a6, a11
|
|
787 bgeu a6, a11, 1f
|
|
788 addi a10, a10, 1
|
|
789 1:
|
|
790 /* If the low word of the result is nonzero, set the lsb of a6. */
|
|
791 mull a11, xl, yl
|
|
792 beqz a11, 1f
|
|
793 movi a9, 1
|
|
794 or a6, a6, a9
|
|
795 1:
|
|
796 /* Compute xl with any carry-outs in a9. */
|
|
797 movi a9, 0
|
|
798 mull a11, xh, yh
|
|
799 add a10, a10, a11
|
|
800 bgeu a10, a11, 1f
|
|
801 addi a9, a9, 1
|
|
802 1:
|
|
803 muluh a11, xh, yl
|
|
804 add a10, a10, a11
|
|
805 bgeu a10, a11, 1f
|
|
806 addi a9, a9, 1
|
|
807 1:
|
|
808 muluh xl, xl, yh
|
|
809 add xl, xl, a10
|
|
810 bgeu xl, a10, 1f
|
|
811 addi a9, a9, 1
|
|
812 1:
|
|
813 /* Compute xh. */
|
|
814 muluh xh, xh, yh
|
|
815 add xh, xh, a9
|
|
816
|
|
817 #else /* ! XCHAL_HAVE_MUL32_HIGH */
|
|
818
|
|
819 /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
|
|
820 products. These partial products are:
|
|
821
|
|
822 0 xll * yll
|
|
823
|
|
824 1 xll * ylh
|
|
825 2 xlh * yll
|
|
826
|
|
827 3 xll * yhl
|
|
828 4 xlh * ylh
|
|
829 5 xhl * yll
|
|
830
|
|
831 6 xll * yhh
|
|
832 7 xlh * yhl
|
|
833 8 xhl * ylh
|
|
834 9 xhh * yll
|
|
835
|
|
836 10 xlh * yhh
|
|
837 11 xhl * yhl
|
|
838 12 xhh * ylh
|
|
839
|
|
840 13 xhl * yhh
|
|
841 14 xhh * yhl
|
|
842
|
|
843 15 xhh * yhh
|
|
844
|
|
845 where the input chunks are (hh, hl, lh, ll). If using the Mul16
|
|
846 or Mul32 multiplier options, these input chunks must be stored in
|
|
847 separate registers. For Mac16, the UMUL.AA.* opcodes can specify
|
|
848 that the inputs come from either half of the registers, so there
|
|
849 is no need to shift them out ahead of time. If there is no
|
|
850 multiply hardware, the 16-bit chunks can be extracted when setting
|
|
851 up the arguments to the separate multiply function. */
|
|
852
|
|
853 /* Save a7 since it is needed to hold a temporary value. */
|
|
854 s32i a7, sp, 4
|
|
855 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
|
|
856 /* Calling a separate multiply function will clobber a0 and requires
|
|
857 use of a8 as a temporary, so save those values now. (The function
|
|
858 uses a custom ABI so nothing else needs to be saved.) */
|
|
859 s32i a0, sp, 0
|
|
860 s32i a8, sp, 8
|
|
861 #endif
|
|
862
|
|
863 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
|
|
864
|
|
865 #define xlh a12
|
|
866 #define ylh a13
|
|
867 #define xhh a14
|
|
868 #define yhh a15
|
|
869
|
|
870 /* Get the high halves of the inputs into registers. */
|
|
871 srli xlh, xl, 16
|
|
872 srli ylh, yl, 16
|
|
873 srli xhh, xh, 16
|
|
874 srli yhh, yh, 16
|
|
875
|
|
876 #define xll xl
|
|
877 #define yll yl
|
|
878 #define xhl xh
|
|
879 #define yhl yh
|
|
880
|
|
881 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
|
|
882 /* Clear the high halves of the inputs. This does not matter
|
|
883 for MUL16 because the high bits are ignored. */
|
|
884 extui xl, xl, 0, 16
|
|
885 extui xh, xh, 0, 16
|
|
886 extui yl, yl, 0, 16
|
|
887 extui yh, yh, 0, 16
|
|
888 #endif
|
|
889 #endif /* MUL16 || MUL32 */
|
|
890
|
|
891
|
|
892 #if XCHAL_HAVE_MUL16
|
|
893
|
|
894 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
|
|
895 mul16u dst, xreg ## xhalf, yreg ## yhalf
|
|
896
|
|
897 #elif XCHAL_HAVE_MUL32
|
|
898
|
|
899 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
|
|
900 mull dst, xreg ## xhalf, yreg ## yhalf
|
|
901
|
|
902 #elif XCHAL_HAVE_MAC16
|
|
903
|
|
904 /* The preprocessor insists on inserting a space when concatenating after
|
|
905 a period in the definition of do_mul below. These macros are a workaround
|
|
906 using underscores instead of periods when doing the concatenation. */
|
|
907 #define umul_aa_ll umul.aa.ll
|
|
908 #define umul_aa_lh umul.aa.lh
|
|
909 #define umul_aa_hl umul.aa.hl
|
|
910 #define umul_aa_hh umul.aa.hh
|
|
911
|
|
912 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
|
|
913 umul_aa_ ## xhalf ## yhalf xreg, yreg; \
|
|
914 rsr dst, ACCLO
|
|
915
|
|
916 #else /* no multiply hardware */
|
|
917
|
|
918 #define set_arg_l(dst, src) \
|
|
919 extui dst, src, 0, 16
|
|
920 #define set_arg_h(dst, src) \
|
|
921 srli dst, src, 16
|
|
922
|
|
923 #if __XTENSA_CALL0_ABI__
|
|
924 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
|
|
925 set_arg_ ## xhalf (a13, xreg); \
|
|
926 set_arg_ ## yhalf (a14, yreg); \
|
|
927 call0 .Lmul_mulsi3; \
|
|
928 mov dst, a12
|
|
929 #else
|
|
930 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
|
|
931 set_arg_ ## xhalf (a14, xreg); \
|
|
932 set_arg_ ## yhalf (a15, yreg); \
|
|
933 call12 .Lmul_mulsi3; \
|
|
934 mov dst, a14
|
|
935 #endif /* __XTENSA_CALL0_ABI__ */
|
|
936
|
|
937 #endif /* no multiply hardware */
|
|
938
|
|
939 /* Add pp1 and pp2 into a10 with carry-out in a9. */
|
|
940 do_mul(a10, xl, l, yl, h) /* pp 1 */
|
|
941 do_mul(a11, xl, h, yl, l) /* pp 2 */
|
|
942 movi a9, 0
|
|
943 add a10, a10, a11
|
|
944 bgeu a10, a11, 1f
|
|
945 addi a9, a9, 1
|
|
946 1:
|
|
947 /* Initialize a6 with a9/a10 shifted into position. Note that
|
|
948 this value can be safely incremented without any carry-outs. */
|
|
949 ssai 16
|
|
950 src a6, a9, a10
|
|
951
|
|
952 /* Compute the low word into a10. */
|
|
953 do_mul(a11, xl, l, yl, l) /* pp 0 */
|
|
954 sll a10, a10
|
|
955 add a10, a10, a11
|
|
956 bgeu a10, a11, 1f
|
|
957 addi a6, a6, 1
|
|
958 1:
|
|
959 /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
|
|
960 This is good enough to determine the low half of a6, so that any
|
|
961 nonzero bits from the low word of the result can be collapsed
|
|
962 into a6, freeing up a register. */
|
|
963 movi a9, 0
|
|
964 do_mul(a11, xl, l, yh, l) /* pp 3 */
|
|
965 add a6, a6, a11
|
|
966 bgeu a6, a11, 1f
|
|
967 addi a9, a9, 1
|
|
968 1:
|
|
969 do_mul(a11, xl, h, yl, h) /* pp 4 */
|
|
970 add a6, a6, a11
|
|
971 bgeu a6, a11, 1f
|
|
972 addi a9, a9, 1
|
|
973 1:
|
|
974 do_mul(a11, xh, l, yl, l) /* pp 5 */
|
|
975 add a6, a6, a11
|
|
976 bgeu a6, a11, 1f
|
|
977 addi a9, a9, 1
|
|
978 1:
|
|
979 /* Collapse any nonzero bits from the low word into a6. */
|
|
980 beqz a10, 1f
|
|
981 movi a11, 1
|
|
982 or a6, a6, a11
|
|
983 1:
|
|
984 /* Add pp6-9 into a11 with carry-outs in a10. */
|
|
985 do_mul(a7, xl, l, yh, h) /* pp 6 */
|
|
986 do_mul(a11, xh, h, yl, l) /* pp 9 */
|
|
987 movi a10, 0
|
|
988 add a11, a11, a7
|
|
989 bgeu a11, a7, 1f
|
|
990 addi a10, a10, 1
|
|
991 1:
|
|
992 do_mul(a7, xl, h, yh, l) /* pp 7 */
|
|
993 add a11, a11, a7
|
|
994 bgeu a11, a7, 1f
|
|
995 addi a10, a10, 1
|
|
996 1:
|
|
997 do_mul(a7, xh, l, yl, h) /* pp 8 */
|
|
998 add a11, a11, a7
|
|
999 bgeu a11, a7, 1f
|
|
1000 addi a10, a10, 1
|
|
1001 1:
|
|
1002 /* Shift a10/a11 into position, and add low half of a11 to a6. */
|
|
1003 src a10, a10, a11
|
|
1004 add a10, a10, a9
|
|
1005 sll a11, a11
|
|
1006 add a6, a6, a11
|
|
1007 bgeu a6, a11, 1f
|
|
1008 addi a10, a10, 1
|
|
1009 1:
|
|
1010 /* Add pp10-12 into xl with carry-outs in a9. */
|
|
1011 movi a9, 0
|
|
1012 do_mul(xl, xl, h, yh, h) /* pp 10 */
|
|
1013 add xl, xl, a10
|
|
1014 bgeu xl, a10, 1f
|
|
1015 addi a9, a9, 1
|
|
1016 1:
|
|
1017 do_mul(a10, xh, l, yh, l) /* pp 11 */
|
|
1018 add xl, xl, a10
|
|
1019 bgeu xl, a10, 1f
|
|
1020 addi a9, a9, 1
|
|
1021 1:
|
|
1022 do_mul(a10, xh, h, yl, h) /* pp 12 */
|
|
1023 add xl, xl, a10
|
|
1024 bgeu xl, a10, 1f
|
|
1025 addi a9, a9, 1
|
|
1026 1:
|
|
1027 /* Add pp13-14 into a11 with carry-outs in a10. */
|
|
1028 do_mul(a11, xh, l, yh, h) /* pp 13 */
|
|
1029 do_mul(a7, xh, h, yh, l) /* pp 14 */
|
|
1030 movi a10, 0
|
|
1031 add a11, a11, a7
|
|
1032 bgeu a11, a7, 1f
|
|
1033 addi a10, a10, 1
|
|
1034 1:
|
|
1035 /* Shift a10/a11 into position, and add low half of a11 to a6. */
|
|
1036 src a10, a10, a11
|
|
1037 add a10, a10, a9
|
|
1038 sll a11, a11
|
|
1039 add xl, xl, a11
|
|
1040 bgeu xl, a11, 1f
|
|
1041 addi a10, a10, 1
|
|
1042 1:
|
|
1043 /* Compute xh. */
|
|
1044 do_mul(xh, xh, h, yh, h) /* pp 15 */
|
|
1045 add xh, xh, a10
|
|
1046
|
|
1047 /* Restore values saved on the stack during the multiplication. */
|
|
1048 l32i a7, sp, 4
|
|
1049 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
|
|
1050 l32i a0, sp, 0
|
|
1051 l32i a8, sp, 8
|
|
1052 #endif
|
|
1053 #endif /* ! XCHAL_HAVE_MUL32_HIGH */
|
|
1054
|
|
1055 /* Shift left by 12 bits, unless there was a carry-out from the
|
|
1056 multiply, in which case, shift by 11 bits and increment the
|
|
1057 exponent. Note: It is convenient to use the constant 0x3ff
|
|
1058 instead of 0x400 when removing the extra exponent bias (so that
|
|
1059 it is easy to construct 0x7fe for the overflow check). Reverse
|
|
1060 the logic here to decrement the exponent sum by one unless there
|
|
1061 was a carry-out. */
|
|
1062 movi a4, 11
|
|
1063 srli a5, xh, 21 - 12
|
|
1064 bnez a5, 1f
|
|
1065 addi a4, a4, 1
|
|
1066 addi a8, a8, -1
|
|
1067 1: ssl a4
|
|
1068 src xh, xh, xl
|
|
1069 src xl, xl, a6
|
|
1070 sll a6, a6
|
|
1071
|
|
1072 /* Subtract the extra bias from the exponent sum (plus one to account
|
|
1073 for the explicit "1.0" of the mantissa that will be added to the
|
|
1074 exponent in the final result). */
|
|
1075 movi a4, 0x3ff
|
|
1076 sub a8, a8, a4
|
|
1077
|
|
1078 /* Check for over/underflow. The value in a8 is one less than the
|
|
1079 final exponent, so values in the range 0..7fd are OK here. */
|
|
1080 slli a4, a4, 1 /* 0x7fe */
|
|
1081 bgeu a8, a4, .Lmul_overflow
|
|
1082
|
|
1083 .Lmul_round:
|
|
1084 /* Round. */
|
|
1085 bgez a6, .Lmul_rounded
|
|
1086 addi xl, xl, 1
|
|
1087 beqz xl, .Lmul_roundcarry
|
|
1088 slli a6, a6, 1
|
|
1089 beqz a6, .Lmul_exactlyhalf
|
|
1090
|
|
1091 .Lmul_rounded:
|
|
1092 /* Add the exponent to the mantissa. */
|
|
1093 slli a8, a8, 20
|
|
1094 add xh, xh, a8
|
|
1095
|
|
1096 .Lmul_addsign:
|
|
1097 /* Add the sign bit. */
|
|
1098 srli a7, a7, 31
|
|
1099 slli a7, a7, 31
|
|
1100 or xh, xh, a7
|
|
1101
|
|
1102 .Lmul_done:
|
|
1103 #if __XTENSA_CALL0_ABI__
|
|
1104 l32i a12, sp, 16
|
|
1105 l32i a13, sp, 20
|
|
1106 l32i a14, sp, 24
|
|
1107 l32i a15, sp, 28
|
|
1108 addi sp, sp, 32
|
|
1109 #endif
|
|
1110 leaf_return
|
|
1111
|
|
1112 .Lmul_exactlyhalf:
|
|
1113 /* Round down to the nearest even value. */
|
|
1114 srli xl, xl, 1
|
|
1115 slli xl, xl, 1
|
|
1116 j .Lmul_rounded
|
|
1117
|
|
1118 .Lmul_roundcarry:
|
|
1119 /* xl is always zero when the rounding increment overflows, so
|
|
1120 there's no need to round it to an even value. */
|
|
1121 addi xh, xh, 1
|
|
1122 /* Overflow is OK -- it will be added to the exponent. */
|
|
1123 j .Lmul_rounded
|
|
1124
|
|
1125 .Lmul_overflow:
|
|
1126 bltz a8, .Lmul_underflow
|
|
1127 /* Return +/- Infinity. */
|
|
1128 addi a8, a4, 1 /* 0x7ff */
|
|
1129 slli xh, a8, 20
|
|
1130 movi xl, 0
|
|
1131 j .Lmul_addsign
|
|
1132
|
|
1133 .Lmul_underflow:
|
|
1134 /* Create a subnormal value, where the exponent field contains zero,
|
|
1135 but the effective exponent is 1. The value of a8 is one less than
|
|
1136 the actual exponent, so just negate it to get the shift amount. */
|
|
1137 neg a8, a8
|
|
1138 mov a9, a6
|
|
1139 ssr a8
|
|
1140 bgeui a8, 32, .Lmul_bigshift
|
|
1141
|
|
1142 /* Shift xh/xl right. Any bits that are shifted out of xl are saved
|
|
1143 in a6 (combined with the shifted-out bits currently in a6) for
|
|
1144 rounding the result. */
|
|
1145 sll a6, xl
|
|
1146 src xl, xh, xl
|
|
1147 srl xh, xh
|
|
1148 j 1f
|
|
1149
|
|
1150 .Lmul_bigshift:
|
|
1151 bgeui a8, 64, .Lmul_flush_to_zero
|
|
1152 sll a10, xl /* lost bits shifted out of xl */
|
|
1153 src a6, xh, xl
|
|
1154 srl xl, xh
|
|
1155 movi xh, 0
|
|
1156 or a9, a9, a10
|
|
1157
|
|
1158 /* Set the exponent to zero. */
|
|
1159 1: movi a8, 0
|
|
1160
|
|
1161 /* Pack any nonzero bits shifted out into a6. */
|
|
1162 beqz a9, .Lmul_round
|
|
1163 movi a9, 1
|
|
1164 or a6, a6, a9
|
|
1165 j .Lmul_round
|
|
1166
|
|
1167 .Lmul_flush_to_zero:
|
|
1168 /* Return zero with the appropriate sign bit. */
|
|
1169 srli xh, a7, 31
|
|
1170 slli xh, xh, 31
|
|
1171 movi xl, 0
|
|
1172 j .Lmul_done
|
|
1173
|
|
1174 #if XCHAL_NO_MUL
|
|
1175
|
|
1176 /* For Xtensa processors with no multiply hardware, this simplified
|
|
1177 version of _mulsi3 is used for multiplying 16-bit chunks of
|
|
1178 the floating-point mantissas. When using CALL0, this function
|
|
1179 uses a custom ABI: the inputs are passed in a13 and a14, the
|
|
1180 result is returned in a12, and a8 and a15 are clobbered. */
|
|
1181 .align 4
|
|
1182 .Lmul_mulsi3:
|
|
1183 leaf_entry sp, 16
|
|
1184 .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
|
|
1185 movi \dst, 0
|
|
1186 1: add \tmp1, \src2, \dst
|
|
1187 extui \tmp2, \src1, 0, 1
|
|
1188 movnez \dst, \tmp1, \tmp2
|
|
1189
|
|
1190 do_addx2 \tmp1, \src2, \dst, \tmp1
|
|
1191 extui \tmp2, \src1, 1, 1
|
|
1192 movnez \dst, \tmp1, \tmp2
|
|
1193
|
|
1194 do_addx4 \tmp1, \src2, \dst, \tmp1
|
|
1195 extui \tmp2, \src1, 2, 1
|
|
1196 movnez \dst, \tmp1, \tmp2
|
|
1197
|
|
1198 do_addx8 \tmp1, \src2, \dst, \tmp1
|
|
1199 extui \tmp2, \src1, 3, 1
|
|
1200 movnez \dst, \tmp1, \tmp2
|
|
1201
|
|
1202 srli \src1, \src1, 4
|
|
1203 slli \src2, \src2, 4
|
|
1204 bnez \src1, 1b
|
|
1205 .endm
|
|
1206 #if __XTENSA_CALL0_ABI__
|
|
1207 mul_mulsi3_body a12, a13, a14, a15, a8
|
|
1208 #else
|
|
1209 /* The result will be written into a2, so save that argument in a4. */
|
|
1210 mov a4, a2
|
|
1211 mul_mulsi3_body a2, a4, a3, a5, a6
|
|
1212 #endif
|
|
1213 leaf_return
|
|
1214 #endif /* XCHAL_NO_MUL */
|
|
1215 #endif /* L_muldf3 */
|
|
1216
|
|
1217 #ifdef L_divdf3
|
|
1218
|
|
1219 /* Division */
|
|
/* Division.  Computes x / y where x = xh/xl and y = yh/yl (register
   pairs a2/a3 and a4/a5, high/low assignment selected by the
   __XTENSA_EB__ macros at the top of this file).  Result in xh/xl.
   Register roles in the main path: a6 = 0x7ff00000 (exponent mask),
   a7 = sign of the result (in bit 31), a8 = x exponent / result
   exponent, a9 = y exponent / scratch, a10/a11 = quotient.  */
__divdf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
	/* y has a zero exponent field: it is zero or subnormal.
	   Clear the sign bit of y.  */
	slli	yh, yh, 1
	srli	yh, yh, 1

	/* Check for division by zero.  */
	or	a10, yh, yl
	beqz	a10, .Ldiv_yzero

	/* Normalize y.  Adjust the exponent in a9.  */
	beqz	yh, .Ldiv_yh_zero
	do_nsau	a10, yh, a11, a9	/* a10 = leading zero count of yh */
	addi	a10, a10, -11		/* shift to put the MSB at bit 20 */
	ssl	a10
	src	yh, yh, yl
	sll	yl, yl
	movi	a9, 1
	sub	a9, a9, a10		/* effective exponent of a subnormal is
					   1, reduced by the normalizing shift */
	j	.Ldiv_ynormalized
.Ldiv_yh_zero:
	/* yh is zero; normalize starting from yl (shift >= 21 bits).  */
	do_nsau	a10, yl, a11, a9
	addi	a10, a10, -11
	movi	a9, -31
	sub	a9, a9, a10
	ssl	a10
	bltz	a10, .Ldiv_yl_srl	/* negative => net right shift of yl */
	sll	yh, yl
	movi	yl, 0
	j	.Ldiv_ynormalized
.Ldiv_yl_srl:
	srl	yh, yl
	sll	yl, yl
	j	.Ldiv_ynormalized

.Ldiv_yzero:
	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
	slli	xh, xh, 1
	srli	xh, xh, 1		/* clear the sign bit of x */
	or	xl, xl, xh		/* xl == 0 iff x == 0 */
	srli	xh, a7, 31
	slli	xh, xh, 31		/* result sign */
	or	xh, xh, a6		/* exponent = 0x7ff (Inf) */
	bnez	xl, 1f
	movi	a4, 0x80000		/* make it a quiet NaN */
	or	xh, xh, a4
1:	movi	xl, 0
	leaf_return

.Ldiv_xexpzero:
	/* x has a zero exponent field: zero or subnormal.
	   Clear the sign bit of x.  */
	slli	xh, xh, 1
	srli	xh, xh, 1

	/* If x is zero, return zero.  */
	or	a10, xh, xl
	beqz	a10, .Ldiv_return_zero

	/* Normalize x.  Adjust the exponent in a8.  (Mirror image of the
	   y-normalization code above.)  */
	beqz	xh, .Ldiv_xh_zero
	do_nsau	a10, xh, a11, a8
	addi	a10, a10, -11
	ssl	a10
	src	xh, xh, xl
	sll	xl, xl
	movi	a8, 1
	sub	a8, a8, a10
	j	.Ldiv_xnormalized
.Ldiv_xh_zero:
	do_nsau	a10, xl, a11, a8
	addi	a10, a10, -11
	movi	a8, -31
	sub	a8, a8, a10
	ssl	a10
	bltz	a10, .Ldiv_xl_srl
	sll	xh, xl
	movi	xl, 0
	j	.Ldiv_xnormalized
.Ldiv_xl_srl:
	srl	xh, xl
	sll	xl, xl
	j	.Ldiv_xnormalized

.Ldiv_return_zero:
	/* Return zero with the appropriate sign bit (from a7).  */
	srli	xh, a7, 31
	slli	xh, xh, 31
	movi	xl, 0
	leaf_return

.Ldiv_xnan_or_inf:
	/* Set the sign bit of the result.  */
	srli	a7, yh, 31
	slli	a7, a7, 31
	xor	xh, xh, a7
	/* If y is NaN or Inf, return NaN (Inf/Inf and NaN/y are invalid).  */
	bnall	yh, a6, 1f
	movi	a4, 0x80000		/* make it a quiet NaN */
	or	xh, xh, a4
1:	leaf_return

.Ldiv_ynan_or_inf:
	/* If y is Infinity, return zero.  */
	slli	a8, yh, 12
	or	a8, a8, yl		/* mantissa bits of y */
	beqz	a8, .Ldiv_return_zero
	/* y is NaN; return it.  */
	mov	xh, yh
	mov	xl, yl
	leaf_return

.Ldiv_highequal1:
	/* High words of x and y are equal; compare the low words to decide
	   whether x needs the pre-shift before the first subtraction.  */
	bltu	xl, yl, 2f
	j	3f

	.align	4
	.global	__divdf3
	.type	__divdf3, @function
__divdf3:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000

	/* Get the sign of the result.  */
	xor	a7, xh, yh

	/* Check for NaN and infinity.  */
	ball	xh, a6, .Ldiv_xnan_or_inf
	ball	yh, a6, .Ldiv_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, xh, 20, 11
	extui	a9, yh, 20, 11

	beqz	a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
	beqz	a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

	/* Subtract the exponents.  */
	sub	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0x1fffff
	or	xh, xh, a6
	and	xh, xh, a10
	or	yh, yh, a6
	and	yh, yh, a10

	/* Set SAR for left shift by one.  */
	ssai	(32 - 1)

	/* The first digit of the mantissa division must be a one.
	   Shift x (and adjust the exponent) as needed to make this true.  */
	bltu	yh, xh, 3f
	beq	yh, xh, .Ldiv_highequal1
2:	src	xh, xh, xl
	sll	xl, xl
	addi	a8, a8, -1
3:
	/* Do the first subtraction and shift.  */
	sub	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, -1		/* borrow from the high word */
1:	sub	xl, xl, yl
	src	xh, xh, xl
	sll	xl, xl

	/* Put the quotient into a10/a11.  */
	movi	a10, 0
	movi	a11, 1

	/* Divide one bit at a time for 52 bits.  */
	movi	a9, 52
#if XCHAL_HAVE_LOOPS
	loop	a9, .Ldiv_loopend
#endif
.Ldiv_loop:
	/* Shift the quotient << 1.  */
	src	a10, a10, a11
	sll	a11, a11

	/* Is this digit a 0 or 1?  (Compare remainder x against y.)  */
	bltu	xh, yh, 3f
	beq	xh, yh, .Ldiv_highequal2

	/* Output a 1 and subtract.  */
2:	addi	a11, a11, 1
	sub	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, -1		/* borrow from the high word */
1:	sub	xl, xl, yl

	/* Shift the dividend << 1.  */
3:	src	xh, xh, xl
	sll	xl, xl

#if !XCHAL_HAVE_LOOPS
	addi	a9, a9, -1
	bnez	a9, .Ldiv_loop
#endif
.Ldiv_loopend:

	/* Add the exponent bias (less one to account for the explicit "1.0"
	   of the mantissa that will be added to the exponent in the final
	   result).  */
	movi	a9, 0x3fe
	add	a8, a8, a9

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..7fd are OK here.  */
	addmi	a9, a9, 0x400		/* 0x7fe */
	bgeu	a8, a9, .Ldiv_overflow

.Ldiv_round:
	/* Round.  The remainder (<< 1) is in xh/xl; round up when it
	   exceeds the divisor, i.e. the discarded fraction is > 1/2.  */
	bltu	xh, yh, .Ldiv_rounded
	beq	xh, yh, .Ldiv_highequal3
.Ldiv_roundup:
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry

.Ldiv_rounded:
	mov	xl, a11
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 20
	add	xh, a10, a8

.Ldiv_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	xh, xh, a7
	leaf_return

.Ldiv_highequal2:
	/* High words equal inside the loop: decide digit via low words.  */
	bgeu	xl, yl, 2b
	j	3b

.Ldiv_highequal3:
	/* High words of remainder and divisor equal at rounding time.  */
	bltu	xl, yl, .Ldiv_rounded
	bne	xl, yl, .Ldiv_roundup

	/* Remainder is exactly half the divisor.  Round even.  */
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry
	srli	a11, a11, 1
	slli	a11, a11, 1
	j	.Ldiv_rounded

.Ldiv_overflow:
	bltz	a8, .Ldiv_underflow
	/* Return +/- Infinity.  */
	addi	a8, a9, 1		/* 0x7ff */
	slli	xh, a8, 20
	movi	xl, 0
	j	.Ldiv_addsign

.Ldiv_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	ssr	a8
	bgeui	a8, 32, .Ldiv_bigshift

	/* Shift a10/a11 right.  Any bits that are shifted out of a11 are
	   saved in a6 for rounding the result.  */
	sll	a6, a11
	src	a11, a10, a11
	srl	a10, a10
	j	1f

.Ldiv_bigshift:
	bgeui	a8, 64, .Ldiv_flush_to_zero
	sll	a9, a11			/* lost bits shifted out of a11 */
	src	a6, a10, a11
	srl	a11, a10
	movi	a10, 0
	or	xl, xl, a9		/* keep lost bits as sticky in xl */

	/* Set the exponent to zero.  */
1:	movi	a8, 0

	/* Pack any nonzero remainder (in xh/xl) into a6.  */
	or	xh, xh, xl
	beqz	xh, 1f
	movi	a9, 1
	or	a6, a6, a9		/* sticky bit */

	/* Round a10/a11 based on the bits shifted out into a6.  */
1:	bgez	a6, .Ldiv_rounded
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry
	slli	a6, a6, 1
	bnez	a6, .Ldiv_rounded
	/* Discarded fraction was exactly 1/2: round to even.  */
	srli	a11, a11, 1
	slli	a11, a11, 1
	j	.Ldiv_rounded

.Ldiv_roundcarry:
	/* a11 is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	a10, a10, 1
	/* Overflow to the exponent field is OK.  */
	j	.Ldiv_rounded

.Ldiv_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	xh, a7, 31
	slli	xh, xh, 31
	movi	xl, 0
	leaf_return
|
|
1537
|
|
1538 #endif /* L_divdf3 */
|
|
1539
|
|
1540 #ifdef L_cmpdf2
|
|
1541
|
|
1542 /* Equal and Not Equal */
|
|
1543
|
|
	/* Equal and Not Equal.  Returns 0 in a2 iff x == y (NaN compares
	   unequal to everything, +0 == -0).  __nedf2 is an alias: callers
	   of both test the result against zero, so one body serves both.  */

	.align	4
	.global	__eqdf2
	.global	__nedf2
	.set	__nedf2, __eqdf2
	.type	__eqdf2, @function
__eqdf2:
	leaf_entry sp, 16
	bne	xl, yl, 2f
	bne	xh, yh, 4f

	/* The values are bitwise equal but NaN != NaN.  Check the exponent.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, 3f

	/* Equal.  */
	movi	a2, 0
	leaf_return

	/* Not equal.  */
2:	movi	a2, 1
	leaf_return

	/* Exponent is all ones: check if the mantissa is nonzero (NaN).  */
3:	slli	a7, xh, 12
	or	a7, a7, xl
	j	5f

	/* High words differ, low words equal: the only way the values can
	   still be equal is x == +0 and y == -0 (or vice versa).  */
4:	or	a7, xh, yh
	slli	a7, a7, 1		/* discard the sign bits */
	or	a7, a7, xl		/* xl == yl here */

	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   of x when exponent(x) = 0x7ff and x == y.  */
5:	movi	a2, 0
	movi	a3, 1
	movnez	a2, a3, a7
	leaf_return
|
|
1582
|
|
1583
|
|
1584 /* Greater Than */
|
|
1585
|
|
	/* Greater Than.  Returns a2 > 0 iff x > y; returns 0 when x <= y or
	   when either operand is NaN (unordered).  Shares the actual
	   comparison with __ledf2 via .Lle_cmp; only the NaN result
	   differs between the two entry points.  */

	.align	4
	.global	__gtdf2
	.type	__gtdf2, @function
__gtdf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f		/* x has max exponent: NaN or Inf */
1:	bnall	yh, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Lle_cmp		/* y is Inf, comparison still ordered */
	movi	a2, 0			/* unordered: not greater */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b			/* x is Inf, go check y */
	movi	a2, 0			/* unordered: not greater */
	leaf_return
|
|
1608
|
|
1609
|
|
1610 /* Less Than or Equal */
|
|
1611
|
|
	/* Less Than or Equal.  Returns a2 <= 0 iff x <= y; returns 1 when
	   x > y or when either operand is NaN (unordered).  The ordered
	   comparison at .Lle_cmp is also the tail of __gtdf2.  */

	.align	4
	.global	__ledf2
	.type	__ledf2, @function
__ledf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f		/* x has max exponent: NaN or Inf */
1:	bnall	yh, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Lle_cmp		/* y is Inf, comparison still ordered */
	movi	a2, 1			/* unordered: not less-or-equal */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b			/* x is Inf, go check y */
	movi	a2, 1			/* unordered: not less-or-equal */
	leaf_return

.Lle_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, xh, yh
	bltz	a7, .Lle_diff_signs

	/* Check if x is negative.  */
	bltz	xh, .Lle_xneg

	/* Both non-negative: the IEEE bit patterns compare like unsigned
	   integers.  Check if x <= y.  */
	bltu	xh, yh, 4f
	bne	xh, yh, 5f
	bltu	yl, xl, 5f
4:	movi	a2, 0			/* x <= y */
	leaf_return

.Lle_xneg:
	/* Both negative: the ordering is reversed.  Check if y <= x.  */
	bltu	yh, xh, 4b
	bne	yh, xh, 5f
	bgeu	xl, yl, 4b
5:	movi	a2, 1			/* x > y */
	leaf_return

.Lle_diff_signs:
	bltz	xh, 4b			/* x negative, y positive: x <= y */

	/* x positive, y negative: x <= y only if both x and y are zero.  */
	or	a7, xh, yh
	slli	a7, a7, 1		/* discard the sign bits */
	or	a7, a7, xl
	or	a7, a7, yl
	movi	a2, 1
	movi	a3, 0
	moveqz	a2, a3, a7		/* a7 == 0 => +0 vs -0 => equal */
	leaf_return
|
|
1670
|
|
1671
|
|
1672 /* Greater Than or Equal */
|
|
1673
|
|
	/* Greater Than or Equal.  Returns a2 >= 0 iff x >= y; returns -1
	   when x < y or when either operand is NaN (unordered).  Shares
	   the ordered comparison with __ltdf2 via .Llt_cmp; only the NaN
	   result differs between the two entry points.  */

	.align	4
	.global	__gedf2
	.type	__gedf2, @function
__gedf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f		/* x has max exponent: NaN or Inf */
1:	bnall	yh, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Llt_cmp		/* y is Inf, comparison still ordered */
	movi	a2, -1			/* unordered: not greater-or-equal */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b			/* x is Inf, go check y */
	movi	a2, -1			/* unordered: not greater-or-equal */
	leaf_return
|
|
1696
|
|
1697
|
|
1698 /* Less Than */
|
|
1699
|
|
	/* Less Than.  Returns a2 < 0 iff x < y; returns 0 when x >= y or
	   when either operand is NaN (unordered).  The ordered comparison
	   at .Llt_cmp is also the tail of __gedf2.  */

	.align	4
	.global	__ltdf2
	.type	__ltdf2, @function
__ltdf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f		/* x has max exponent: NaN or Inf */
1:	bnall	yh, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Llt_cmp		/* y is Inf, comparison still ordered */
	movi	a2, 0			/* unordered: not less */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b			/* x is Inf, go check y */
	movi	a2, 0			/* unordered: not less */
	leaf_return

.Llt_cmp:
	/* Check if x and y have different signs.  */
	xor	a7, xh, yh
	bltz	a7, .Llt_diff_signs

	/* Check if x is negative.  */
	bltz	xh, .Llt_xneg

	/* Both non-negative: the IEEE bit patterns compare like unsigned
	   integers.  Check if x < y.  */
	bltu	xh, yh, 4f
	bne	xh, yh, 5f
	bgeu	xl, yl, 5f
4:	movi	a2, -1			/* x < y */
	leaf_return

.Llt_xneg:
	/* Both negative: the ordering is reversed.  Check if y < x.  */
	bltu	yh, xh, 4b
	bne	yh, xh, 5f
	bltu	yl, xl, 4b
5:	movi	a2, 0			/* x >= y */
	leaf_return

.Llt_diff_signs:
	bgez	xh, 5b			/* x positive, y negative: x >= y */

	/* x negative, y positive: x < y unless both x and y are zero
	   (+0 and -0 compare equal).  */
	or	a7, xh, yh
	slli	a7, a7, 1		/* discard the sign bits */
	or	a7, a7, xl
	or	a7, a7, yl
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7		/* nonzero => genuinely x < y */
	leaf_return
|
|
1758
|
|
1759
|
|
1760 /* Unordered */
|
|
1761
|
|
	/* Unordered.  Returns nonzero in a2 iff either x or y is a NaN
	   (exponent all ones and nonzero mantissa); 0 otherwise.  */

	.align	4
	.global	__unorddf2
	.type	__unorddf2, @function
__unorddf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 3f		/* x has max exponent: NaN or Inf */
1:	ball	yh, a6, 4f		/* y has max exponent: NaN or Inf */
2:	movi	a2, 0			/* ordered */
	leaf_return

	/* Distinguish x == Inf (ordered) from x == NaN.  */
3:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b			/* mantissa zero: Inf, go check y */
	movi	a2, 1			/* x is NaN: unordered */
	leaf_return

	/* Distinguish y == Inf (ordered) from y == NaN.  */
4:	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, 2b			/* mantissa zero: Inf, ordered */
	movi	a2, 1			/* y is NaN: unordered */
	leaf_return
|
|
1784
|
|
1785 #endif /* L_cmpdf2 */
|
|
1786
|
|
1787 #ifdef L_fixdfsi
|
|
1788
|
|
	/* Convert double to signed 32-bit integer, truncating toward zero.
	   Input in xh/xl, result in a2.  Out-of-range values, Inf and NaN
	   saturate: positive overflow and NaN give 0x7fffffff, negative
	   overflow gives 0x80000000.  */

	.align	4
	.global	__fixdfsi
	.type	__fixdfsi, @function
__fixdfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixdfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 19, 10		/* 0x3fe */
	sub	a4, a4, a5
	bgei	a4, 32, .Lfixdfsi_maxint
	blti	a4, 1, .Lfixdfsi_zero	/* |x| < 1 truncates to 0 */

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6		/* a7 also keeps the sign in bit 31 */
	ssai	(32 - 11)
	src	a5, a7, xl

	/* Shift back to the right, based on the exponent.  */
	ssl	a4			/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixdfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixdfsi_maxint	/* mantissa zero: Infinity */

	/* Translate NaN to +maxint (clear the sign so movgez picks a5).  */
	movi	xh, 0

.Lfixdfsi_maxint:
	slli	a4, a6, 11		/* 0x80000000 */
	addi	a5, a4, -1		/* 0x7fffffff */
	movgez	a4, a5, xh		/* positive => 0x7fffffff */
	mov	a2, a4
	leaf_return

.Lfixdfsi_zero:
	movi	a2, 0
	leaf_return
|
|
1839
|
|
1840 #endif /* L_fixdfsi */
|
|
1841
|
|
1842 #ifdef L_fixdfdi
|
|
1843
|
|
	/* Convert double to signed 64-bit integer, truncating toward zero.
	   Input in xh/xl, result returned in the same xh/xl register pair.
	   Out-of-range values, Inf and NaN saturate: positive overflow and
	   NaN give 0x7fffffffffffffff, negative overflow gives
	   0x8000000000000000.  */

	.align	4
	.global	__fixdfdi
	.type	__fixdfdi, @function
__fixdfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixdfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 19, 10		/* 0x3fe */
	sub	a4, a4, a5
	bgei	a4, 64, .Lfixdfdi_maxint
	blti	a4, 1, .Lfixdfdi_zero	/* |x| < 1 truncates to 0 */

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6		/* a7 also keeps the sign in bit 31 */
	ssai	(32 - 11)
	src	xh, a7, xl
	sll	xl, xl

	/* Shift back to the right, based on the exponent.  */
	ssl	a4			/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixdfdi_smallshift
	srl	xl, xh			/* shift amount >= 32 */
	movi	xh, 0

.Lfixdfdi_shifted:
	/* Negate the 64-bit result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1		/* propagate the borrow */
1:	leaf_return

.Lfixdfdi_smallshift:
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixdfdi_shifted

.Lfixdfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixdfdi_maxint	/* mantissa zero: Infinity */

	/* Translate NaN to +maxint (clear the sign for the test below).  */
	movi	xh, 0

.Lfixdfdi_maxint:
	slli	a7, a6, 11		/* 0x80000000 */
	bgez	xh, 1f
	mov	xh, a7			/* most negative: 0x80000000:00000000 */
	movi	xl, 0
	leaf_return

1:	addi	xh, a7, -1		/* 0x7fffffff */
	movi	xl, -1			/* most positive: 0x7fffffff:ffffffff */
	leaf_return

.Lfixdfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return
|
|
1911
|
|
1912 #endif /* L_fixdfdi */
|
|
1913
|
|
1914 #ifdef L_fixunsdfsi
|
|
1915
|
|
	/* Convert double to unsigned 32-bit integer, truncating toward
	   zero.  Input in xh/xl, result in a2.  NaN gives 0xffffffff;
	   positive overflow gives 0xffffffff; negative values out of range
	   give 0x80000000 (taken from the movgez/saturation paths below).  */

	.align	4
	.global	__fixunsdfsi
	.type	__fixunsdfsi, @function
__fixunsdfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixunsdfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 20, 10		/* 0x3ff */
	sub	a4, a4, a5
	bgei	a4, 32, .Lfixunsdfsi_maxint
	bltz	a4, .Lfixunsdfsi_zero	/* |x| < 1 truncates to 0 */

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6		/* a7 also keeps the sign in bit 31 */
	ssai	(32 - 11)
	src	a5, a7, xl

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 32, .Lfixunsdfsi_bigexp	/* srl by 0 is not valid here */
	ssl	a4			/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixunsdfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixunsdfsi_maxint	/* mantissa zero: Infinity */

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunsdfsi_maxint:
	slli	a4, a6, 11		/* 0x80000000 */
	movi	a5, -1			/* 0xffffffff */
	movgez	a4, a5, xh		/* positive => 0xffffffff */
	mov	a2, a4
	leaf_return

.Lfixunsdfsi_zero:
	movi	a2, 0
	leaf_return

.Lfixunsdfsi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	xh, 1f
	mov	a2, a5			/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 11
	leaf_return
|
|
1979
|
|
1980 #endif /* L_fixunsdfsi */
|
|
1981
|
|
1982 #ifdef L_fixunsdfdi
|
|
1983
|
|
	/* Convert double to unsigned 64-bit integer, truncating toward
	   zero.  Input in xh/xl, result returned in the same xh/xl pair.
	   NaN and positive overflow give 0xffffffffffffffff; negative
	   values out of range give 0x8000000000000000.  */

	.align	4
	.global	__fixunsdfdi
	.type	__fixunsdfdi, @function
__fixunsdfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixunsdfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 20, 10		/* 0x3ff */
	sub	a4, a4, a5
	bgei	a4, 64, .Lfixunsdfdi_maxint
	bltz	a4, .Lfixunsdfdi_zero	/* |x| < 1 truncates to 0 */

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6		/* a7 also keeps the sign in bit 31 */
	ssai	(32 - 11)
	src	xh, a7, xl
	sll	xl, xl

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 64, .Lfixunsdfdi_bigexp	/* shift by 0 is not valid here */
	ssl	a4			/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunsdfdi_smallshift
	srl	xl, xh			/* shift amount >= 32 */
	movi	xh, 0

.Lfixunsdfdi_shifted:
	/* Negate the 64-bit result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1		/* propagate the borrow */
1:	leaf_return

.Lfixunsdfdi_smallshift:
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixunsdfdi_shifted

.Lfixunsdfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixunsdfdi_maxint	/* mantissa zero: Infinity */

	/* Translate NaN to 0xffffffff....  */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunsdfdi_maxint:
	bgez	xh, 1b			/* positive overflow: all ones */
2:	slli	xh, a6, 11		/* 0x80000000 */
	movi	xl, 0
	leaf_return

.Lfixunsdfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunsdfdi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	a7, 2b			/* negative: saturate */
	leaf_return			/* no shift needed */
|
|
2055
|
|
2056 #endif /* L_fixunsdfdi */
|
|
2057
|
|
2058 #ifdef L_floatsidf
|
|
2059
|
|
	/* Convert unsigned 32-bit integer (in a2) to double (in xh/xl).
	   Shares the normalization/packing tail with __floatsidf below;
	   only the sign handling differs.  The conversion is exact: a
	   32-bit integer always fits in the 53-bit mantissa.  */

	.align	4
	.global	__floatunsidf
	.type	__floatunsidf, @function
__floatunsidf:
	leaf_entry sp, 16
	beqz	a2, .Lfloatsidf_return_zero

	/* Set the sign to zero and jump to the floatsidf code.  */
	movi	a7, 0
	j	.Lfloatsidf_normalize

	/* Convert signed 32-bit integer (in a2) to double (in xh/xl).  */
	.align	4
	.global	__floatsidf
	.type	__floatsidf, @function
__floatsidf:
	leaf_entry sp, 16

	/* Check for zero.  */
	beqz	a2, .Lfloatsidf_return_zero

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsidf_normalize:
	/* Normalize with the first 1 bit in the msb.  */
	do_nsau	a4, a2, a5, a6		/* a4 = leading zero count */
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position (the implicit leading one
	   lands in the exponent field and is compensated below).  */
	srli	xh, a5, 11
	slli	xl, a5, (32 - 11)

	/* Set the exponent.  */
	movi	a5, 0x41d		/* 0x3fe + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 20
	add	xh, xh, a5

	/* Add the sign and return.  */
	slli	a7, a7, 31
	or	xh, xh, a7
	leaf_return

.Lfloatsidf_return_zero:
	/* a2 is already zero on this path; clearing a3 zeroes the other
	   half of the xh/xl result pair for either endianness.  */
	movi	a3, 0
	leaf_return
|
|
2115
|
|
2116 #endif /* L_floatsidf */
|
|
2117
|
|
2118 #ifdef L_floatdidf
|
|
2119
|
|
	/* Convert unsigned 64-bit integer (in xh/xl) to double (in xh/xl).
	   Shares the normalization/rounding tail with __floatdidf below;
	   only the sign handling differs.  A 64-bit integer may not fit in
	   the 53-bit mantissa, so the result is rounded to nearest-even
	   using the bits shifted out into a6.  */

	.align	4
	.global	__floatundidf
	.type	__floatundidf, @function
__floatundidf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Set the sign to zero and jump to the floatdidf code.  */
	movi	a7, 0
	j	.Lfloatdidf_normalize

	/* Convert signed 64-bit integer (in xh/xl) to double (in xh/xl).  */
	.align	4
	.global	__floatdidf
	.type	__floatdidf, @function
__floatdidf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f

	/* Save the sign.  */
	extui	a7, xh, 31, 1

	/* Get the absolute value (64-bit negation).  */
	bgez	xh, .Lfloatdidf_normalize
	neg	xl, xl
	neg	xh, xh
	beqz	xl, .Lfloatdidf_normalize
	addi	xh, xh, -1		/* propagate the borrow */

.Lfloatdidf_normalize:
	/* Normalize with the first 1 bit in the msb of xh.  */
	beqz	xh, .Lfloatdidf_bigshift
	do_nsau	a4, xh, a5, a6		/* a4 = leading zero count */
	ssl	a4
	src	xh, xh, xl
	sll	xl, xl

.Lfloatdidf_shifted:
	/* Shift the mantissa into position, with rounding bits in a6.  */
	ssai	11
	sll	a6, xl
	src	xl, xh, xl
	srl	xh, xh

	/* Set the exponent.  */
	movi	a5, 0x43d		/* 0x3fe + 63 */
	sub	a5, a5, a4
	slli	a5, a5, 20
	add	xh, xh, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	xh, xh, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, 2f
	addi	xl, xl, 1
	beqz	xl, .Lfloatdidf_roundcarry	/* increment overflowed xl */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatdidf_exactlyhalf
2:	leaf_return

.Lfloatdidf_bigshift:
	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
	do_nsau	a4, xl, a5, a6
	ssl	a4
	sll	xh, xl
	movi	xl, 0
	addi	a4, a4, 32		/* account for the extra word shift */
	j	.Lfloatdidf_shifted

.Lfloatdidf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	xl, xl, 1
	slli	xl, xl, 1
	leaf_return

.Lfloatdidf_roundcarry:
	/* xl is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	xh, xh, 1
	/* Overflow to the exponent is OK.  */
	leaf_return
|
|
2210
|
|
2211 #endif /* L_floatdidf */
|
|
2212
|
|
#ifdef L_truncdfsf2

	/* __truncdfsf2: convert an IEEE double (in the xh/xl register pair,
	   see the endianness macros at the top of the file) to an IEEE
	   single returned in a2.  Rounds to nearest, ties to even (see
	   .Ltrunc_exactlyhalf).  Clobbers a4-a7.  */
	.align	4
	.global	__truncdfsf2
	.type	__truncdfsf2, @function
__truncdfsf2:
	leaf_entry sp, 16

	/* Adjust the exponent bias (double bias 0x3ff -> single bias 0x7f).  */
	movi	a4, (0x3ff - 0x7f) << 20
	sub	a5, xh, a4

	/* Check for underflow.  If the subtraction flipped the sign bit of
	   xh (the xor is negative), the double exponent was smaller than
	   the bias difference; likewise underflow if the rebiased exponent
	   field came out zero.  */
	xor	a6, xh, a5
	bltz	a6, .Ltrunc_underflow
	extui	a6, a5, 20, 11
	beqz	a6, .Ltrunc_underflow

	/* Check for overflow: a rebiased exponent >= 255 does not fit in
	   the 8-bit single exponent (this also catches Inf/NaN inputs).  */
	movi	a4, 255
	bge	a6, a4, .Ltrunc_overflow

	/* Shift a5/xl << 3 into a5/a4.  This lines the mantissa up with the
	   single-precision format; a4 collects the bits shifted off the
	   bottom, which are only needed for rounding.  */
	ssai	(32 - 3)
	src	a5, a5, xl
	sll	a4, xl

.Ltrunc_addsign:
	/* Add the sign bit.  */
	extui	a6, xh, 31, 1
	slli	a6, a6, 31
	or	a2, a6, a5

	/* Round up if the leftover fraction is >= 1/2, i.e., if the msb of
	   a4 is set.  */
	bgez	a4, 1f
	addi	a2, a2, 1
	/* Overflow to the exponent is OK.  The answer will be correct.  */

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a4, a4, 1
	beqz	a4, .Ltrunc_exactlyhalf
1:	leaf_return

.Ltrunc_exactlyhalf:
	/* Round down to the nearest even value by clearing bit 0.  */
	srli	a2, a2, 1
	slli	a2, a2, 1
	leaf_return

.Ltrunc_overflow:
	/* Check if exponent == 0x7ff, i.e., the input is Inf or NaN rather
	   than a finite value that overflows to infinity.  */
	movi	a4, 0x7ff00000
	bnall	xh, a4, 1f

	/* Check if mantissa is nonzero (input is a NaN, not Inf).  */
	slli	a5, xh, 12
	or	a5, a5, xl
	beqz	a5, 1f

	/* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
	srli	a4, a4, 1

1:	slli	a4, a4, 4	/* 0xff000000 or 0xff800000 */
	/* Add the sign bit: the funnel shift by 1 yields
	   a2 = (sign << 31) | (a4 >> 1), i.e., +/-Inf or a quiet NaN.  */
	extui	a6, xh, 31, 1
	ssai	1
	src	a2, a6, a4
	leaf_return

.Ltrunc_underflow:
	/* Find shift count for a subnormal.  Flush to zero if >= 32.  */
	extui	a6, xh, 20, 11
	movi	a5, 0x3ff - 0x7f
	sub	a6, a5, a6
	addi	a6, a6, 1
	bgeui	a6, 32, 1f

	/* Replace the exponent with an explicit "1.0": or in bit 20 (using
	   the 0x3ff - 0x7f value already in a5), then clear the sign and
	   exponent bits with a left/right shift pair.  */
	slli	a5, a5, 13	/* 0x700000 */
	or	a5, a5, xh
	slli	a5, a5, 11
	srli	a5, a5, 11

	/* Shift the mantissa left by 3 bits (into a5/a4).  */
	ssai	(32 - 3)
	src	a5, a5, xl
	sll	a4, xl

	/* Shift right by a6 to produce the subnormal.  a7 catches the bits
	   shifted out of a4 entirely; if any were nonzero, fold a sticky
	   bit back into a4 so rounding at .Ltrunc_addsign sees them.  */
	ssr	a6
	sll	a7, a4
	src	a4, a5, a4
	srl	a5, a5
	beqz	a7, .Ltrunc_addsign
	or	a4, a4, a6	/* any positive, nonzero value will work */
	j	.Ltrunc_addsign

	/* Return +/- zero.  */
1:	extui	a2, xh, 31, 1
	slli	a2, a2, 31
	leaf_return

#endif /* L_truncdfsf2 */
|
|
2316
|
|
#ifdef L_extendsfdf2

	/* __extendsfdf2: convert an IEEE single (in a2) to an IEEE double
	   returned in the xh/xl register pair (see the endianness macros at
	   the top of the file).  No rounding path exists here: every
	   single-precision value widens exactly.  Clobbers a4-a7 (and a2/a3
	   as do_nsau temporaries on the subnormal path, before the result
	   is written).  */
	.align	4
	.global	__extendsfdf2
	.type	__extendsfdf2, @function
__extendsfdf2:
	leaf_entry sp, 16

	/* Save the sign bit and then shift it off.  */
	extui	a5, a2, 31, 1
	slli	a5, a5, 31	/* a5 = sign bit in position 31 */
	slli	a4, a2, 1	/* a4 = exponent and mantissa, left-aligned */

	/* Extract and check the exponent.  */
	extui	a6, a2, 23, 8
	beqz	a6, .Lextend_expzero	/* zero or subnormal input */
	addi	a6, a6, 1
	beqi	a6, 256, .Lextend_nan_or_inf	/* exponent == 255 */

	/* Shift >> 3 into a4/xl: moves the exponent/mantissa to the double
	   high-word position; the low 3 mantissa bits land at the top of
	   xl.  */
	srli	a4, a4, 4
	slli	xl, a2, (32 - 3)

	/* Adjust the exponent bias (single bias 0x7f -> double bias 0x3ff).  */
	movi	a6, (0x3ff - 0x7f) << 20
	add	a4, a4, a6

	/* Add the sign bit.  */
	or	xh, a4, a5
	leaf_return

.Lextend_nan_or_inf:
	movi	a4, 0x7ff00000	/* double exponent field for Inf/NaN */

	/* Check for NaN: nonzero mantissa bits below the exponent.  */
	slli	a7, a2, 9
	beqz	a7, 1f

	/* a6 == 256 here, so a6 << 11 is the double quiet-NaN bit; set it
	   so the result is a quiet NaN.  */
	slli	a6, a6, 11	/* 0x80000 */
	or	a4, a4, a6

	/* Add the sign and return.  */
1:	or	xh, a4, a5
	movi	xl, 0
	leaf_return

.Lextend_expzero:
	/* Mantissa is zero too: the input is +/-0.0.  Reuse the code at 1:
	   above -- a4 is zero here, so it just adds the sign and clears
	   xl.  */
	beqz	a4, 1b

	/* Subnormal input.  Normalize it to have 8 zero bits before the
	   first 1 bit, i.e., put the leading 1 where a normal number's
	   implicit bit would sit in a4.  */
	do_nsau	a7, a4, a2, a3	/* a7 = normalization shift amount */
	addi	a7, a7, -8
	ssl	a7
	sll	a4, a4

	/* Shift >> 3 into a4/xl.  */
	slli	xl, a4, (32 - 3)
	srli	a4, a4, 3

	/* Set the exponent, compensating for the a7 normalization shift.  */
	movi	a6, 0x3fe - 0x7f
	sub	a6, a6, a7
	slli	a6, a6, 20
	add	a4, a4, a6

	/* Add the sign and return.  */
	or	xh, a4, a5
	leaf_return

#endif /* L_extendsfdf2 */
|
|
2387
|
|
2388
|