Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/xtensa/ieee754-df.S @ 0:a06113de4d67
first commit
| author | kent &lt;kent@cr.ie.u-ryukyu.ac.jp&gt; |
|---|---|
| date | Fri, 17 Jul 2009 14:47:48 +0900 |
| parents | (none) |
| children | (none) |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* IEEE-754 double-precision functions for Xtensa | |
2 Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. | |
3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. | |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify it | |
8 under the terms of the GNU General Public License as published by | |
9 the Free Software Foundation; either version 3, or (at your option) | |
10 any later version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, but WITHOUT | |
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
15 License for more details. | |
16 | |
17 Under Section 7 of GPL version 3, you are granted additional | |
18 permissions described in the GCC Runtime Library Exception, version | |
19 3.1, as published by the Free Software Foundation. | |
20 | |
21 You should have received a copy of the GNU General Public License and | |
22 a copy of the GCC Runtime Library Exception along with this program; | |
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 <http://www.gnu.org/licenses/>. */ | |
25 | |
/* Register assignments for the high and low words of the two
   double-precision arguments (x in a2/a3, y in a4/a5).  Which register
   holds the high word depends on the target byte order:
   __XTENSA_EB__ selects the big-endian word order.  */
#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif
37 | |
38 /* Warning! The branch displacements for some Xtensa branch instructions | |
39 are quite small, and this code has been carefully laid out to keep | |
40 branch targets in range. If you change anything, be sure to check that | |
41 the assembler is not relaxing anything to branch over a jump. */ | |
42 | |
#ifdef L_negdf2

	/* double __negdf2 (double x): negate x by flipping the sign bit
	   of the high word.  The mantissa/exponent bits and the low word
	   are untouched, so NaNs and Infinities are negated as well.  */
	.align	4
	.global	__negdf2
	.type	__negdf2, @function
__negdf2:
	leaf_entry sp, 16
	movi	a4, 0x80000000
	xor	xh, xh, a4	/* flip sign bit of the high word */
	leaf_return

#endif /* L_negdf2 */
55 | |
#ifdef L_addsubdf3

/* Addition */
__adddf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Ladd_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	yh, a6, 1f
	/* If x is a NaN, return it.  Otherwise, return y.  */
	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, .Ladd_ynan_or_inf
1:	leaf_return

.Ladd_ynan_or_inf:
	/* Return y.  */
	mov	xh, yh
	mov	xl, yl
	leaf_return

.Ladd_opposite_signs:
	/* Operand signs differ.  Do a subtraction.  */
	slli	a7, a6, 11
	xor	yh, yh, a7
	j	.Lsub_same_sign

	.align	4
	.global	__adddf3
	.type	__adddf3, @function
__adddf3:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000

	/* Check if the two operands have the same sign.  */
	xor	a7, xh, yh
	bltz	a7, .Ladd_opposite_signs

.Ladd_same_sign:
	/* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
	ball	xh, a6, .Ladd_xnan_or_inf
	ball	yh, a6, .Ladd_ynan_or_inf

	/* Compare the exponents.  The smaller operand will be shifted
	   right by the exponent difference and added to the larger
	   one.  */
	extui	a7, xh, 20, 12
	extui	a8, yh, 20, 12
	bltu	a7, a8, .Ladd_shiftx

.Ladd_shifty:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	yh, a6, .Ladd_yexpzero

	/* Replace yh sign/exponent with 0x001.  */
	or	yh, yh, a6
	slli	yh, yh, 11
	srli	yh, yh, 11

.Ladd_yexpdiff:
	/* Compute the exponent difference.  Optimize for difference < 32.  */
	sub	a10, a7, a8
	bgeui	a10, 32, .Ladd_bigshifty

	/* Shift yh/yl right by the exponent difference.  Any bits that are
	   shifted out of yl are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, yl, a9
	src	yl, yh, yl
	srl	yh, yh

.Ladd_addy:
	/* Do the 64-bit addition.  */
	add	xl, xl, yl
	add	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, 1
1:
	/* Check if the add overflowed into the exponent.  */
	extui	a10, xh, 20, 12
	beq	a10, a7, .Ladd_round
	mov	a8, a7
	j	.Ladd_carry

.Ladd_yexpzero:
	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0", and increment the apparent exponent
	   because subnormals behave as if they had the minimum (nonzero)
	   exponent.  Test for the case when both exponents are zero.  */
	slli	yh, yh, 12
	srli	yh, yh, 12
	bnone	xh, a6, .Ladd_bothexpzero
	addi	a8, a8, 1
	j	.Ladd_yexpdiff

.Ladd_bothexpzero:
	/* Both exponents are zero.  Handle this as a special case.  There
	   is no need to shift or round, and the normal code for handling
	   a carry into the exponent field will not work because it
	   assumes there is an implicit "1.0" that needs to be added.  */
	add	xl, xl, yl
	add	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, 1
1:	leaf_return

.Ladd_bigshifty:
	/* Exponent difference > 64 -- just return the bigger value.  */
	bgeui	a10, 64, 1b

	/* Shift yh/yl right by the exponent difference.  Any bits that are
	   shifted out are saved in a9 for rounding the result.  */
	ssr	a10
	sll	a11, yl		/* lost bits shifted out of yl */
	src	a9, yh, yl
	srl	yl, yh
	movi	yh, 0
	beqz	a11, .Ladd_addy
	or	a9, a9, a10	/* any positive, nonzero value will work */
	j	.Ladd_addy

.Ladd_xexpzero:
	/* Same as "yexpzero" except skip handling the case when both
	   exponents are zero.  */
	slli	xh, xh, 12
	srli	xh, xh, 12
	addi	a7, a7, 1
	j	.Ladd_xexpdiff

.Ladd_shiftx:
	/* Same thing as the "shifty" code, but with x and y swapped.  Also,
	   because the exponent difference is always nonzero in this version,
	   the shift sequence can use SLL and skip loading a constant zero.  */
	bnone	xh, a6, .Ladd_xexpzero

	or	xh, xh, a6
	slli	xh, xh, 11
	srli	xh, xh, 11

.Ladd_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Ladd_bigshiftx

	ssr	a10
	sll	a9, xl
	src	xl, xh, xl
	srl	xh, xh

.Ladd_addx:
	add	xl, xl, yl
	add	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, 1
1:
	/* Check if the add overflowed into the exponent.  */
	extui	a10, xh, 20, 12
	bne	a10, a8, .Ladd_carry

.Ladd_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	xl, xl, 1
	beqz	xl, .Ladd_roundcarry

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_bigshiftx:
	/* Mostly the same thing as "bigshifty"....  */
	bgeui	a10, 64, .Ladd_returny

	ssr	a10
	sll	a11, xl
	src	a9, xh, xl
	srl	xl, xh
	movi	xh, 0
	beqz	a11, .Ladd_addx
	or	a9, a9, a10
	j	.Ladd_addx

.Ladd_returny:
	mov	xh, yh
	mov	xl, yl
	leaf_return

.Ladd_carry:
	/* The addition has overflowed into the exponent field, so the
	   value needs to be renormalized.  The mantissa of the result
	   can be recovered by subtracting the original exponent and
	   adding 0x100000 (which is the explicit "1.0" for the
	   mantissa of the non-shifted operand -- the "1.0" for the
	   shifted operand was already added).  The mantissa can then
	   be shifted right by one bit.  The explicit "1.0" of the
	   shifted mantissa then needs to be replaced by the exponent,
	   incremented by one to account for the normalizing shift.
	   It is faster to combine these operations: do the shift first
	   and combine the additions and subtractions.  If x is the
	   original exponent, the result is:
	       shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
	   or:
	       shifted mantissa + ((x + 1) << 19)
	   Note that the exponent is incremented here by leaving the
	   explicit "1.0" of the mantissa in the exponent field.  */

	/* Shift xh/xl right by one bit.  Save the lsb of xl.  */
	mov	a10, xl
	ssai	1
	src	xl, xh, xl
	srl	xh, xh

	/* See explanation above.  The original exponent is in a8.  */
	addi	a8, a8, 1
	slli	a8, a8, 19
	add	xh, xh, a8

	/* Return an Infinity if the exponent overflowed.  */
	ball	xh, a6, .Ladd_infinity

	/* Same thing as the "round" code except the msb of the leftover
	   fraction is bit 0 of a10, with the rest of the fraction in a9.  */
	bbci.l	a10, 0, 1f
	addi	xl, xl, 1
	beqz	xl, .Ladd_roundcarry
	beqz	a9, .Ladd_exactlyhalf
1:	leaf_return

.Ladd_infinity:
	/* Clear the mantissa.  */
	movi	xl, 0
	srli	xh, xh, 20
	slli	xh, xh, 20

	/* The sign bit may have been lost in a carry-out.  Put it back.  */
	slli	a8, a8, 1
	or	xh, xh, a8
	leaf_return

.Ladd_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	xl, xl, 1
	slli	xl, xl, 1
	leaf_return

.Ladd_roundcarry:
	/* xl is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	xh, xh, 1
	/* Overflow to the exponent is OK.  */
	leaf_return

312 | |
/* Subtraction */
__subdf3_aux:

	/* Handle NaNs and Infinities.  (This code is placed before the
	   start of the function just to keep it in range of the limited
	   branch displacements.)  */

.Lsub_xnan_or_inf:
	/* If y is neither Infinity nor NaN, return x.  */
	bnall	yh, a6, 1f
	/* Both x and y are either NaN or Inf, so the result is NaN.  */
	movi	a4, 0x80000	/* make it a quiet NaN */
	or	xh, xh, a4
1:	leaf_return

.Lsub_ynan_or_inf:
	/* Negate y and return it.  */
	slli	a7, a6, 11
	xor	xh, yh, a7
	mov	xl, yl
	leaf_return

.Lsub_opposite_signs:
	/* Operand signs differ.  Do an addition.  */
	slli	a7, a6, 11
	xor	yh, yh, a7
	j	.Ladd_same_sign

	.align	4
	.global	__subdf3
	.type	__subdf3, @function
__subdf3:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000

	/* Check if the two operands have the same sign.  */
	xor	a7, xh, yh
	bltz	a7, .Lsub_opposite_signs

.Lsub_same_sign:
	/* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
	ball	xh, a6, .Lsub_xnan_or_inf
	ball	yh, a6, .Lsub_ynan_or_inf

	/* Compare the operands.  In contrast to addition, the entire
	   value matters here.  */
	extui	a7, xh, 20, 11
	extui	a8, yh, 20, 11
	bltu	xh, yh, .Lsub_xsmaller
	beq	xh, yh, .Lsub_compare_low

.Lsub_ysmaller:
	/* Check if the smaller (or equal) exponent is zero.  */
	bnone	yh, a6, .Lsub_yexpzero

	/* Replace yh sign/exponent with 0x001.  */
	or	yh, yh, a6
	slli	yh, yh, 11
	srli	yh, yh, 11

.Lsub_yexpdiff:
	/* Compute the exponent difference.  Optimize for difference < 32.  */
	sub	a10, a7, a8
	bgeui	a10, 32, .Lsub_bigshifty

	/* Shift yh/yl right by the exponent difference.  Any bits that are
	   shifted out of yl are saved in a9 for rounding the result.  */
	ssr	a10
	movi	a9, 0
	src	a9, yl, a9
	src	yl, yh, yl
	srl	yh, yh

.Lsub_suby:
	/* Do the 64-bit subtraction.  */
	sub	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, -1
1:	sub	xl, xl, yl

	/* Subtract the leftover bits in a9 from zero and propagate any
	   borrow from xh/xl.  */
	neg	a9, a9
	beqz	a9, 1f
	addi	a5, xh, -1
	moveqz	xh, a5, xl
	addi	xl, xl, -1
1:
	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, xh, 20, 11
	beq	a10, a7, .Lsub_round
	j	.Lsub_borrow

.Lsub_compare_low:
	/* The high words are equal.  Compare the low words.  */
	bltu	xl, yl, .Lsub_xsmaller
	bltu	yl, xl, .Lsub_ysmaller
	/* The operands are equal.  Return 0.0.  */
	movi	xh, 0
	movi	xl, 0
1:	leaf_return

.Lsub_yexpzero:
	/* y is a subnormal value.  Replace its sign/exponent with zero,
	   i.e., no implicit "1.0".  Unless x is also a subnormal, increment
	   y's apparent exponent because subnormals behave as if they had
	   the minimum (nonzero) exponent.  */
	slli	yh, yh, 12
	srli	yh, yh, 12
	bnone	xh, a6, .Lsub_yexpdiff
	addi	a8, a8, 1
	j	.Lsub_yexpdiff

.Lsub_bigshifty:
	/* Exponent difference > 64 -- just return the bigger value.  */
	bgeui	a10, 64, 1b

	/* Shift yh/yl right by the exponent difference.  Any bits that are
	   shifted out are saved in a9 for rounding the result.  */
	ssr	a10
	sll	a11, yl		/* lost bits shifted out of yl */
	src	a9, yh, yl
	srl	yl, yh
	movi	yh, 0
	beqz	a11, .Lsub_suby
	or	a9, a9, a10	/* any positive, nonzero value will work */
	j	.Lsub_suby

.Lsub_xsmaller:
	/* Same thing as the "ysmaller" code, but with x and y swapped and
	   with y negated.  */
	bnone	xh, a6, .Lsub_xexpzero

	or	xh, xh, a6
	slli	xh, xh, 11
	srli	xh, xh, 11

.Lsub_xexpdiff:
	sub	a10, a8, a7
	bgeui	a10, 32, .Lsub_bigshiftx

	ssr	a10
	movi	a9, 0
	src	a9, xl, a9
	src	xl, xh, xl
	srl	xh, xh

	/* Negate y.  */
	slli	a11, a6, 11
	xor	yh, yh, a11

.Lsub_subx:
	sub	xl, yl, xl
	sub	xh, yh, xh
	bgeu	yl, xl, 1f
	addi	xh, xh, -1
1:
	/* Subtract the leftover bits in a9 from zero and propagate any
	   borrow from xh/xl.  */
	neg	a9, a9
	beqz	a9, 1f
	addi	a5, xh, -1
	moveqz	xh, a5, xl
	addi	xl, xl, -1
1:
	/* Check if the subtract underflowed into the exponent.  */
	extui	a10, xh, 20, 11
	bne	a10, a8, .Lsub_borrow

.Lsub_round:
	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a9, 1f
	addi	xl, xl, 1
	beqz	xl, .Lsub_roundcarry

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a9, a9, 1
	beqz	a9, .Lsub_exactlyhalf
1:	leaf_return

.Lsub_xexpzero:
	/* Same as "yexpzero".  */
	slli	xh, xh, 12
	srli	xh, xh, 12
	bnone	yh, a6, .Lsub_xexpdiff
	addi	a7, a7, 1
	j	.Lsub_xexpdiff

.Lsub_bigshiftx:
	/* Mostly the same thing as "bigshifty", but with the sign bit of the
	   shifted value set so that the subsequent subtraction flips the
	   sign of y.  */
	bgeui	a10, 64, .Lsub_returny

	ssr	a10
	sll	a11, xl
	src	a9, xh, xl
	srl	xl, xh
	slli	xh, a6, 11	/* set sign bit of xh */
	beqz	a11, .Lsub_subx
	or	a9, a9, a10
	j	.Lsub_subx

.Lsub_returny:
	/* Negate and return y.  */
	slli	a7, a6, 11
	xor	xh, yh, a7
	mov	xl, yl
	leaf_return

.Lsub_borrow:
	/* The subtraction has underflowed into the exponent field, so the
	   value needs to be renormalized.  Shift the mantissa left as
	   needed to remove any leading zeros and adjust the exponent
	   accordingly.  If the exponent is not large enough to remove
	   all the leading zeros, the result will be a subnormal value.  */

	slli	a8, xh, 12
	beqz	a8, .Lsub_xhzero
	do_nsau	a6, a8, a7, a11
	srli	a8, a8, 12
	bge	a6, a10, .Lsub_subnormal
	addi	a6, a6, 1

.Lsub_shift_lt32:
	/* Shift the mantissa (a8/xl/a9) left by a6.  */
	ssl	a6
	src	a8, a8, xl
	src	xl, xl, a9
	sll	a9, a9

	/* Combine the shifted mantissa with the sign and exponent,
	   decrementing the exponent by a6.  (The exponent has already
	   been decremented by one due to the borrow from the subtraction,
	   but adding the mantissa will increment the exponent by one.)  */
	srli	xh, xh, 20
	sub	xh, xh, a6
	slli	xh, xh, 20
	add	xh, xh, a8
	j	.Lsub_round

.Lsub_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	xl, xl, 1
	slli	xl, xl, 1
	leaf_return

.Lsub_roundcarry:
	/* xl is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	xh, xh, 1
	/* Overflow to the exponent is OK.  */
	leaf_return

.Lsub_xhzero:
	/* When normalizing the result, all the mantissa bits in the high
	   word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
	do_nsau	a6, xl, a7, a11
	addi	a6, a6, 21
	blt	a10, a6, .Lsub_subnormal

.Lsub_normalize_shift:
	bltui	a6, 32, .Lsub_shift_lt32

	ssl	a6
	src	a8, xl, a9
	sll	xl, a9
	movi	a9, 0

	srli	xh, xh, 20
	sub	xh, xh, a6
	slli	xh, xh, 20
	add	xh, xh, a8
	j	.Lsub_round

.Lsub_subnormal:
	/* The exponent is too small to shift away all the leading zeros.
	   Set a6 to the current exponent (which has already been
	   decremented by the borrow) so that the exponent of the result
	   will be zero.  Do not add 1 to a6 in this case, because: (1)
	   adding the mantissa will not increment the exponent, so there is
	   no need to subtract anything extra from the exponent to
	   compensate, and (2) the effective exponent of a subnormal is 1
	   not 0 so the shift amount must be 1 smaller than normal.  */
	mov	a6, a10
	j	.Lsub_normalize_shift

#endif /* L_addsubdf3 */
601 | |
#ifdef L_muldf3

/* Multiplication */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

__muldf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
	/* Clear the sign bit of x.  */
	slli	xh, xh, 1
	srli	xh, xh, 1

	/* If x is zero, return zero.  */
	or	a10, xh, xl
	beqz	a10, .Lmul_return_zero

	/* Normalize x.  Adjust the exponent in a8.  */
	beqz	xh, .Lmul_xh_zero
	do_nsau	a10, xh, a11, a12
	addi	a10, a10, -11
	ssl	a10
	src	xh, xh, xl
	sll	xl, xl
	movi	a8, 1
	sub	a8, a8, a10
	j	.Lmul_xnormalized
.Lmul_xh_zero:
	do_nsau	a10, xl, a11, a12
	addi	a10, a10, -11
	movi	a8, -31
	sub	a8, a8, a10
	ssl	a10
	bltz	a10, .Lmul_xl_srl
	sll	xh, xl
	movi	xl, 0
	j	.Lmul_xnormalized
.Lmul_xl_srl:
	srl	xh, xl
	sll	xl, xl
	j	.Lmul_xnormalized

.Lmul_yexpzero:
	/* Clear the sign bit of y.  */
	slli	yh, yh, 1
	srli	yh, yh, 1

	/* If y is zero, return zero.  */
	or	a10, yh, yl
	beqz	a10, .Lmul_return_zero

	/* Normalize y.  Adjust the exponent in a9.  */
	beqz	yh, .Lmul_yh_zero
	do_nsau	a10, yh, a11, a12
	addi	a10, a10, -11
	ssl	a10
	src	yh, yh, yl
	sll	yl, yl
	movi	a9, 1
	sub	a9, a9, a10
	j	.Lmul_ynormalized
.Lmul_yh_zero:
	do_nsau	a10, yl, a11, a12
	addi	a10, a10, -11
	movi	a9, -31
	sub	a9, a9, a10
	ssl	a10
	bltz	a10, .Lmul_yl_srl
	sll	yh, yl
	movi	yl, 0
	j	.Lmul_ynormalized
.Lmul_yl_srl:
	srl	yh, yl
	sll	yl, yl
	j	.Lmul_ynormalized

.Lmul_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	xh, a7, 31
	slli	xh, xh, 31
	movi	xl, 0
	j	.Lmul_done

.Lmul_xnan_or_inf:
	/* If y is zero, return NaN.  */
	bnez	yl, 1f
	slli	a8, yh, 1
	bnez	a8, 1f
	movi	a4, 0x80000	/* make it a quiet NaN */
	or	xh, xh, a4
	j	.Lmul_done
1:
	/* If y is NaN, return y.  */
	bnall	yh, a6, .Lmul_returnx
	slli	a8, yh, 12
	or	a8, a8, yl
	beqz	a8, .Lmul_returnx

.Lmul_returny:
	mov	xh, yh
	mov	xl, yl

.Lmul_returnx:
	/* Set the sign bit and return.  */
	extui	a7, a7, 31, 1
	slli	xh, xh, 1
	ssai	1
	src	xh, a7, xh
	j	.Lmul_done

.Lmul_ynan_or_inf:
	/* If x is zero, return NaN.  */
	bnez	xl, .Lmul_returny
	slli	a8, xh, 1
	bnez	a8, .Lmul_returny
	movi	a7, 0x80000	/* make it a quiet NaN */
	or	xh, yh, a7
	j	.Lmul_done
725 | |
726 .align 4 | |
727 .global __muldf3 | |
728 .type __muldf3, @function | |
729 __muldf3: | |
730 #if __XTENSA_CALL0_ABI__ | |
731 leaf_entry sp, 32 | |
732 addi sp, sp, -32 | |
733 s32i a12, sp, 16 | |
734 s32i a13, sp, 20 | |
735 s32i a14, sp, 24 | |
736 s32i a15, sp, 28 | |
737 #elif XCHAL_NO_MUL | |
738 /* This is not really a leaf function; allocate enough stack space | |
739 to allow CALL12s to a helper function. */ | |
740 leaf_entry sp, 64 | |
741 #else | |
742 leaf_entry sp, 32 | |
743 #endif | |
744 movi a6, 0x7ff00000 | |
745 | |
746 /* Get the sign of the result. */ | |
747 xor a7, xh, yh | |
748 | |
749 /* Check for NaN and infinity. */ | |
750 ball xh, a6, .Lmul_xnan_or_inf | |
751 ball yh, a6, .Lmul_ynan_or_inf | |
752 | |
753 /* Extract the exponents. */ | |
754 extui a8, xh, 20, 11 | |
755 extui a9, yh, 20, 11 | |
756 | |
757 beqz a8, .Lmul_xexpzero | |
758 .Lmul_xnormalized: | |
759 beqz a9, .Lmul_yexpzero | |
760 .Lmul_ynormalized: | |
761 | |
762 /* Add the exponents. */ | |
763 add a8, a8, a9 | |
764 | |
765 /* Replace sign/exponent fields with explicit "1.0". */ | |
766 movi a10, 0x1fffff | |
767 or xh, xh, a6 | |
768 and xh, xh, a10 | |
769 or yh, yh, a6 | |
770 and yh, yh, a10 | |
771 | |
772 /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6. | |
773 The least-significant word of the result is thrown away except | |
774 that if it is nonzero, the lsb of a6 is set to 1. */ | |
775 #if XCHAL_HAVE_MUL32_HIGH | |
776 | |
777 /* Compute a6 with any carry-outs in a10. */ | |
778 movi a10, 0 | |
779 mull a6, xl, yh | |
780 mull a11, xh, yl | |
781 add a6, a6, a11 | |
782 bgeu a6, a11, 1f | |
783 addi a10, a10, 1 | |
784 1: | |
785 muluh a11, xl, yl | |
786 add a6, a6, a11 | |
787 bgeu a6, a11, 1f | |
788 addi a10, a10, 1 | |
789 1: | |
790 /* If the low word of the result is nonzero, set the lsb of a6. */ | |
791 mull a11, xl, yl | |
792 beqz a11, 1f | |
793 movi a9, 1 | |
794 or a6, a6, a9 | |
795 1: | |
796 /* Compute xl with any carry-outs in a9. */ | |
797 movi a9, 0 | |
798 mull a11, xh, yh | |
799 add a10, a10, a11 | |
800 bgeu a10, a11, 1f | |
801 addi a9, a9, 1 | |
802 1: | |
803 muluh a11, xh, yl | |
804 add a10, a10, a11 | |
805 bgeu a10, a11, 1f | |
806 addi a9, a9, 1 | |
807 1: | |
808 muluh xl, xl, yh | |
809 add xl, xl, a10 | |
810 bgeu xl, a10, 1f | |
811 addi a9, a9, 1 | |
812 1: | |
813 /* Compute xh. */ | |
814 muluh xh, xh, yh | |
815 add xh, xh, a9 | |
816 | |
817 #else /* ! XCHAL_HAVE_MUL32_HIGH */ | |
818 | |
819 /* Break the inputs into 16-bit chunks and compute 16 32-bit partial | |
820 products. These partial products are: | |
821 | |
822 0 xll * yll | |
823 | |
824 1 xll * ylh | |
825 2 xlh * yll | |
826 | |
827 3 xll * yhl | |
828 4 xlh * ylh | |
829 5 xhl * yll | |
830 | |
831 6 xll * yhh | |
832 7 xlh * yhl | |
833 8 xhl * ylh | |
834 9 xhh * yll | |
835 | |
836 10 xlh * yhh | |
837 11 xhl * yhl | |
838 12 xhh * ylh | |
839 | |
840 13 xhl * yhh | |
841 14 xhh * yhl | |
842 | |
843 15 xhh * yhh | |
844 | |
845 where the input chunks are (hh, hl, lh, ll). If using the Mul16 | |
846 or Mul32 multiplier options, these input chunks must be stored in | |
847 separate registers. For Mac16, the UMUL.AA.* opcodes can specify | |
848 that the inputs come from either half of the registers, so there | |
849 is no need to shift them out ahead of time. If there is no | |
850 multiply hardware, the 16-bit chunks can be extracted when setting | |
851 up the arguments to the separate multiply function. */ | |
852 | |
853 /* Save a7 since it is needed to hold a temporary value. */ | |
854 s32i a7, sp, 4 | |
855 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL | |
856 /* Calling a separate multiply function will clobber a0 and requires | |
857 use of a8 as a temporary, so save those values now. (The function | |
858 uses a custom ABI so nothing else needs to be saved.) */ | |
859 s32i a0, sp, 0 | |
860 s32i a8, sp, 8 | |
861 #endif | |
862 | |
863 #if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 | |
864 | |
865 #define xlh a12 | |
866 #define ylh a13 | |
867 #define xhh a14 | |
868 #define yhh a15 | |
869 | |
870 /* Get the high halves of the inputs into registers. */ | |
871 srli xlh, xl, 16 | |
872 srli ylh, yl, 16 | |
873 srli xhh, xh, 16 | |
874 srli yhh, yh, 16 | |
875 | |
876 #define xll xl | |
877 #define yll yl | |
878 #define xhl xh | |
879 #define yhl yh | |
880 | |
881 #if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 | |
882 /* Clear the high halves of the inputs. This does not matter | |
883 for MUL16 because the high bits are ignored. */ | |
884 extui xl, xl, 0, 16 | |
885 extui xh, xh, 0, 16 | |
886 extui yl, yl, 0, 16 | |
887 extui yh, yh, 0, 16 | |
888 #endif | |
889 #endif /* MUL16 || MUL32 */ | |
890 | |
891 | |
892 #if XCHAL_HAVE_MUL16 | |
893 | |
894 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | |
895 mul16u dst, xreg ## xhalf, yreg ## yhalf | |
896 | |
897 #elif XCHAL_HAVE_MUL32 | |
898 | |
899 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | |
900 mull dst, xreg ## xhalf, yreg ## yhalf | |
901 | |
902 #elif XCHAL_HAVE_MAC16 | |
903 | |
904 /* The preprocessor insists on inserting a space when concatenating after | |
905 a period in the definition of do_mul below. These macros are a workaround | |
906 using underscores instead of periods when doing the concatenation. */ | |
907 #define umul_aa_ll umul.aa.ll | |
908 #define umul_aa_lh umul.aa.lh | |
909 #define umul_aa_hl umul.aa.hl | |
910 #define umul_aa_hh umul.aa.hh | |
911 | |
912 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | |
913 umul_aa_ ## xhalf ## yhalf xreg, yreg; \ | |
914 rsr dst, ACCLO | |
915 | |
916 #else /* no multiply hardware */ | |
917 | |
918 #define set_arg_l(dst, src) \ | |
919 extui dst, src, 0, 16 | |
920 #define set_arg_h(dst, src) \ | |
921 srli dst, src, 16 | |
922 | |
923 #if __XTENSA_CALL0_ABI__ | |
924 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | |
925 set_arg_ ## xhalf (a13, xreg); \ | |
926 set_arg_ ## yhalf (a14, yreg); \ | |
927 call0 .Lmul_mulsi3; \ | |
928 mov dst, a12 | |
929 #else | |
930 #define do_mul(dst, xreg, xhalf, yreg, yhalf) \ | |
931 set_arg_ ## xhalf (a14, xreg); \ | |
932 set_arg_ ## yhalf (a15, yreg); \ | |
933 call12 .Lmul_mulsi3; \ | |
934 mov dst, a14 | |
935 #endif /* __XTENSA_CALL0_ABI__ */ | |
936 | |
937 #endif /* no multiply hardware */ | |
938 | |
939 /* Add pp1 and pp2 into a10 with carry-out in a9. */ | |
940 do_mul(a10, xl, l, yl, h) /* pp 1 */ | |
941 do_mul(a11, xl, h, yl, l) /* pp 2 */ | |
942 movi a9, 0 | |
943 add a10, a10, a11 | |
944 bgeu a10, a11, 1f | |
945 addi a9, a9, 1 | |
946 1: | |
947 /* Initialize a6 with a9/a10 shifted into position. Note that | |
948 this value can be safely incremented without any carry-outs. */ | |
949 ssai 16 | |
950 src a6, a9, a10 | |
951 | |
952 /* Compute the low word into a10. */ | |
953 do_mul(a11, xl, l, yl, l) /* pp 0 */ | |
954 sll a10, a10 | |
955 add a10, a10, a11 | |
956 bgeu a10, a11, 1f | |
957 addi a6, a6, 1 | |
958 1: | |
959 /* Compute the contributions of pp0-5 to a6, with carry-outs in a9. | |
960 This is good enough to determine the low half of a6, so that any | |
961 nonzero bits from the low word of the result can be collapsed | |
962 into a6, freeing up a register. */ | |
963 movi a9, 0 | |
964 do_mul(a11, xl, l, yh, l) /* pp 3 */ | |
965 add a6, a6, a11 | |
966 bgeu a6, a11, 1f | |
967 addi a9, a9, 1 | |
968 1: | |
969 do_mul(a11, xl, h, yl, h) /* pp 4 */ | |
970 add a6, a6, a11 | |
971 bgeu a6, a11, 1f | |
972 addi a9, a9, 1 | |
973 1: | |
974 do_mul(a11, xh, l, yl, l) /* pp 5 */ | |
975 add a6, a6, a11 | |
976 bgeu a6, a11, 1f | |
977 addi a9, a9, 1 | |
978 1: | |
979 /* Collapse any nonzero bits from the low word into a6. */ | |
980 beqz a10, 1f | |
981 movi a11, 1 | |
982 or a6, a6, a11 | |
983 1: | |
984 /* Add pp6-9 into a11 with carry-outs in a10. */ | |
985 do_mul(a7, xl, l, yh, h) /* pp 6 */ | |
986 do_mul(a11, xh, h, yl, l) /* pp 9 */ | |
987 movi a10, 0 | |
988 add a11, a11, a7 | |
989 bgeu a11, a7, 1f | |
990 addi a10, a10, 1 | |
991 1: | |
992 do_mul(a7, xl, h, yh, l) /* pp 7 */ | |
993 add a11, a11, a7 | |
994 bgeu a11, a7, 1f | |
995 addi a10, a10, 1 | |
996 1: | |
997 do_mul(a7, xh, l, yl, h) /* pp 8 */ | |
998 add a11, a11, a7 | |
999 bgeu a11, a7, 1f | |
1000 addi a10, a10, 1 | |
1001 1: | |
1002 /* Shift a10/a11 into position, and add low half of a11 to a6. */ | |
1003 src a10, a10, a11 | |
1004 add a10, a10, a9 | |
1005 sll a11, a11 | |
1006 add a6, a6, a11 | |
1007 bgeu a6, a11, 1f | |
1008 addi a10, a10, 1 | |
1009 1: | |
1010 /* Add pp10-12 into xl with carry-outs in a9. */ | |
1011 movi a9, 0 | |
1012 do_mul(xl, xl, h, yh, h) /* pp 10 */ | |
1013 add xl, xl, a10 | |
1014 bgeu xl, a10, 1f | |
1015 addi a9, a9, 1 | |
1016 1: | |
1017 do_mul(a10, xh, l, yh, l) /* pp 11 */ | |
1018 add xl, xl, a10 | |
1019 bgeu xl, a10, 1f | |
1020 addi a9, a9, 1 | |
1021 1: | |
1022 do_mul(a10, xh, h, yl, h) /* pp 12 */ | |
1023 add xl, xl, a10 | |
1024 bgeu xl, a10, 1f | |
1025 addi a9, a9, 1 | |
1026 1: | |
1027 /* Add pp13-14 into a11 with carry-outs in a10. */ | |
1028 do_mul(a11, xh, l, yh, h) /* pp 13 */ | |
1029 do_mul(a7, xh, h, yh, l) /* pp 14 */ | |
1030 movi a10, 0 | |
1031 add a11, a11, a7 | |
1032 bgeu a11, a7, 1f | |
1033 addi a10, a10, 1 | |
1034 1: | |
1035 /* Shift a10/a11 into position, and add low half of a11 to a6. */ | |
1036 src a10, a10, a11 | |
1037 add a10, a10, a9 | |
1038 sll a11, a11 | |
1039 add xl, xl, a11 | |
1040 bgeu xl, a11, 1f | |
1041 addi a10, a10, 1 | |
1042 1: | |
1043 /* Compute xh. */ | |
1044 do_mul(xh, xh, h, yh, h) /* pp 15 */ | |
1045 add xh, xh, a10 | |
1046 | |
1047 /* Restore values saved on the stack during the multiplication. */ | |
1048 l32i a7, sp, 4 | |
1049 #if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL | |
1050 l32i a0, sp, 0 | |
1051 l32i a8, sp, 8 | |
1052 #endif | |
1053 #endif /* ! XCHAL_HAVE_MUL32_HIGH */ | |
1054 | |
1055 /* Shift left by 12 bits, unless there was a carry-out from the | |
1056 multiply, in which case, shift by 11 bits and increment the | |
1057 exponent. Note: It is convenient to use the constant 0x3ff | |
1058 instead of 0x400 when removing the extra exponent bias (so that | |
1059 it is easy to construct 0x7fe for the overflow check). Reverse | |
1060 the logic here to decrement the exponent sum by one unless there | |
1061 was a carry-out. */ | |
1062 movi a4, 11 | |
1063 srli a5, xh, 21 - 12 | |
1064 bnez a5, 1f | |
1065 addi a4, a4, 1 | |
1066 addi a8, a8, -1 | |
1067 1: ssl a4 | |
1068 src xh, xh, xl | |
1069 src xl, xl, a6 | |
1070 sll a6, a6 | |
1071 | |
1072 /* Subtract the extra bias from the exponent sum (plus one to account | |
1073 for the explicit "1.0" of the mantissa that will be added to the | |
1074 exponent in the final result). */ | |
1075 movi a4, 0x3ff | |
1076 sub a8, a8, a4 | |
1077 | |
1078 /* Check for over/underflow. The value in a8 is one less than the | |
1079 final exponent, so values in the range 0..7fd are OK here. */ | |
1080 slli a4, a4, 1 /* 0x7fe */ | |
1081 bgeu a8, a4, .Lmul_overflow | |
1082 | |
1083 .Lmul_round: | |
1084 /* Round. */ | |
1085 bgez a6, .Lmul_rounded | |
1086 addi xl, xl, 1 | |
1087 beqz xl, .Lmul_roundcarry | |
1088 slli a6, a6, 1 | |
1089 beqz a6, .Lmul_exactlyhalf | |
1090 | |
1091 .Lmul_rounded: | |
1092 /* Add the exponent to the mantissa. */ | |
1093 slli a8, a8, 20 | |
1094 add xh, xh, a8 | |
1095 | |
1096 .Lmul_addsign: | |
1097 /* Add the sign bit. */ | |
1098 srli a7, a7, 31 | |
1099 slli a7, a7, 31 | |
1100 or xh, xh, a7 | |
1101 | |
1102 .Lmul_done: | |
1103 #if __XTENSA_CALL0_ABI__ | |
1104 l32i a12, sp, 16 | |
1105 l32i a13, sp, 20 | |
1106 l32i a14, sp, 24 | |
1107 l32i a15, sp, 28 | |
1108 addi sp, sp, 32 | |
1109 #endif | |
1110 leaf_return | |
1111 | |
1112 .Lmul_exactlyhalf: | |
1113 /* Round down to the nearest even value. */ | |
1114 srli xl, xl, 1 | |
1115 slli xl, xl, 1 | |
1116 j .Lmul_rounded | |
1117 | |
1118 .Lmul_roundcarry: | |
1119 /* xl is always zero when the rounding increment overflows, so | |
1120 there's no need to round it to an even value. */ | |
1121 addi xh, xh, 1 | |
1122 /* Overflow is OK -- it will be added to the exponent. */ | |
1123 j .Lmul_rounded | |
1124 | |
1125 .Lmul_overflow: | |
1126 bltz a8, .Lmul_underflow | |
1127 /* Return +/- Infinity. */ | |
1128 addi a8, a4, 1 /* 0x7ff */ | |
1129 slli xh, a8, 20 | |
1130 movi xl, 0 | |
1131 j .Lmul_addsign | |
1132 | |
1133 .Lmul_underflow: | |
1134 /* Create a subnormal value, where the exponent field contains zero, | |
1135 but the effective exponent is 1. The value of a8 is one less than | |
1136 the actual exponent, so just negate it to get the shift amount. */ | |
1137 neg a8, a8 | |
1138 mov a9, a6 | |
1139 ssr a8 | |
1140 bgeui a8, 32, .Lmul_bigshift | |
1141 | |
1142 /* Shift xh/xl right. Any bits that are shifted out of xl are saved | |
1143 in a6 (combined with the shifted-out bits currently in a6) for | |
1144 rounding the result. */ | |
1145 sll a6, xl | |
1146 src xl, xh, xl | |
1147 srl xh, xh | |
1148 j 1f | |
1149 | |
1150 .Lmul_bigshift: | |
1151 bgeui a8, 64, .Lmul_flush_to_zero | |
1152 sll a10, xl /* lost bits shifted out of xl */ | |
1153 src a6, xh, xl | |
1154 srl xl, xh | |
1155 movi xh, 0 | |
1156 or a9, a9, a10 | |
1157 | |
1158 /* Set the exponent to zero. */ | |
1159 1: movi a8, 0 | |
1160 | |
1161 /* Pack any nonzero bits shifted out into a6. */ | |
1162 beqz a9, .Lmul_round | |
1163 movi a9, 1 | |
1164 or a6, a6, a9 | |
1165 j .Lmul_round | |
1166 | |
1167 .Lmul_flush_to_zero: | |
1168 /* Return zero with the appropriate sign bit. */ | |
1169 srli xh, a7, 31 | |
1170 slli xh, xh, 31 | |
1171 movi xl, 0 | |
1172 j .Lmul_done | |
1173 | |
#if XCHAL_NO_MUL

/* For Xtensa processors with no multiply hardware, this simplified
   version of _mulsi3 is used for multiplying 16-bit chunks of
   the floating-point mantissas.  When using CALL0, this function
   uses a custom ABI: the inputs are passed in a13 and a14, the
   result is returned in a12, and a8 and a15 are clobbered.  */
	.align	4
.Lmul_mulsi3:
	leaf_entry sp, 16
	/* Shift-and-add multiply: \dst = \src1 * \src2.  Each loop
	   iteration consumes the low 4 bits of \src1, conditionally
	   accumulating \src2 scaled by 1/2/4/8 via movnez.  \src1 and
	   \src2 are destroyed; \tmp1 and \tmp2 are scratch.  */
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst		/* candidate: dst + src2 * 1 */
	extui	\tmp2, \src1, 0, 1		/* bit 0 of src1 */
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1	/* candidate: dst + src2 * 2 */
	extui	\tmp2, \src1, 1, 1		/* bit 1 */
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1	/* candidate: dst + src2 * 4 */
	extui	\tmp2, \src1, 2, 1		/* bit 2 */
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1	/* candidate: dst + src2 * 8 */
	extui	\tmp2, \src1, 3, 1		/* bit 3 */
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4			/* advance to the next 4 bits */
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	leaf_return
#endif /* XCHAL_NO_MUL */
1215 #endif /* L_muldf3 */ | |
1216 | |
#ifdef L_divdf3

/* Division.

   __divdf3 computes xh/xl / yh/yl (IEEE-754 double precision),
   rounding to nearest/even.  Operands arrive in a2-a5, with the
   high/low word assignment of xh/xl and yh/yl set by __XTENSA_EB__
   (see the macros at the top of this file).  Register roles below:
     a6      = 0x7ff00000 exponent mask; reused in the underflow path
               to collect shifted-out rounding bits
     a7      = sign of the result (bit 31)
     a8, a9  = operand exponents, then the result exponent
     a10/a11 = quotient accumulator  */
__divdf3_aux:

	/* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
	   (This code is placed before the start of the function just to
	   keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
	/* Clear the sign bit of y.  */
	slli	yh, yh, 1
	srli	yh, yh, 1

	/* Check for division by zero.  */
	or	a10, yh, yl
	beqz	a10, .Ldiv_yzero

	/* Normalize y.  Adjust the exponent in a9.  */
	beqz	yh, .Ldiv_yh_zero
	do_nsau	a10, yh, a11, a9	/* a10 = leading-zero count of yh */
	addi	a10, a10, -11		/* shift needed to put the MSB at bit 20 */
	ssl	a10
	src	yh, yh, yl
	sll	yl, yl
	movi	a9, 1
	sub	a9, a9, a10		/* adjusted exponent of the subnormal */
	j	.Ldiv_ynormalized
.Ldiv_yh_zero:
	/* High word of y is zero: normalize from the low word.  */
	do_nsau	a10, yl, a11, a9
	addi	a10, a10, -11
	movi	a9, -31
	sub	a9, a9, a10
	ssl	a10
	bltz	a10, .Ldiv_yl_srl	/* shift amount went negative */
	sll	yh, yl
	movi	yl, 0
	j	.Ldiv_ynormalized
.Ldiv_yl_srl:
	srl	yh, yl
	sll	yl, yl
	j	.Ldiv_ynormalized

.Ldiv_yzero:
	/* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
	slli	xh, xh, 1
	srli	xh, xh, 1
	or	xl, xl, xh		/* xl == 0 iff x == +/-0 */
	srli	xh, a7, 31
	slli	xh, xh, 31		/* result sign */
	or	xh, xh, a6		/* exponent field = 0x7ff (Inf) */
	bnez	xl, 1f
	movi	a4, 0x80000	/* make it a quiet NaN */
	or	xh, xh, a4
1:	movi	xl, 0
	leaf_return

.Ldiv_xexpzero:
	/* Clear the sign bit of x.  */
	slli	xh, xh, 1
	srli	xh, xh, 1

	/* If x is zero, return zero.  */
	or	a10, xh, xl
	beqz	a10, .Ldiv_return_zero

	/* Normalize x.  Adjust the exponent in a8.  (Mirrors the
	   y-normalization code above.)  */
	beqz	xh, .Ldiv_xh_zero
	do_nsau	a10, xh, a11, a8
	addi	a10, a10, -11
	ssl	a10
	src	xh, xh, xl
	sll	xl, xl
	movi	a8, 1
	sub	a8, a8, a10
	j	.Ldiv_xnormalized
.Ldiv_xh_zero:
	do_nsau	a10, xl, a11, a8
	addi	a10, a10, -11
	movi	a8, -31
	sub	a8, a8, a10
	ssl	a10
	bltz	a10, .Ldiv_xl_srl
	sll	xh, xl
	movi	xl, 0
	j	.Ldiv_xnormalized
.Ldiv_xl_srl:
	srl	xh, xl
	sll	xl, xl
	j	.Ldiv_xnormalized

.Ldiv_return_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	xh, a7, 31
	slli	xh, xh, 31
	movi	xl, 0
	leaf_return

.Ldiv_xnan_or_inf:
	/* Set the sign bit of the result.  */
	srli	a7, yh, 31
	slli	a7, a7, 31
	xor	xh, xh, a7
	/* If y is NaN or Inf, return NaN.  (Covers Inf/Inf and
	   NaN / anything.)  */
	bnall	yh, a6, 1f
	movi	a4, 0x80000	/* make it a quiet NaN */
	or	xh, xh, a4
1:	leaf_return

.Ldiv_ynan_or_inf:
	/* If y is Infinity, return zero.  */
	slli	a8, yh, 12
	or	a8, a8, yl
	beqz	a8, .Ldiv_return_zero
	/* y is NaN; return it.  */
	mov	xh, yh
	mov	xl, yl
	leaf_return

.Ldiv_highequal1:
	/* High words equal: compare the low words to decide whether the
	   first quotient digit would be zero (x < y), requiring a
	   pre-shift of x.  */
	bltu	xl, yl, 2f
	j	3f

	.align	4
	.global	__divdf3
	.type	__divdf3, @function
__divdf3:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000		/* exponent mask / Inf bit pattern */

	/* Get the sign of the result.  */
	xor	a7, xh, yh

	/* Check for NaN and infinity.  */
	ball	xh, a6, .Ldiv_xnan_or_inf
	ball	yh, a6, .Ldiv_ynan_or_inf

	/* Extract the exponents.  */
	extui	a8, xh, 20, 11
	extui	a9, yh, 20, 11

	beqz	a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
	beqz	a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

	/* Subtract the exponents.  */
	sub	a8, a8, a9

	/* Replace sign/exponent fields with explicit "1.0".  */
	movi	a10, 0x1fffff
	or	xh, xh, a6
	and	xh, xh, a10
	or	yh, yh, a6
	and	yh, yh, a10

	/* Set SAR for left shift by one.  */
	ssai	(32 - 1)

	/* The first digit of the mantissa division must be a one.
	   Shift x (and adjust the exponent) as needed to make this true.  */
	bltu	yh, xh, 3f
	beq	yh, xh, .Ldiv_highequal1
2:	src	xh, xh, xl
	sll	xl, xl
	addi	a8, a8, -1
3:
	/* Do the first subtraction and shift.  */
	sub	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, -1		/* borrow from the high word */
1:	sub	xl, xl, yl
	src	xh, xh, xl
	sll	xl, xl

	/* Put the quotient into a10/a11.  */
	movi	a10, 0
	movi	a11, 1

	/* Divide one bit at a time for 52 bits.  */
	movi	a9, 52
#if XCHAL_HAVE_LOOPS
	loop	a9, .Ldiv_loopend
#endif
.Ldiv_loop:
	/* Shift the quotient << 1.  */
	src	a10, a10, a11
	sll	a11, a11

	/* Is this digit a 0 or 1?  */
	bltu	xh, yh, 3f
	beq	xh, yh, .Ldiv_highequal2

	/* Output a 1 and subtract.  */
2:	addi	a11, a11, 1
	sub	xh, xh, yh
	bgeu	xl, yl, 1f
	addi	xh, xh, -1		/* borrow from the high word */
1:	sub	xl, xl, yl

	/* Shift the dividend << 1.  */
3:	src	xh, xh, xl
	sll	xl, xl

#if !XCHAL_HAVE_LOOPS
	addi	a9, a9, -1
	bnez	a9, .Ldiv_loop
#endif
.Ldiv_loopend:

	/* Add the exponent bias (less one to account for the explicit "1.0"
	   of the mantissa that will be added to the exponent in the final
	   result).  */
	movi	a9, 0x3fe
	add	a8, a8, a9

	/* Check for over/underflow.  The value in a8 is one less than the
	   final exponent, so values in the range 0..7fd are OK here.  */
	addmi	a9, a9, 0x400	/* 0x7fe */
	bgeu	a8, a9, .Ldiv_overflow

.Ldiv_round:
	/* Round.  The remainder (<< 1) is in xh/xl; round up when it is
	   at least the divisor, i.e. the dropped fraction is >= 1/2.  */
	bltu	xh, yh, .Ldiv_rounded
	beq	xh, yh, .Ldiv_highequal3
.Ldiv_roundup:
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry

.Ldiv_rounded:
	mov	xl, a11
	/* Add the exponent to the mantissa.  */
	slli	a8, a8, 20
	add	xh, a10, a8

.Ldiv_addsign:
	/* Add the sign bit.  */
	srli	a7, a7, 31
	slli	a7, a7, 31
	or	xh, xh, a7
	leaf_return

.Ldiv_highequal2:
	bgeu	xl, yl, 2b
	j	3b

.Ldiv_highequal3:
	bltu	xl, yl, .Ldiv_rounded
	bne	xl, yl, .Ldiv_roundup

	/* Remainder is exactly half the divisor.  Round even.  */
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry
	srli	a11, a11, 1
	slli	a11, a11, 1
	j	.Ldiv_rounded

.Ldiv_overflow:
	bltz	a8, .Ldiv_underflow
	/* Return +/- Infinity.  */
	addi	a8, a9, 1	/* 0x7ff */
	slli	xh, a8, 20
	movi	xl, 0
	j	.Ldiv_addsign

.Ldiv_underflow:
	/* Create a subnormal value, where the exponent field contains zero,
	   but the effective exponent is 1.  The value of a8 is one less than
	   the actual exponent, so just negate it to get the shift amount.  */
	neg	a8, a8
	ssr	a8
	bgeui	a8, 32, .Ldiv_bigshift

	/* Shift a10/a11 right.  Any bits that are shifted out of a11 are
	   saved in a6 for rounding the result.  */
	sll	a6, a11
	src	a11, a10, a11
	srl	a10, a10
	j	1f

.Ldiv_bigshift:
	bgeui	a8, 64, .Ldiv_flush_to_zero
	sll	a9, a11		/* lost bits shifted out of a11 */
	src	a6, a10, a11
	srl	a11, a10
	movi	a10, 0
	or	xl, xl, a9

	/* Set the exponent to zero.  */
1:	movi	a8, 0

	/* Pack any nonzero remainder (in xh/xl) into a6.  */
	or	xh, xh, xl
	beqz	xh, 1f
	movi	a9, 1
	or	a6, a6, a9

	/* Round a10/a11 based on the bits shifted out into a6.  */
1:	bgez	a6, .Ldiv_rounded
	addi	a11, a11, 1
	beqz	a11, .Ldiv_roundcarry
	slli	a6, a6, 1
	bnez	a6, .Ldiv_rounded
	srli	a11, a11, 1	/* exactly halfway: round to even */
	slli	a11, a11, 1
	j	.Ldiv_rounded

.Ldiv_roundcarry:
	/* a11 is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	a10, a10, 1
	/* Overflow to the exponent field is OK.  */
	j	.Ldiv_rounded

.Ldiv_flush_to_zero:
	/* Return zero with the appropriate sign bit.  */
	srli	xh, a7, 31
	slli	xh, xh, 31
	movi	xl, 0
	leaf_return

#endif /* L_divdf3 */
1539 | |
#ifdef L_cmpdf2

/* Equal and Not Equal.

   __eqdf2/__nedf2 (one symbol, two names) set a2 = 0 when x == y and
   a2 = 1 otherwise.  NaN compares unequal to everything, including
   itself; +0 and -0 compare equal.  */

	.align	4
	.global	__eqdf2
	.global	__nedf2
	.set	__nedf2, __eqdf2
	.type	__eqdf2, @function
__eqdf2:
	leaf_entry sp, 16
	bne	xl, yl, 2f
	bne	xh, yh, 4f

	/* The values are equal but NaN != NaN.  Check the exponent.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, 3f

	/* Equal.  */
	movi	a2, 0
	leaf_return

	/* Not equal.  */
2:	movi	a2, 1
	leaf_return

	/* Max exponent: check if the mantissas are nonzero (NaN).  */
3:	slli	a7, xh, 12
	or	a7, a7, xl
	j	5f

	/* High words differ, low words match: check if x and y are zero
	   with different signs.  */
4:	or	a7, xh, yh
	slli	a7, a7, 1
	or	a7, a7, xl	/* xl == yl here */

	/* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
	   of x when exponent(x) = 0x7ff and x == y.  */
5:	movi	a2, 0
	movi	a3, 1
	movnez	a2, a3, a7
	leaf_return
1582 | |
1583 | |
/* Greater Than.

   __gtdf2 sets a2 = 1 (positive) when x > y, and a2 = 0 when x <= y
   or when either operand is a NaN.  The ordered comparison is shared
   with __ledf2 via .Lle_cmp.  */

	.align	4
	.global	__gtdf2
	.type	__gtdf2, @function
__gtdf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f		/* x has the maximum exponent */
1:	bnall	yh, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Lle_cmp
	movi	a2, 0		/* unordered: "not greater" */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b		/* x is Inf, not NaN: keep comparing */
	movi	a2, 0
	leaf_return
1608 | |
1609 | |
/* Less Than or Equal.

   __ledf2 sets a2 = 0 when x <= y, and a2 = 1 (positive) when x > y
   or when either operand is a NaN.  The ordered comparison below
   (.Lle_cmp) is also used by __gtdf2.  */

	.align	4
	.global	__ledf2
	.type	__ledf2, @function
__ledf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f		/* x has the maximum exponent */
1:	bnall	yh, a6, .Lle_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Lle_cmp
	movi	a2, 1		/* unordered: "not less than or equal" */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b		/* x is Inf, not NaN: keep comparing */
	movi	a2, 1
	leaf_return

.Lle_cmp:
	/* Ordered comparison (no NaNs here): a2 = 0 if x <= y, else 1.  */

	/* Check if x and y have different signs.  */
	xor	a7, xh, yh
	bltz	a7, .Lle_diff_signs

	/* Check if x is negative.  */
	bltz	xh, .Lle_xneg

	/* Both non-negative: check if x <= y by magnitude.  */
	bltu	xh, yh, 4f
	bne	xh, yh, 5f
	bltu	yl, xl, 5f
4:	movi	a2, 0
	leaf_return

.Lle_xneg:
	/* Both negative: x <= y iff |y| <= |x|, so check if y <= x.  */
	bltu	yh, xh, 4b
	bne	yh, xh, 5f
	bgeu	xl, yl, 4b
5:	movi	a2, 1
	leaf_return

.Lle_diff_signs:
	bltz	xh, 4b		/* x negative, y positive: x <= y */

	/* x positive, y negative: only equal if both are zero.  */
	or	a7, xh, yh
	slli	a7, a7, 1	/* discard the sign bits */
	or	a7, a7, xl
	or	a7, a7, yl
	movi	a2, 1
	movi	a3, 0
	moveqz	a2, a3, a7	/* +0 <= -0 */
	leaf_return
1670 | |
1671 | |
/* Greater Than or Equal.

   __gedf2 sets a2 = 0 when x >= y, and a2 = -1 (negative) when x < y
   or when either operand is a NaN.  The ordered comparison is shared
   with __ltdf2 via .Llt_cmp.  */

	.align	4
	.global	__gedf2
	.type	__gedf2, @function
__gedf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f		/* x has the maximum exponent */
1:	bnall	yh, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Llt_cmp
	movi	a2, -1		/* unordered: "not greater or equal" */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b		/* x is Inf, not NaN: keep comparing */
	movi	a2, -1
	leaf_return
1696 | |
1697 | |
/* Less Than.

   __ltdf2 sets a2 = -1 (negative) when x < y, and a2 = 0 when x >= y
   or when either operand is a NaN.  The ordered comparison below
   (.Llt_cmp) is also used by __gedf2.  */

	.align	4
	.global	__ltdf2
	.type	__ltdf2, @function
__ltdf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 2f		/* x has the maximum exponent */
1:	bnall	yh, a6, .Llt_cmp

	/* Check if y is a NaN.  */
	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, .Llt_cmp
	movi	a2, 0		/* unordered: "not less" */
	leaf_return

	/* Check if x is a NaN.  */
2:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b		/* x is Inf, not NaN: keep comparing */
	movi	a2, 0
	leaf_return

.Llt_cmp:
	/* Ordered comparison (no NaNs here): a2 = -1 if x < y, else 0.  */

	/* Check if x and y have different signs.  */
	xor	a7, xh, yh
	bltz	a7, .Llt_diff_signs

	/* Check if x is negative.  */
	bltz	xh, .Llt_xneg

	/* Both non-negative: check if x < y by magnitude.  */
	bltu	xh, yh, 4f
	bne	xh, yh, 5f
	bgeu	xl, yl, 5f
4:	movi	a2, -1
	leaf_return

.Llt_xneg:
	/* Both negative: x < y iff |y| < |x|, so check if y < x.  */
	bltu	yh, xh, 4b
	bne	yh, xh, 5f
	bltu	yl, xl, 4b
5:	movi	a2, 0
	leaf_return

.Llt_diff_signs:
	bgez	xh, 5b		/* x positive, y negative: not less */

	/* x negative, y positive: x < y unless both are zero.  */
	or	a7, xh, yh
	slli	a7, a7, 1	/* discard the sign bits */
	or	a7, a7, xl
	or	a7, a7, yl
	movi	a2, 0
	movi	a3, -1
	movnez	a2, a3, a7	/* -0 < +0 is false */
	leaf_return
1758 | |
1759 | |
/* Unordered.

   __unorddf2 sets a2 = 1 when either x or y is a NaN, 0 otherwise.  */

	.align	4
	.global	__unorddf2
	.type	__unorddf2, @function
__unorddf2:
	leaf_entry sp, 16
	movi	a6, 0x7ff00000
	ball	xh, a6, 3f		/* x has the maximum exponent */
1:	ball	yh, a6, 4f		/* y has the maximum exponent */
2:	movi	a2, 0
	leaf_return

	/* Max exponent: NaN iff the mantissa is nonzero.  */
3:	slli	a7, xh, 12
	or	a7, a7, xl
	beqz	a7, 1b		/* x is Inf, not NaN */
	movi	a2, 1
	leaf_return

4:	slli	a7, yh, 12
	or	a7, a7, yl
	beqz	a7, 2b		/* y is Inf, not NaN */
	movi	a2, 1
	leaf_return

#endif /* L_cmpdf2 */
1786 | |
#ifdef L_fixdfsi

/* Convert a double (xh/xl) to a signed 32-bit integer in a2,
   truncating toward zero.  Out-of-range values saturate to
   0x7fffffff / 0x80000000; NaN is translated to +maxint; values with
   magnitude below 1.0 (including all subnormals) return zero.  */

	.align	4
	.global	__fixdfsi
	.type	__fixdfsi, @function
__fixdfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixdfsi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 19, 10	/* 0x3fe */
	sub	a4, a4, a5
	bgei	a4, 32, .Lfixdfsi_maxint
	blti	a4, 1, .Lfixdfsi_zero

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6	/* a7 also keeps the sign in bit 31 */
	ssai	(32 - 11)
	src	a5, a7, xl

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixdfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixdfsi_maxint

	/* Translate NaN to +maxint: clearing xh makes the saturation
	   below pick the positive limit.  */
	movi	xh, 0

.Lfixdfsi_maxint:
	slli	a4, a6, 11	/* 0x80000000 */
	addi	a5, a4, -1	/* 0x7fffffff */
	movgez	a4, a5, xh	/* choose the limit matching the sign */
	mov	a2, a4
	leaf_return

.Lfixdfsi_zero:
	movi	a2, 0
	leaf_return

#endif /* L_fixdfsi */
1841 | |
#ifdef L_fixdfdi

/* Convert a double (xh/xl) to a signed 64-bit integer returned in
   xh/xl, truncating toward zero.  Out-of-range values saturate to
   0x7fffffffffffffff / 0x8000000000000000; NaN is translated to the
   positive maximum; values with magnitude below 1.0 return zero.  */

	.align	4
	.global	__fixdfdi
	.type	__fixdfdi, @function
__fixdfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixdfdi_nan_or_inf

	/* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 19, 10	/* 0x3fe */
	sub	a4, a4, a5
	bgei	a4, 64, .Lfixdfdi_maxint
	blti	a4, 1, .Lfixdfdi_zero

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6	/* a7 also keeps the sign in bit 31 */
	ssai	(32 - 11)
	src	xh, a7, xl
	sll	xl, xl

	/* Shift back to the right, based on the exponent.  */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixdfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixdfdi_shifted:
	/* Negate the result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1	/* propagate the borrow of the 64-bit negate */
1:	leaf_return

.Lfixdfdi_smallshift:
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixdfdi_shifted

.Lfixdfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixdfdi_maxint

	/* Translate NaN to +maxint: clearing xh makes the saturation
	   below pick the positive limit.  */
	movi	xh, 0

.Lfixdfdi_maxint:
	slli	a7, a6, 11	/* 0x80000000 */
	bgez	xh, 1f
	mov	xh, a7		/* 0x8000000000000000 */
	movi	xl, 0
	leaf_return

1:	addi	xh, a7, -1	/* 0x7fffffff */
	movi	xl, -1		/* 0x7fffffffffffffff */
	leaf_return

.Lfixdfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

#endif /* L_fixdfdi */
1913 | |
#ifdef L_fixunsdfsi

/* Convert a double (xh/xl) to an unsigned 32-bit integer in a2,
   truncating toward zero.  Values >= 2^32 return 0xffffffff
   (0x80000000 when negative); NaN returns 0xffffffff; values with
   magnitude below 1.0 return zero.  (The C result for negative
   inputs is undefined; this implementation returns the negated
   truncation for in-range magnitudes.)  */

	.align	4
	.global	__fixunsdfsi
	.type	__fixunsdfsi, @function
__fixunsdfsi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixunsdfsi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 20, 10	/* 0x3ff */
	sub	a4, a4, a5
	bgei	a4, 32, .Lfixunsdfsi_maxint
	bltz	a4, .Lfixunsdfsi_zero

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6	/* a7 also keeps the sign in bit 31 */
	ssai	(32 - 11)
	src	a5, a7, xl

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 32, .Lfixunsdfsi_bigexp	/* shift count would be zero */
	ssl	a4		/* shift by 32 - a4 */
	srl	a5, a5

	/* Negate the result if sign != 0.  */
	neg	a2, a5
	movgez	a2, a5, a7
	leaf_return

.Lfixunsdfsi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixunsdfsi_maxint

	/* Translate NaN to 0xffffffff.  */
	movi	a2, -1
	leaf_return

.Lfixunsdfsi_maxint:
	slli	a4, a6, 11	/* 0x80000000 */
	movi	a5, -1		/* 0xffffffff */
	movgez	a4, a5, xh	/* choose the limit matching the sign */
	mov	a2, a4
	leaf_return

.Lfixunsdfsi_zero:
	movi	a2, 0
	leaf_return

.Lfixunsdfsi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	xh, 1f
	mov	a2, a5		/* no shift needed */
	leaf_return

	/* Return 0x80000000 if negative.  */
1:	slli	a2, a6, 11
	leaf_return

#endif /* L_fixunsdfsi */
1981 | |
#ifdef L_fixunsdfdi

/* Convert a double (xh/xl) to an unsigned 64-bit integer returned in
   xh/xl, truncating toward zero.  Values >= 2^64 return all-ones
   (0x8000000000000000 when negative); NaN returns all-ones; values
   with magnitude below 1.0 return zero.  */

	.align	4
	.global	__fixunsdfdi
	.type	__fixunsdfdi, @function
__fixunsdfdi:
	leaf_entry sp, 16

	/* Check for NaN and Infinity.  */
	movi	a6, 0x7ff00000
	ball	xh, a6, .Lfixunsdfdi_nan_or_inf

	/* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
	extui	a4, xh, 20, 11
	extui	a5, a6, 20, 10	/* 0x3ff */
	sub	a4, a4, a5
	bgei	a4, 64, .Lfixunsdfdi_maxint
	bltz	a4, .Lfixunsdfdi_zero

	/* Add explicit "1.0" and shift << 11.  */
	or	a7, xh, a6	/* a7 also keeps the sign in bit 31 */
	ssai	(32 - 11)
	src	xh, a7, xl
	sll	xl, xl

	/* Shift back to the right, based on the exponent.  */
	addi	a4, a4, 1
	beqi	a4, 64, .Lfixunsdfdi_bigexp	/* shift count would be zero */
	ssl	a4		/* shift by 64 - a4 */
	bgei	a4, 32, .Lfixunsdfdi_smallshift
	srl	xl, xh
	movi	xh, 0

.Lfixunsdfdi_shifted:
	/* Negate the result if sign != 0.  */
	bgez	a7, 1f
	neg	xl, xl
	neg	xh, xh
	beqz	xl, 1f
	addi	xh, xh, -1	/* propagate the borrow of the 64-bit negate */
1:	leaf_return

.Lfixunsdfdi_smallshift:
	src	xl, xh, xl
	srl	xh, xh
	j	.Lfixunsdfdi_shifted

.Lfixunsdfdi_nan_or_inf:
	/* Handle Infinity and NaN.  */
	slli	a4, xh, 12
	or	a4, a4, xl
	beqz	a4, .Lfixunsdfdi_maxint

	/* Translate NaN to 0xffffffff....  */
1:	movi	xh, -1
	movi	xl, -1
	leaf_return

.Lfixunsdfdi_maxint:
	bgez	xh, 1b		/* positive overflow: all-ones */
2:	slli	xh, a6, 11	/* 0x80000000 */
	movi	xl, 0
	leaf_return

.Lfixunsdfdi_zero:
	movi	xh, 0
	movi	xl, 0
	leaf_return

.Lfixunsdfdi_bigexp:
	/* Handle unsigned maximum exponent case.  */
	bltz	a7, 2b
	leaf_return	/* no shift needed */

#endif /* L_fixunsdfdi */
2057 | |
#ifdef L_floatsidf

/* Convert an unsigned 32-bit integer (a2) to a double (xh/xl).
   The conversion is exact: any 32-bit value fits in the 53-bit
   mantissa.  */
	.align	4
	.global	__floatunsidf
	.type	__floatunsidf, @function
__floatunsidf:
	leaf_entry sp, 16
	beqz	a2, .Lfloatsidf_return_zero

	/* Set the sign to zero and jump to the floatsidf code.  */
	movi	a7, 0
	j	.Lfloatsidf_normalize

/* Convert a signed 32-bit integer (a2) to a double (xh/xl).  */
	.align	4
	.global	__floatsidf
	.type	__floatsidf, @function
__floatsidf:
	leaf_entry sp, 16

	/* Check for zero.  */
	beqz	a2, .Lfloatsidf_return_zero

	/* Save the sign.  */
	extui	a7, a2, 31, 1

	/* Get the absolute value.  */
#if XCHAL_HAVE_ABS
	abs	a2, a2
#else
	neg	a4, a2
	movltz	a2, a4, a2
#endif

.Lfloatsidf_normalize:
	/* Normalize with the first 1 bit in the msb.  */
	do_nsau	a4, a2, a5, a6	/* a4 = leading-zero count */
	ssl	a4
	sll	a5, a2

	/* Shift the mantissa into position.  */
	srli	xh, a5, 11
	slli	xl, a5, (32 - 11)

	/* Set the exponent.  */
	movi	a5, 0x41d	/* 0x3fe + 31 */
	sub	a5, a5, a4
	slli	a5, a5, 20
	add	xh, xh, a5

	/* Add the sign and return.  */
	slli	a7, a7, 31
	or	xh, xh, a7
	leaf_return

.Lfloatsidf_return_zero:
	/* The input (a2) is already zero, so clearing a3 zeroes both
	   result words regardless of which of a2/a3 is xh.  */
	movi	a3, 0
	leaf_return

#endif /* L_floatsidf */
2117 | |
#ifdef L_floatdidf

/* Convert an unsigned 64-bit integer (xh/xl) to a double (xh/xl),
   rounding to nearest/even when more than 53 significant bits are
   present.  */
	.align	4
	.global	__floatundidf
	.type	__floatundidf, @function
__floatundidf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f		/* xh/xl are already 0.0 */

	/* Set the sign to zero and jump to the floatdidf code.  */
	movi	a7, 0
	j	.Lfloatdidf_normalize

/* Convert a signed 64-bit integer (xh/xl) to a double (xh/xl),
   rounding to nearest/even.  */
	.align	4
	.global	__floatdidf
	.type	__floatdidf, @function
__floatdidf:
	leaf_entry sp, 16

	/* Check for zero.  */
	or	a4, xh, xl
	beqz	a4, 2f		/* xh/xl are already 0.0 */

	/* Save the sign.  */
	extui	a7, xh, 31, 1

	/* Get the absolute value.  */
	bgez	xh, .Lfloatdidf_normalize
	neg	xl, xl
	neg	xh, xh
	beqz	xl, .Lfloatdidf_normalize
	addi	xh, xh, -1	/* propagate the borrow of the 64-bit negate */

.Lfloatdidf_normalize:
	/* Normalize with the first 1 bit in the msb of xh.  */
	beqz	xh, .Lfloatdidf_bigshift
	do_nsau	a4, xh, a5, a6	/* a4 = leading-zero count */
	ssl	a4
	src	xh, xh, xl
	sll	xl, xl

.Lfloatdidf_shifted:
	/* Shift the mantissa into position, with rounding bits in a6.  */
	ssai	11
	sll	a6, xl
	src	xl, xh, xl
	srl	xh, xh

	/* Set the exponent.  */
	movi	a5, 0x43d	/* 0x3fe + 63 */
	sub	a5, a5, a4
	slli	a5, a5, 20
	add	xh, xh, a5

	/* Add the sign.  */
	slli	a7, a7, 31
	or	xh, xh, a7

	/* Round up if the leftover fraction is >= 1/2.  */
	bgez	a6, 2f
	addi	xl, xl, 1
	beqz	xl, .Lfloatdidf_roundcarry

	/* Check if the leftover fraction is exactly 1/2.  */
	slli	a6, a6, 1
	beqz	a6, .Lfloatdidf_exactlyhalf
2:	leaf_return

.Lfloatdidf_bigshift:
	/* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
	do_nsau	a4, xl, a5, a6
	ssl	a4
	sll	xh, xl
	movi	xl, 0
	addi	a4, a4, 32	/* account for the extra word of shift */
	j	.Lfloatdidf_shifted

.Lfloatdidf_exactlyhalf:
	/* Round down to the nearest even value.  */
	srli	xl, xl, 1
	slli	xl, xl, 1
	leaf_return

.Lfloatdidf_roundcarry:
	/* xl is always zero when the rounding increment overflows, so
	   there's no need to round it to an even value.  */
	addi	xh, xh, 1
	/* Overflow to the exponent is OK.  */
	leaf_return

#endif /* L_floatdidf */
2212 | |
2213 #ifdef L_truncdfsf2 | |
2214 | |
2215 .align 4 | |
2216 .global __truncdfsf2 | |
2217 .type __truncdfsf2, @function | |
/* float __truncdfsf2 (double x)
   Convert an IEEE-754 double (xh:xl register pair) to single precision
   in a2, rounding to nearest-even.  Handles overflow to infinity, NaN
   propagation (quieted), and gradual underflow to subnormals or zero.
   Clobbers a4-a7 and SAR; leaf function (leaf_entry/leaf_return).  */
2218 __truncdfsf2: | |
2219 leaf_entry sp, 16 | |
2220 | |
2221 /* Adjust the exponent bias. */ | |
/* a5 = xh rebiased from the double bias (0x3ff) to the single bias
   (0x7f); the exponent field is still at bits 20..30.  */
2222 movi a4, (0x3ff - 0x7f) << 20 | |
2223 sub a5, xh, a4 | |
2224 | |
2225 /* Check for underflow. */ | |
/* If the subtraction borrowed into the sign bit, xh^a5 has bit 31 set:
   the rebiased exponent went negative.  A rebiased exponent field of
   zero would also be a single-precision subnormal, so it underflows
   too.  */
2226 xor a6, xh, a5 | |
2227 bltz a6, .Ltrunc_underflow | |
2228 extui a6, a5, 20, 11 | |
2229 beqz a6, .Ltrunc_underflow | |
2230 | |
2231 /* Check for overflow. */ | |
/* Rebiased exponent >= 255 cannot fit the 8-bit single-precision
   exponent field (255 itself is reserved for Inf/NaN).  */
2232 movi a4, 255 | |
2233 bge a6, a4, .Ltrunc_overflow | |
2234 | |
2235 /* Shift a5/xl << 3 into a5/a4. */ | |
/* Funnel-shift left by 3 to move the exponent from bit 20 (double
   layout) to bit 23 (single layout).  a5 = exponent + top 23 fraction
   bits; a4 = the leftover low fraction bits, used below as the
   guard/sticky bits for rounding.  */
2236 ssai (32 - 3) | |
2237 src a5, a5, xl | |
2238 sll a4, xl | |
2239 | |
2240 .Ltrunc_addsign: | |
2241 /* Add the sign bit. */ | |
2242 extui a6, xh, 31, 1 | |
2243 slli a6, a6, 31 | |
2244 or a2, a6, a5 | |
2245 | |
2246 /* Round up if the leftover fraction is >= 1/2. */ | |
/* Bit 31 of a4 is the guard bit: a4 >= 0 means leftover < 1/2, so the
   truncated result is already correct.  */
2247 bgez a4, 1f | |
2248 addi a2, a2, 1 | |
2249 /* Overflow to the exponent is OK. The answer will be correct. */ | |
2250 | |
2251 /* Check if the leftover fraction is exactly 1/2. */ | |
/* Discard the guard bit; if nothing remains, the leftover was exactly
   1/2 and the increment must be undone to the nearest even value.  */
2252 slli a4, a4, 1 | |
2253 beqz a4, .Ltrunc_exactlyhalf | |
2254 1: leaf_return | |
2255 | |
2256 .Ltrunc_exactlyhalf: | |
2257 /* Round down to the nearest even value. */ | |
/* Clearing bit 0 after the +1 above implements round-half-to-even.  */
2258 srli a2, a2, 1 | |
2259 slli a2, a2, 1 | |
2260 leaf_return | |
2261 | |
2262 .Ltrunc_overflow: | |
2263 /* Check if exponent == 0x7ff. */ | |
/* bnall: branch if not all mask bits (the 11 exponent bits) are set in
   xh.  A finite-but-too-large value falls through to return Inf.  */
2264 movi a4, 0x7ff00000 | |
2265 bnall xh, a4, 1f | |
2266 | |
2267 /* Check if mantissa is nonzero. */ | |
/* Exponent is all-ones: the input is Inf (mantissa zero, return Inf)
   or NaN (mantissa nonzero, return a quiet NaN).  */
2268 slli a5, xh, 12 | |
2269 or a5, a5, xl | |
2270 beqz a5, 1f | |
2271 | |
2272 /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */ | |
/* a4: 0x7ff00000 -> 0x3ff80000, so the mantissa MSB ends up set.  */
2273 srli a4, a4, 1 | |
2274 | |
2275 1: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */ | |
2276 /* Add the sign bit. */ | |
/* Funnel-shift right by 1: a2 = (sign << 31) | (a4 >> 1), giving
   0x7f800000 (Inf) or 0x7fc00000 (quiet NaN) plus the sign.  */
2277 extui a6, xh, 31, 1 | |
2278 ssai 1 | |
2279 src a2, a6, a4 | |
2280 leaf_return | |
2281 | |
2282 .Ltrunc_underflow: | |
2283 /* Find shift count for a subnormal. Flush to zero if >= 32. */ | |
/* a6 = (0x3ff - 0x7f) - exponent + 1: how far the mantissa must be
   shifted right beyond the normal position to denormalize it.  */
2284 extui a6, xh, 20, 11 | |
2285 movi a5, 0x3ff - 0x7f | |
2286 sub a6, a5, a6 | |
2287 addi a6, a6, 1 | |
2288 bgeui a6, 32, 1f | |
2289 | |
2290 /* Replace the exponent with an explicit "1.0". */ | |
/* 0x380 << 13 = 0x700000 sets bits 20..22; clearing the top 11 bits
   (sign + exponent) then leaves only bit 20 set, i.e. the implicit
   leading 1 made explicit just above the 20 fraction bits in a5.  */
2291 slli a5, a5, 13 /* 0x700000 */ | |
2292 or a5, a5, xh | |
2293 slli a5, a5, 11 | |
2294 srli a5, a5, 11 | |
2295 | |
2296 /* Shift the mantissa left by 3 bits (into a5/a4). */ | |
/* Same alignment step as the normal path: single-precision bit
   positions, with the low leftover bits collected in a4.  */
2297 ssai (32 - 3) | |
2298 src a5, a5, xl | |
2299 sll a4, xl | |
2300 | |
2301 /* Shift right by a6. */ | |
/* After ssr a6, sll shifts left by (32 - a6), so a7 captures exactly
   the bits shifted off the bottom of a4 -- the sticky bits.  */
2302 ssr a6 | |
2303 sll a7, a4 | |
2304 src a4, a5, a4 | |
2305 srl a5, a5 | |
2306 beqz a7, .Ltrunc_addsign | |
/* Nonzero sticky bits: force the leftover fraction to read as > 1/2 so
   the exactly-half (round-to-even) case is not taken by mistake.  */
2307 or a4, a4, a6 /* any positive, nonzero value will work */ | |
2308 j .Ltrunc_addsign | |
2309 | |
2310 /* Return +/- zero. */ | |
2311 1: extui a2, xh, 31, 1 | |
2312 slli a2, a2, 31 | |
2313 leaf_return | |
2314 | |
2315 #endif /* L_truncdfsf2 */ | |
2316 | |
2317 #ifdef L_extendsfdf2 | |
2318 | |
2319 .align 4 | |
2320 .global __extendsfdf2 | |
2321 .type __extendsfdf2, @function | |
/* double __extendsfdf2 (float x)
   Widen a single-precision float (a2) to double precision (xh:xl).
   The conversion is exact, so no rounding is needed; NaNs are quieted,
   Inf and zero pass through, and subnormals are normalized.
   Clobbers a4-a7 and SAR; leaf function (leaf_entry/leaf_return).  */
2322 __extendsfdf2: | |
2323 leaf_entry sp, 16 | |
2324 | |
2325 /* Save the sign bit and then shift it off. */ | |
/* a5 = sign bit in position 31; a4 = input with the sign removed
   (exponent now at bits 24..31, fraction at bits 1..23).  */
2326 extui a5, a2, 31, 1 | |
2327 slli a5, a5, 31 | |
2328 slli a4, a2, 1 | |
2329 | |
2330 /* Extract and check the exponent. */ | |
/* Exponent 0 -> zero or subnormal; exponent 255 (checked as 256 after
   the increment) -> Inf or NaN.  */
2331 extui a6, a2, 23, 8 | |
2332 beqz a6, .Lextend_expzero | |
2333 addi a6, a6, 1 | |
2334 beqi a6, 256, .Lextend_nan_or_inf | |
2335 | |
2336 /* Shift >> 3 into a4/xl. */ | |
/* Net effect: (a2 without sign) >> 3, moving the exponent from bit 23
   to bit 20 (double layout); the low 3 fraction bits become the top of
   xl (bits 29..31), the rest of xl is zero.  */
2337 srli a4, a4, 4 | |
2338 slli xl, a2, (32 - 3) | |
2339 | |
2340 /* Adjust the exponent bias. */ | |
/* Convert bias 0x7f (single) to bias 0x3ff (double) at bit 20.  */
2341 movi a6, (0x3ff - 0x7f) << 20 | |
2342 add a4, a4, a6 | |
2343 | |
2344 /* Add the sign bit. */ | |
2345 or xh, a4, a5 | |
2346 leaf_return | |
2347 | |
2348 .Lextend_nan_or_inf: | |
2349 movi a4, 0x7ff00000 | |
2350 | |
2351 /* Check for NaN. */ | |
/* a2 << 9 isolates the fraction; zero means Inf, so return +/-Inf.  */
2352 slli a7, a2, 9 | |
2353 beqz a7, 1f | |
2354 | |
/* Nonzero fraction: quiet the NaN by setting the mantissa MSB.
   a6 is 256 here, so 256 << 11 = 0x80000 (bit 19 of xh).  */
2355 slli a6, a6, 11 /* 0x80000 */ | |
2356 or a4, a4, a6 | |
2357 | |
2358 /* Add the sign and return. */ | |
2359 1: or xh, a4, a5 | |
2360 movi xl, 0 | |
2361 leaf_return | |
2362 | |
2363 .Lextend_expzero: | |
/* a4 == 0 means the input is +/-0: reuse the sign-and-return code
   above (a4 is zero, so xh = sign only, xl = 0).  */
2364 beqz a4, 1b | |
2365 | |
2366 /* Normalize it to have 8 zero bits before the first 1 bit. */ | |
/* Subnormal input.  do_nsau counts leading zeros of a4 into a7, using
   a2/a3 as scratch (safe: xh/xl are fully rewritten below).  Shifting
   left by (nsau - 8) puts the leading 1 bit at bit 23.  */
2367 do_nsau a7, a4, a2, a3 | |
2368 addi a7, a7, -8 | |
2369 ssl a7 | |
2370 sll a4, a4 | |
2371 | |
2372 /* Shift >> 3 into a4/xl. */ | |
/* Leading 1 moves from bit 23 to bit 20, the double's implicit-1
   position; the low 3 bits go to the top of xl.  */
2373 slli xl, a4, (32 - 3) | |
2374 srli a4, a4, 3 | |
2375 | |
2376 /* Set the exponent. */ | |
/* Exponent = (0x3fe - 0x7f) - normalization shift, rebiasing and
   accounting for how far the subnormal had to be shifted up.  */
2377 movi a6, 0x3fe - 0x7f | |
2378 sub a6, a6, a7 | |
2379 slli a6, a6, 20 | |
2380 add a4, a4, a6 | |
2381 | |
2382 /* Add the sign and return. */ | |
2383 or xh, a4, a5 | |
2384 leaf_return | |
2385 | |
2386 #endif /* L_extendsfdf2 */ | |
2387 | |
2388 |