Mercurial > hg > CbC > CbC_gcc
annotate gcc/config/arm/ieee754-df.S @ 47:3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 07 Feb 2010 17:44:34 +0900 |
parents | a06113de4d67 |
children |
rev | line source |
---|---|
0 | 1 /* ieee754-df.S double-precision floating point support for ARM |
2 | |
3 Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. | |
4 Contributed by Nicolas Pitre (nico@cam.org) | |
5 | |
6 This file is free software; you can redistribute it and/or modify it | |
7 under the terms of the GNU General Public License as published by the | |
8 Free Software Foundation; either version 3, or (at your option) any | |
9 later version. | |
10 | |
11 This file is distributed in the hope that it will be useful, but | |
12 WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 General Public License for more details. | |
15 | |
16 Under Section 7 of GPL version 3, you are granted additional | |
17 permissions described in the GCC Runtime Library Exception, version | |
18 3.1, as published by the Free Software Foundation. | |
19 | |
20 You should have received a copy of the GNU General Public License and | |
21 a copy of the GCC Runtime Library Exception along with this program; | |
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 <http://www.gnu.org/licenses/>. */ | |
24 | |
25 /* | |
26 * Notes: | |
27 * | |
28 * The goal of this code is to be as fast as possible. This is | |
29 * not meant to be easy to understand for the casual reader. | |
30 * For slightly simpler code please see the single precision version | |
31 * of this file. | |
32 * | |
33 * Only the default rounding mode is intended for best performance. | |
34 * Exceptions aren't supported yet, but that can be added quite easily | |
35 * if necessary without impacting performance. | |
36 */ | |
37 | |
38 | |
39 @ For FPA, float words are always big-endian. | |
40 @ For VFP, float words follow the memory system mode. | |
41 #if defined(__VFP_FP__) && !defined(__ARMEB__) | |
42 #define xl r0 | |
43 #define xh r1 | |
44 #define yl r2 | |
45 #define yh r3 | |
46 #else | |
47 #define xh r0 | |
48 #define xl r1 | |
49 #define yh r2 | |
50 #define yl r3 | |
51 #endif | |
52 | |
53 | |
54 #ifdef L_arm_negdf2 | |
55 | |
56 ARM_FUNC_START negdf2 | |
57 ARM_FUNC_ALIAS aeabi_dneg negdf2 | |
@ negdf2 / __aeabi_dneg: return -x for a double held in xh:xl (word order
@ per the #defines above).  Only bit 31 of xh is toggled, so the sign of
@ zeros, INFs and NaNs is flipped as well; no flags or other regs change.
58 | |
59 @ flip sign bit | |
60 eor xh, xh, #0x80000000 | |
61 RET | |
62 | |
63 FUNC_END aeabi_dneg | |
64 FUNC_END negdf2 | |
65 | |
66 #endif | |
67 | |
68 #ifdef L_arm_addsubdf3 | |
@ Double-precision add/subtract cluster: aeabi_drsub (y - x), subdf3 /
@ aeabi_dsub (x - y) and adddf3 / aeabi_dadd (x + y).  Operands arrive in
@ xh:xl and yh:yl, result is returned in xh:xl.  The subtract entry points
@ merely flip a sign bit and fall into the common add path at label "1:".
@ Rounding is round-to-nearest-even only; no IEEE exceptions are raised.
69 | |
70 ARM_FUNC_START aeabi_drsub | |
71 | |
72 eor xh, xh, #0x80000000 @ flip sign bit of first arg | |
73 b 1f | |
74 | |
75 ARM_FUNC_START subdf3 | |
76 ARM_FUNC_ALIAS aeabi_dsub subdf3 | |
77 | |
78 eor yh, yh, #0x80000000 @ flip sign bit of second arg | |
79 #if defined(__INTERWORKING_STUBS__) | |
80 b 1f @ Skip Thumb-code prologue | |
81 #endif | |
82 | |
83 ARM_FUNC_START adddf3 | |
84 ARM_FUNC_ALIAS aeabi_dadd adddf3 | |
85 | |
86 1: do_push {r4, r5, lr} | |
87 | |
88 @ Look for zeroes, equal values, INF, or NAN. | |
89 shift1 lsl, r4, xh, #1 | |
90 shift1 lsl, r5, yh, #1 | |
91 teq r4, r5 | |
92 do_it eq | |
93 teqeq xl, yl | |
94 do_it ne, ttt | |
95 COND(orr,s,ne) ip, r4, xl | |
96 COND(orr,s,ne) ip, r5, yl | |
97 COND(mvn,s,ne) ip, r4, asr #21 | |
98 COND(mvn,s,ne) ip, r5, asr #21 | |
99 beq LSYM(Lad_s) | |
100 | |
101 @ Compute exponent difference. Make largest exponent in r4, | |
102 @ corresponding arg in xh-xl, and positive exponent difference in r5. | |
103 shift1 lsr, r4, r4, #21 | |
104 rsbs r5, r4, r5, lsr #21 | |
105 do_it lt | |
106 rsblt r5, r5, #0 | |
107 ble 1f | |
@ y has the larger exponent: swap x and y with the 3-eor trick (no scratch reg).
108 add r4, r4, r5 | |
109 eor yl, xl, yl | |
110 eor yh, xh, yh | |
111 eor xl, yl, xl | |
112 eor xh, yh, xh | |
113 eor yl, xl, yl | |
114 eor yh, xh, yh | |
115 1: | |
116 @ If exponent difference is too large, return largest argument | |
117 @ already in xh-xl. We need up to 54 bit to handle proper rounding | |
118 @ of 0x1p54 - 1.1. | |
119 cmp r5, #54 | |
120 do_it hi | |
121 RETLDM "r4, r5" hi | |
122 | |
123 @ Convert mantissa to signed integer. | |
124 tst xh, #0x80000000 | |
125 mov xh, xh, lsl #12 | |
126 mov ip, #0x00100000 | |
127 orr xh, ip, xh, lsr #12 | |
128 beq 1f | |
129 #if defined(__thumb2__) | |
130 negs xl, xl | |
131 sbc xh, xh, xh, lsl #1 | |
132 #else | |
133 rsbs xl, xl, #0 | |
134 rsc xh, xh, #0 | |
135 #endif | |
136 1: | |
137 tst yh, #0x80000000 | |
138 mov yh, yh, lsl #12 | |
139 orr yh, ip, yh, lsr #12 | |
140 beq 1f | |
141 #if defined(__thumb2__) | |
142 negs yl, yl | |
143 sbc yh, yh, yh, lsl #1 | |
144 #else | |
145 rsbs yl, yl, #0 | |
146 rsc yh, yh, #0 | |
147 #endif | |
148 1: | |
149 @ If exponent == difference, one or both args were denormalized. | |
150 @ Since this is not common case, rescale them off line. | |
151 teq r4, r5 | |
152 beq LSYM(Lad_d) | |
153 LSYM(Lad_x): | |
154 | |
155 @ Compensate for the exponent overlapping the mantissa MSB added later | |
156 sub r4, r4, #1 | |
157 | |
158 @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip. | |
159 rsbs lr, r5, #32 | |
160 blt 1f | |
161 shift1 lsl, ip, yl, lr | |
162 shiftop adds xl xl yl lsr r5 yl | |
163 adc xh, xh, #0 | |
164 shiftop adds xl xl yh lsl lr yl | |
165 shiftop adcs xh xh yh asr r5 yh | |
166 b 2f | |
167 1: sub r5, r5, #32 | |
168 add lr, lr, #32 | |
169 cmp yl, #1 | |
170 shift1 lsl,ip, yh, lr | |
171 do_it cs | |
172 orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later | |
173 shiftop adds xl xl yh asr r5 yh | |
174 adcs xh, xh, yh, asr #31 | |
175 2: | |
176 @ We now have a result in xh-xl-ip. | |
177 @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above) | |
178 and r5, xh, #0x80000000 | |
179 bpl LSYM(Lad_p) | |
180 #if defined(__thumb2__) | |
181 mov lr, #0 | |
182 negs ip, ip | |
183 sbcs xl, lr, xl | |
184 sbc xh, lr, xh | |
185 #else | |
186 rsbs ip, ip, #0 | |
187 rscs xl, xl, #0 | |
188 rsc xh, xh, #0 | |
189 #endif | |
190 | |
191 @ Determine how to normalize the result. | |
192 LSYM(Lad_p): | |
193 cmp xh, #0x00100000 | |
194 bcc LSYM(Lad_a) | |
195 cmp xh, #0x00200000 | |
196 bcc LSYM(Lad_e) | |
197 | |
198 @ Result needs to be shifted right. | |
199 movs xh, xh, lsr #1 | |
200 movs xl, xl, rrx | |
201 mov ip, ip, rrx | |
202 add r4, r4, #1 | |
203 | |
204 @ Make sure we did not bust our exponent. | |
205 mov r2, r4, lsl #21 | |
206 cmn r2, #(2 << 21) | |
207 bcs LSYM(Lad_o) | |
208 | |
209 @ Our result is now properly aligned into xh-xl, remaining bits in ip. | |
210 @ Round with MSB of ip. If halfway between two numbers, round towards | |
211 @ LSB of xl = 0. | |
212 @ Pack final result together. | |
213 LSYM(Lad_e): | |
214 cmp ip, #0x80000000 | |
215 do_it eq | |
216 COND(mov,s,eq) ip, xl, lsr #1 | |
217 adcs xl, xl, #0 | |
218 adc xh, xh, r4, lsl #20 | |
219 orr xh, xh, r5 | |
220 RETLDM "r4, r5" | |
221 | |
222 @ Result must be shifted left and exponent adjusted. | |
223 LSYM(Lad_a): | |
224 movs ip, ip, lsl #1 | |
225 adcs xl, xl, xl | |
226 adc xh, xh, xh | |
227 tst xh, #0x00100000 | |
228 sub r4, r4, #1 | |
229 bne LSYM(Lad_e) | |
230 | |
231 @ No rounding necessary since ip will always be 0 at this point. | |
@ Lad_l: normalize xh:xl (exponent bias in r4, sign in r5) and pack the
@ result.  Also the common tail used by the int-to-double conversions below.
232 LSYM(Lad_l): | |
233 | |
234 #if __ARM_ARCH__ < 5 | |
235 | |
@ No clz on ARMv4 and earlier: count leading zeros into r3 by binary search.
236 teq xh, #0 | |
237 movne r3, #20 | |
238 moveq r3, #52 | |
239 moveq xh, xl | |
240 moveq xl, #0 | |
241 mov r2, xh | |
242 cmp r2, #(1 << 16) | |
243 movhs r2, r2, lsr #16 | |
244 subhs r3, r3, #16 | |
245 cmp r2, #(1 << 8) | |
246 movhs r2, r2, lsr #8 | |
247 subhs r3, r3, #8 | |
248 cmp r2, #(1 << 4) | |
249 movhs r2, r2, lsr #4 | |
250 subhs r3, r3, #4 | |
251 cmp r2, #(1 << 2) | |
252 subhs r3, r3, #2 | |
253 sublo r3, r3, r2, lsr #1 | |
254 sub r3, r3, r2, lsr #3 | |
255 | |
256 #else | |
257 | |
258 teq xh, #0 | |
259 do_it eq, t | |
260 moveq xh, xl | |
261 moveq xl, #0 | |
262 clz r3, xh | |
263 do_it eq | |
264 addeq r3, r3, #32 | |
265 sub r3, r3, #11 | |
266 | |
267 #endif | |
268 | |
269 @ determine how to shift the value. | |
270 subs r2, r3, #32 | |
271 bge 2f | |
272 adds r2, r2, #12 | |
273 ble 1f | |
274 | |
275 @ shift value left 21 to 31 bits, or actually right 11 to 1 bits | |
276 @ since a register switch happened above. | |
277 add ip, r2, #20 | |
278 rsb r2, r2, #12 | |
279 shift1 lsl, xl, xh, ip | |
280 shift1 lsr, xh, xh, r2 | |
281 b 3f | |
282 | |
283 @ actually shift value left 1 to 20 bits, which might also represent | |
284 @ 32 to 52 bits if counting the register switch that happened earlier. | |
285 1: add r2, r2, #20 | |
286 2: do_it le | |
287 rsble ip, r2, #32 | |
288 shift1 lsl, xh, xh, r2 | |
289 #if defined(__thumb2__) | |
290 lsr ip, xl, ip | |
291 itt le | |
292 orrle xh, xh, ip | |
293 lslle xl, xl, r2 | |
294 #else | |
295 orrle xh, xh, xl, lsr ip | |
296 movle xl, xl, lsl r2 | |
297 #endif | |
298 | |
299 @ adjust exponent accordingly. | |
300 3: subs r4, r4, r3 | |
301 do_it ge, tt | |
302 addge xh, xh, r4, lsl #20 | |
303 orrge xh, xh, r5 | |
304 RETLDM "r4, r5" ge | |
305 | |
306 @ Exponent too small, denormalize result. | |
307 @ Find out proper shift value. | |
308 mvn r4, r4 | |
309 subs r4, r4, #31 | |
310 bge 2f | |
311 adds r4, r4, #12 | |
312 bgt 1f | |
313 | |
314 @ shift result right of 1 to 20 bits, sign is in r5. | |
315 add r4, r4, #20 | |
316 rsb r2, r4, #32 | |
317 shift1 lsr, xl, xl, r4 | |
318 shiftop orr xl xl xh lsl r2 yh | |
319 shiftop orr xh r5 xh lsr r4 yh | |
320 RETLDM "r4, r5" | |
321 | |
322 @ shift result right of 21 to 31 bits, or left 11 to 1 bits after | |
323 @ a register switch from xh to xl. | |
324 1: rsb r4, r4, #12 | |
325 rsb r2, r4, #32 | |
326 shift1 lsr, xl, xl, r2 | |
327 shiftop orr xl xl xh lsl r4 yh | |
328 mov xh, r5 | |
329 RETLDM "r4, r5" | |
330 | |
331 @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch | |
332 @ from xh to xl. | |
333 2: shift1 lsr, xl, xh, r4 | |
334 mov xh, r5 | |
335 RETLDM "r4, r5" | |
336 | |
337 @ Adjust exponents for denormalized arguments. | |
338 @ Note that r4 must not remain equal to 0. | |
339 LSYM(Lad_d): | |
340 teq r4, #0 | |
341 eor yh, yh, #0x00100000 | |
342 do_it eq, te | |
343 eoreq xh, xh, #0x00100000 | |
344 addeq r4, r4, #1 | |
345 subne r5, r5, #1 | |
346 b LSYM(Lad_x) | |
347 | |
348 | |
@ Lad_s: special cases — zero operands, equal magnitudes, INF or NAN.
349 LSYM(Lad_s): | |
350 mvns ip, r4, asr #21 | |
351 do_it ne | |
352 COND(mvn,s,ne) ip, r5, asr #21 | |
353 beq LSYM(Lad_i) | |
354 | |
355 teq r4, r5 | |
356 do_it eq | |
357 teqeq xl, yl | |
358 beq 1f | |
359 | |
360 @ Result is x + 0.0 = x or 0.0 + y = y. | |
361 orrs ip, r4, xl | |
362 do_it eq, t | |
363 moveq xh, yh | |
364 moveq xl, yl | |
365 RETLDM "r4, r5" | |
366 | |
367 1: teq xh, yh | |
368 | |
369 @ Result is x - x = 0. | |
370 do_it ne, tt | |
371 movne xh, #0 | |
372 movne xl, #0 | |
373 RETLDM "r4, r5" ne | |
374 | |
375 @ Result is x + x = 2x. | |
376 movs ip, r4, lsr #21 | |
377 bne 2f | |
378 movs xl, xl, lsl #1 | |
379 adcs xh, xh, xh | |
380 do_it cs | |
381 orrcs xh, xh, #0x80000000 | |
382 RETLDM "r4, r5" | |
383 2: adds r4, r4, #(2 << 21) | |
384 do_it cc, t | |
385 addcc xh, xh, #(1 << 20) | |
386 RETLDM "r4, r5" cc | |
387 and r5, xh, #0x80000000 | |
388 | |
389 @ Overflow: return INF. | |
390 LSYM(Lad_o): | |
391 orr xh, r5, #0x7f000000 | |
392 orr xh, xh, #0x00f00000 | |
393 mov xl, #0 | |
394 RETLDM "r4, r5" | |
395 | |
396 @ At least one of x or y is INF/NAN. | |
397 @ if xh-xl != INF/NAN: return yh-yl (which is INF/NAN) | |
398 @ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN) | |
399 @ if either is NAN: return NAN | |
400 @ if opposite sign: return NAN | |
401 @ otherwise return xh-xl (which is INF or -INF) | |
402 LSYM(Lad_i): | |
403 mvns ip, r4, asr #21 | |
404 do_it ne, te | |
405 movne xh, yh | |
406 movne xl, yl | |
407 COND(mvn,s,eq) ip, r5, asr #21 | |
408 do_it ne, t | |
409 movne yh, xh | |
410 movne yl, xl | |
411 orrs r4, xl, xh, lsl #12 | |
412 do_it eq, te | |
413 COND(orr,s,eq) r5, yl, yh, lsl #12 | |
414 teqeq xh, yh | |
415 orrne xh, xh, #0x00080000 @ quiet NAN | |
416 RETLDM "r4, r5" | |
417 | |
418 FUNC_END aeabi_dsub | |
419 FUNC_END subdf3 | |
420 FUNC_END aeabi_dadd | |
421 FUNC_END adddf3 | |
422 | |
423 ARM_FUNC_START floatunsidf | |
424 ARM_FUNC_ALIAS aeabi_ui2d floatunsidf | |
@ floatunsidf / __aeabi_ui2d: convert an unsigned 32-bit integer (r0) to a
@ double in xh:xl.  Zero is returned directly; otherwise the exponent bias
@ is set up and the shared normalize/pack tail Lad_l (in adddf3) finishes.
425 | |
426 teq r0, #0 | |
427 do_it eq, t | |
428 moveq r1, #0 | |
429 RETc(eq) | |
430 do_push {r4, r5, lr} | |
431 mov r4, #0x400 @ initial exponent | |
432 add r4, r4, #(52-1 - 1) | |
433 mov r5, #0 @ sign bit is 0 | |
434 .ifnc xl, r0 | |
435 mov xl, r0 | |
436 .endif | |
437 mov xh, #0 | |
438 b LSYM(Lad_l) | |
439 | |
440 FUNC_END aeabi_ui2d | |
441 FUNC_END floatunsidf | |
442 | |
443 ARM_FUNC_START floatsidf | |
444 ARM_FUNC_ALIAS aeabi_i2d floatsidf | |
@ floatsidf / __aeabi_i2d: convert a signed 32-bit integer (r0) to a double
@ in xh:xl.  Sign is saved in r5, the absolute value is normalized and
@ packed by the shared tail Lad_l (in adddf3 above).
445 | |
446 teq r0, #0 | |
447 do_it eq, t | |
448 moveq r1, #0 | |
449 RETc(eq) | |
450 do_push {r4, r5, lr} | |
451 mov r4, #0x400 @ initial exponent | |
452 add r4, r4, #(52-1 - 1) | |
453 ands r5, r0, #0x80000000 @ sign bit in r5 | |
454 do_it mi | |
455 rsbmi r0, r0, #0 @ absolute value | |
456 .ifnc xl, r0 | |
457 mov xl, r0 | |
458 .endif | |
459 mov xh, #0 | |
460 b LSYM(Lad_l) | |
461 | |
462 FUNC_END aeabi_i2d | |
463 FUNC_END floatsidf | |
464 | |
465 ARM_FUNC_START extendsfdf2 | |
466 ARM_FUNC_ALIAS aeabi_f2d extendsfdf2 | |
@ extendsfdf2 / __aeabi_f2d: widen a single-precision float (r0) to a
@ double in xh:xl.  Fast path rebiases the exponent in place; zero, INF
@ and NAN pass through; denormals fall through to be renormalized via the
@ shared Lad_l tail in adddf3.
467 | |
468 movs r2, r0, lsl #1 @ toss sign bit | |
469 mov xh, r2, asr #3 @ stretch exponent | |
470 mov xh, xh, rrx @ retrieve sign bit | |
471 mov xl, r2, lsl #28 @ retrieve remaining bits | |
472 do_it ne, ttt | |
473 COND(and,s,ne) r3, r2, #0xff000000 @ isolate exponent | |
474 teqne r3, #0xff000000 @ if not 0, check if INF or NAN | |
475 eorne xh, xh, #0x38000000 @ fixup exponent otherwise. | |
476 RETc(ne) @ and return it. | |
477 | |
478 teq r2, #0 @ if actually 0 | |
479 do_it ne, e | |
480 teqne r3, #0xff000000 @ or INF or NAN | |
481 RETc(eq) @ we are done already. | |
482 | |
483 @ value was denormalized. We can normalize it now. | |
484 do_push {r4, r5, lr} | |
485 mov r4, #0x380 @ setup corresponding exponent | |
486 and r5, xh, #0x80000000 @ move sign bit in r5 | |
487 bic xh, xh, #0x80000000 | |
488 b LSYM(Lad_l) | |
489 | |
490 FUNC_END aeabi_f2d | |
491 FUNC_END extendsfdf2 | |
492 | |
493 ARM_FUNC_START floatundidf | |
494 ARM_FUNC_ALIAS aeabi_ul2d floatundidf | |
@ floatundidf / __aeabi_ul2d and floatdidf / __aeabi_l2d: convert a 64-bit
@ (unsigned or signed) integer in r0:r1 (ah:al) to a double.  Values wider
@ than 53 bits are pre-scaled before joining the adddf3 pack path (Lad_p).
@ Legacy FPA builds additionally return the result in f0 via f0_ret below.
495 | |
496 orrs r2, r0, r1 | |
497 #if !defined (__VFP_FP__) && !defined(__SOFTFP__) | |
498 do_it eq, t | |
499 mvfeqd f0, #0.0 | |
500 #else | |
501 do_it eq | |
502 #endif | |
503 RETc(eq) | |
504 | |
505 #if !defined (__VFP_FP__) && !defined(__SOFTFP__) | |
506 @ For hard FPA code we want to return via the tail below so that | |
507 @ we can return the result in f0 as well as in r0/r1 for backwards | |
508 @ compatibility. | |
509 adr ip, LSYM(f0_ret) | |
510 @ Push pc as well so that RETLDM works correctly. | |
511 do_push {r4, r5, ip, lr, pc} | |
512 #else | |
513 do_push {r4, r5, lr} | |
514 #endif | |
515 | |
516 mov r5, #0 | |
517 b 2f | |
518 | |
519 ARM_FUNC_START floatdidf | |
520 ARM_FUNC_ALIAS aeabi_l2d floatdidf | |
521 | |
522 orrs r2, r0, r1 | |
523 #if !defined (__VFP_FP__) && !defined(__SOFTFP__) | |
524 do_it eq, t | |
525 mvfeqd f0, #0.0 | |
526 #else | |
527 do_it eq | |
528 #endif | |
529 RETc(eq) | |
530 | |
531 #if !defined (__VFP_FP__) && !defined(__SOFTFP__) | |
532 @ For hard FPA code we want to return via the tail below so that | |
533 @ we can return the result in f0 as well as in r0/r1 for backwards | |
534 @ compatibility. | |
535 adr ip, LSYM(f0_ret) | |
536 @ Push pc as well so that RETLDM works correctly. | |
537 do_push {r4, r5, ip, lr, pc} | |
538 #else | |
539 do_push {r4, r5, lr} | |
540 #endif | |
541 | |
542 ands r5, ah, #0x80000000 @ sign bit in r5 | |
543 bpl 2f | |
544 #if defined(__thumb2__) | |
545 negs al, al | |
546 sbc ah, ah, ah, lsl #1 | |
547 #else | |
548 rsbs al, al, #0 | |
549 rsc ah, ah, #0 | |
550 #endif | |
551 2: | |
552 mov r4, #0x400 @ initial exponent | |
553 add r4, r4, #(52-1 - 1) | |
554 | |
555 @ FPA little-endian: must swap the word order. | |
556 .ifnc xh, ah | |
557 mov ip, al | |
558 mov xh, ah | |
559 mov xl, ip | |
560 .endif | |
561 | |
562 movs ip, xh, lsr #22 | |
563 beq LSYM(Lad_p) | |
564 | |
565 @ The value is too big. Scale it down a bit... | |
566 mov r2, #3 | |
567 movs ip, ip, lsr #3 | |
568 do_it ne | |
569 addne r2, r2, #3 | |
570 movs ip, ip, lsr #3 | |
571 do_it ne | |
572 addne r2, r2, #3 | |
573 add r2, r2, ip, lsr #3 | |
574 | |
575 rsb r3, r2, #32 | |
576 shift1 lsl, ip, xl, r3 | |
577 shift1 lsr, xl, xl, r2 | |
578 shiftop orr xl xl xh lsl r3 lr | |
579 shift1 lsr, xh, xh, r2 | |
580 add r4, r4, r2 | |
581 b LSYM(Lad_p) | |
582 | |
583 #if !defined (__VFP_FP__) && !defined(__SOFTFP__) | |
584 | |
585 @ Legacy code expects the result to be returned in f0. Copy it | |
586 @ there as well. | |
587 LSYM(f0_ret): | |
588 do_push {r0, r1} | |
589 ldfd f0, [sp], #8 | |
590 RETLDM | |
591 | |
592 #endif | |
593 | |
594 FUNC_END floatdidf | |
595 FUNC_END aeabi_l2d | |
596 FUNC_END floatundidf | |
597 FUNC_END aeabi_ul2d | |
598 | |
599 #endif /* L_addsubdf3 */ | |
600 | |
601 #ifdef L_arm_muldivdf3 | |
602 | |
603 ARM_FUNC_START muldf3 | |
604 ARM_FUNC_ALIAS aeabi_dmul muldf3 | |
@ muldf3 / __aeabi_dmul: double multiply, x (xh:xl) * y (yh:yl) -> xh:xl.
@ Mantissas are multiplied 32x32->64 with umull/umlal on ARMv4+, or a
@ schoolbook 16-bit decomposition on older cores.  The Lml_* labels below
@ are shared with divdf3 for the special-case and denormal paths.
605 do_push {r4, r5, r6, lr} | |
606 | |
607 @ Mask out exponents, trap any zero/denormal/INF/NAN. | |
608 mov ip, #0xff | |
609 orr ip, ip, #0x700 | |
610 ands r4, ip, xh, lsr #20 | |
611 do_it ne, tte | |
612 COND(and,s,ne) r5, ip, yh, lsr #20 | |
613 teqne r4, ip | |
614 teqne r5, ip | |
615 bleq LSYM(Lml_s) | |
616 | |
617 @ Add exponents together | |
618 add r4, r4, r5 | |
619 | |
620 @ Determine final sign. | |
621 eor r6, xh, yh | |
622 | |
623 @ Convert mantissa to unsigned integer. | |
624 @ If power of two, branch to a separate path. | |
625 bic xh, xh, ip, lsl #21 | |
626 bic yh, yh, ip, lsl #21 | |
627 orrs r5, xl, xh, lsl #12 | |
628 do_it ne | |
629 COND(orr,s,ne) r5, yl, yh, lsl #12 | |
630 orr xh, xh, #0x00100000 | |
631 orr yh, yh, #0x00100000 | |
632 beq LSYM(Lml_1) | |
633 | |
634 #if __ARM_ARCH__ < 4 | |
635 | |
636 @ Put sign bit in r6, which will be restored in yl later. | |
637 and r6, r6, #0x80000000 | |
638 | |
639 @ Well, no way to make it shorter without the umull instruction. | |
640 stmfd sp!, {r6, r7, r8, r9, sl, fp} | |
641 mov r7, xl, lsr #16 | |
642 mov r8, yl, lsr #16 | |
643 mov r9, xh, lsr #16 | |
644 mov sl, yh, lsr #16 | |
645 bic xl, xl, r7, lsl #16 | |
646 bic yl, yl, r8, lsl #16 | |
647 bic xh, xh, r9, lsl #16 | |
648 bic yh, yh, sl, lsl #16 | |
649 mul ip, xl, yl | |
650 mul fp, xl, r8 | |
651 mov lr, #0 | |
652 adds ip, ip, fp, lsl #16 | |
653 adc lr, lr, fp, lsr #16 | |
654 mul fp, r7, yl | |
655 adds ip, ip, fp, lsl #16 | |
656 adc lr, lr, fp, lsr #16 | |
657 mul fp, xl, sl | |
658 mov r5, #0 | |
659 adds lr, lr, fp, lsl #16 | |
660 adc r5, r5, fp, lsr #16 | |
661 mul fp, r7, yh | |
662 adds lr, lr, fp, lsl #16 | |
663 adc r5, r5, fp, lsr #16 | |
664 mul fp, xh, r8 | |
665 adds lr, lr, fp, lsl #16 | |
666 adc r5, r5, fp, lsr #16 | |
667 mul fp, r9, yl | |
668 adds lr, lr, fp, lsl #16 | |
669 adc r5, r5, fp, lsr #16 | |
670 mul fp, xh, sl | |
671 mul r6, r9, sl | |
672 adds r5, r5, fp, lsl #16 | |
673 adc r6, r6, fp, lsr #16 | |
674 mul fp, r9, yh | |
675 adds r5, r5, fp, lsl #16 | |
676 adc r6, r6, fp, lsr #16 | |
677 mul fp, xl, yh | |
678 adds lr, lr, fp | |
679 mul fp, r7, sl | |
680 adcs r5, r5, fp | |
681 mul fp, xh, yl | |
682 adc r6, r6, #0 | |
683 adds lr, lr, fp | |
684 mul fp, r9, r8 | |
685 adcs r5, r5, fp | |
686 mul fp, r7, r8 | |
687 adc r6, r6, #0 | |
688 adds lr, lr, fp | |
689 mul fp, xh, yh | |
690 adcs r5, r5, fp | |
691 adc r6, r6, #0 | |
692 ldmfd sp!, {yl, r7, r8, r9, sl, fp} | |
693 | |
694 #else | |
695 | |
696 @ Here is the actual multiplication. | |
697 umull ip, lr, xl, yl | |
698 mov r5, #0 | |
699 umlal lr, r5, xh, yl | |
700 and yl, r6, #0x80000000 | |
701 umlal lr, r5, xl, yh | |
702 mov r6, #0 | |
703 umlal r5, r6, xh, yh | |
704 | |
705 #endif | |
706 | |
707 @ The LSBs in ip are only significant for the final rounding. | |
708 @ Fold them into lr. | |
709 teq ip, #0 | |
710 do_it ne | |
711 orrne lr, lr, #1 | |
712 | |
713 @ Adjust result upon the MSB position. | |
714 sub r4, r4, #0xff | |
715 cmp r6, #(1 << (20-11)) | |
716 sbc r4, r4, #0x300 | |
717 bcs 1f | |
718 movs lr, lr, lsl #1 | |
719 adcs r5, r5, r5 | |
720 adc r6, r6, r6 | |
721 1: | |
722 @ Shift to final position, add sign to result. | |
723 orr xh, yl, r6, lsl #11 | |
724 orr xh, xh, r5, lsr #21 | |
725 mov xl, r5, lsl #11 | |
726 orr xl, xl, lr, lsr #21 | |
727 mov lr, lr, lsl #11 | |
728 | |
729 @ Check exponent range for under/overflow. | |
730 subs ip, r4, #(254 - 1) | |
731 do_it hi | |
732 cmphi ip, #0x700 | |
733 bhi LSYM(Lml_u) | |
734 | |
735 @ Round the result, merge final exponent. | |
736 cmp lr, #0x80000000 | |
737 do_it eq | |
738 COND(mov,s,eq) lr, xl, lsr #1 | |
739 adcs xl, xl, #0 | |
740 adc xh, xh, r4, lsl #20 | |
741 RETLDM "r4, r5, r6" | |
742 | |
743 @ Multiplication by 0x1p*: let's shortcut a lot of code. | |
744 LSYM(Lml_1): | |
745 and r6, r6, #0x80000000 | |
746 orr xh, r6, xh | |
747 orr xl, xl, yl | |
748 eor xh, xh, yh | |
749 subs r4, r4, ip, lsr #1 | |
750 do_it gt, tt | |
751 COND(rsb,s,gt) r5, r4, ip | |
752 orrgt xh, xh, r4, lsl #20 | |
753 RETLDM "r4, r5, r6" gt | |
754 | |
755 @ Under/overflow: fix things up for the code below. | |
756 orr xh, xh, #0x00100000 | |
757 mov lr, #0 | |
758 subs r4, r4, #1 | |
759 | |
760 LSYM(Lml_u): | |
761 @ Overflow? | |
762 bgt LSYM(Lml_o) | |
763 | |
764 @ Check if denormalized result is possible, otherwise return signed 0. | |
765 cmn r4, #(53 + 1) | |
766 do_it le, tt | |
767 movle xl, #0 | |
768 bicle xh, xh, #0x7fffffff | |
769 RETLDM "r4, r5, r6" le | |
770 | |
771 @ Find out proper shift value. | |
772 rsb r4, r4, #0 | |
773 subs r4, r4, #32 | |
774 bge 2f | |
775 adds r4, r4, #12 | |
776 bgt 1f | |
777 | |
778 @ shift result right of 1 to 20 bits, preserve sign bit, round, etc. | |
779 add r4, r4, #20 | |
780 rsb r5, r4, #32 | |
781 shift1 lsl, r3, xl, r5 | |
782 shift1 lsr, xl, xl, r4 | |
783 shiftop orr xl xl xh lsl r5 r2 | |
784 and r2, xh, #0x80000000 | |
785 bic xh, xh, #0x80000000 | |
786 adds xl, xl, r3, lsr #31 | |
787 shiftop adc xh r2 xh lsr r4 r6 | |
788 orrs lr, lr, r3, lsl #1 | |
789 do_it eq | |
790 biceq xl, xl, r3, lsr #31 | |
791 RETLDM "r4, r5, r6" | |
792 | |
793 @ shift result right of 21 to 31 bits, or left 11 to 1 bits after | |
794 @ a register switch from xh to xl. Then round. | |
795 1: rsb r4, r4, #12 | |
796 rsb r5, r4, #32 | |
797 shift1 lsl, r3, xl, r4 | |
798 shift1 lsr, xl, xl, r5 | |
799 shiftop orr xl xl xh lsl r4 r2 | |
800 bic xh, xh, #0x7fffffff | |
801 adds xl, xl, r3, lsr #31 | |
802 adc xh, xh, #0 | |
803 orrs lr, lr, r3, lsl #1 | |
804 do_it eq | |
805 biceq xl, xl, r3, lsr #31 | |
806 RETLDM "r4, r5, r6" | |
807 | |
808 @ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch | |
809 @ from xh to xl. Leftover bits are in r3-r6-lr for rounding. | |
810 2: rsb r5, r4, #32 | |
811 shiftop orr lr lr xl lsl r5 r2 | |
812 shift1 lsr, r3, xl, r4 | |
813 shiftop orr r3 r3 xh lsl r5 r2 | |
814 shift1 lsr, xl, xh, r4 | |
815 bic xh, xh, #0x7fffffff | |
816 shiftop bic xl xl xh lsr r4 r2 | |
817 add xl, xl, r3, lsr #31 | |
818 orrs lr, lr, r3, lsl #1 | |
819 do_it eq | |
820 biceq xl, xl, r3, lsr #31 | |
821 RETLDM "r4, r5, r6" | |
822 | |
823 @ One or both arguments are denormalized. | |
824 @ Scale them leftwards and preserve sign bit. | |
825 LSYM(Lml_d): | |
826 teq r4, #0 | |
827 bne 2f | |
828 and r6, xh, #0x80000000 | |
829 1: movs xl, xl, lsl #1 | |
830 adc xh, xh, xh | |
831 tst xh, #0x00100000 | |
832 do_it eq | |
833 subeq r4, r4, #1 | |
834 beq 1b | |
835 orr xh, xh, r6 | |
836 teq r5, #0 | |
837 do_it ne | |
838 RETc(ne) | |
839 2: and r6, yh, #0x80000000 | |
840 3: movs yl, yl, lsl #1 | |
841 adc yh, yh, yh | |
842 tst yh, #0x00100000 | |
843 do_it eq | |
844 subeq r5, r5, #1 | |
845 beq 3b | |
846 orr yh, yh, r6 | |
847 RET | |
848 | |
849 LSYM(Lml_s): | |
850 @ Isolate the INF and NAN cases away | |
851 teq r4, ip | |
852 and r5, ip, yh, lsr #20 | |
853 do_it ne | |
854 teqne r5, ip | |
855 beq 1f | |
856 | |
857 @ Here, one or more arguments are either denormalized or zero. | |
858 orrs r6, xl, xh, lsl #1 | |
859 do_it ne | |
860 COND(orr,s,ne) r6, yl, yh, lsl #1 | |
861 bne LSYM(Lml_d) | |
862 | |
863 @ Result is 0, but determine sign anyway. | |
864 LSYM(Lml_z): | |
865 eor xh, xh, yh | |
866 and xh, xh, #0x80000000 | |
867 mov xl, #0 | |
868 RETLDM "r4, r5, r6" | |
869 | |
870 1: @ One or both args are INF or NAN. | |
871 orrs r6, xl, xh, lsl #1 | |
872 do_it eq, te | |
873 moveq xl, yl | |
874 moveq xh, yh | |
875 COND(orr,s,ne) r6, yl, yh, lsl #1 | |
876 beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN | |
877 teq r4, ip | |
878 bne 1f | |
879 orrs r6, xl, xh, lsl #12 | |
880 bne LSYM(Lml_n) @ NAN * <anything> -> NAN | |
881 1: teq r5, ip | |
882 bne LSYM(Lml_i) | |
883 orrs r6, yl, yh, lsl #12 | |
884 do_it ne, t | |
885 movne xl, yl | |
886 movne xh, yh | |
887 bne LSYM(Lml_n) @ <anything> * NAN -> NAN | |
888 | |
889 @ Result is INF, but we need to determine its sign. | |
890 LSYM(Lml_i): | |
891 eor xh, xh, yh | |
892 | |
893 @ Overflow: return INF (sign already in xh). | |
894 LSYM(Lml_o): | |
895 and xh, xh, #0x80000000 | |
896 orr xh, xh, #0x7f000000 | |
897 orr xh, xh, #0x00f00000 | |
898 mov xl, #0 | |
899 RETLDM "r4, r5, r6" | |
900 | |
901 @ Return a quiet NAN. | |
902 LSYM(Lml_n): | |
903 orr xh, xh, #0x7f000000 | |
904 orr xh, xh, #0x00f80000 | |
905 RETLDM "r4, r5, r6" | |
906 | |
907 FUNC_END aeabi_dmul | |
908 FUNC_END muldf3 | |
909 | |
910 ARM_FUNC_START divdf3 | |
911 ARM_FUNC_ALIAS aeabi_ddiv divdf3 | |
@ divdf3 / __aeabi_ddiv: double divide, x (xh:xl) / y (yh:yl) -> xh:xl.
@ Mantissa quotient is produced 4 bits per iteration by a restoring
@ division loop; special cases and denormal rescaling reuse the Lml_*
@ paths defined in muldf3 above.
912 | |
913 do_push {r4, r5, r6, lr} | |
914 | |
915 @ Mask out exponents, trap any zero/denormal/INF/NAN. | |
916 mov ip, #0xff | |
917 orr ip, ip, #0x700 | |
918 ands r4, ip, xh, lsr #20 | |
919 do_it ne, tte | |
920 COND(and,s,ne) r5, ip, yh, lsr #20 | |
921 teqne r4, ip | |
922 teqne r5, ip | |
923 bleq LSYM(Ldv_s) | |
924 | |
925 @ Subtract the divisor exponent from the dividend's. | |
926 sub r4, r4, r5 | |
927 | |
928 @ Preserve final sign into lr. | |
929 eor lr, xh, yh | |
930 | |
931 @ Convert mantissa to unsigned integer. | |
932 @ Dividend -> r5-r6, divisor -> yh-yl. | |
933 orrs r5, yl, yh, lsl #12 | |
934 mov xh, xh, lsl #12 | |
935 beq LSYM(Ldv_1) | |
936 mov yh, yh, lsl #12 | |
937 mov r5, #0x10000000 | |
938 orr yh, r5, yh, lsr #4 | |
939 orr yh, yh, yl, lsr #24 | |
940 mov yl, yl, lsl #8 | |
941 orr r5, r5, xh, lsr #4 | |
942 orr r5, r5, xl, lsr #24 | |
943 mov r6, xl, lsl #8 | |
944 | |
945 @ Initialize xh with final sign bit. | |
946 and xh, lr, #0x80000000 | |
947 | |
948 @ Ensure result will land to known bit position. | |
949 @ Apply exponent bias accordingly. | |
950 cmp r5, yh | |
951 do_it eq | |
952 cmpeq r6, yl | |
953 adc r4, r4, #(255 - 2) | |
954 add r4, r4, #0x300 | |
955 bcs 1f | |
956 movs yh, yh, lsr #1 | |
957 mov yl, yl, rrx | |
958 1: | |
959 @ Perform first subtraction to align result to a nibble. | |
960 subs r6, r6, yl | |
961 sbc r5, r5, yh | |
962 movs yh, yh, lsr #1 | |
963 mov yl, yl, rrx | |
964 mov xl, #0x00100000 | |
965 mov ip, #0x00080000 | |
966 | |
967 @ The actual division loop. | |
968 1: subs lr, r6, yl | |
969 sbcs lr, r5, yh | |
970 do_it cs, tt | |
971 subcs r6, r6, yl | |
972 movcs r5, lr | |
973 orrcs xl, xl, ip | |
974 movs yh, yh, lsr #1 | |
975 mov yl, yl, rrx | |
976 subs lr, r6, yl | |
977 sbcs lr, r5, yh | |
978 do_it cs, tt | |
979 subcs r6, r6, yl | |
980 movcs r5, lr | |
981 orrcs xl, xl, ip, lsr #1 | |
982 movs yh, yh, lsr #1 | |
983 mov yl, yl, rrx | |
984 subs lr, r6, yl | |
985 sbcs lr, r5, yh | |
986 do_it cs, tt | |
987 subcs r6, r6, yl | |
988 movcs r5, lr | |
989 orrcs xl, xl, ip, lsr #2 | |
990 movs yh, yh, lsr #1 | |
991 mov yl, yl, rrx | |
992 subs lr, r6, yl | |
993 sbcs lr, r5, yh | |
994 do_it cs, tt | |
995 subcs r6, r6, yl | |
996 movcs r5, lr | |
997 orrcs xl, xl, ip, lsr #3 | |
998 | |
999 orrs lr, r5, r6 | |
1000 beq 2f | |
1001 mov r5, r5, lsl #4 | |
1002 orr r5, r5, r6, lsr #28 | |
1003 mov r6, r6, lsl #4 | |
1004 mov yh, yh, lsl #3 | |
1005 orr yh, yh, yl, lsr #29 | |
1006 mov yl, yl, lsl #3 | |
1007 movs ip, ip, lsr #4 | |
1008 bne 1b | |
1009 | |
1010 @ We are done with a word of the result. | |
1011 @ Loop again for the low word if this pass was for the high word. | |
1012 tst xh, #0x00100000 | |
1013 bne 3f | |
1014 orr xh, xh, xl | |
1015 mov xl, #0 | |
1016 mov ip, #0x80000000 | |
1017 b 1b | |
1018 2: | |
1019 @ Be sure result starts in the high word. | |
1020 tst xh, #0x00100000 | |
1021 do_it eq, t | |
1022 orreq xh, xh, xl | |
1023 moveq xl, #0 | |
1024 3: | |
1025 @ Check exponent range for under/overflow. | |
1026 subs ip, r4, #(254 - 1) | |
1027 do_it hi | |
1028 cmphi ip, #0x700 | |
1029 bhi LSYM(Lml_u) | |
1030 | |
1031 @ Round the result, merge final exponent. | |
1032 subs ip, r5, yh | |
1033 do_it eq, t | |
1034 COND(sub,s,eq) ip, r6, yl | |
1035 COND(mov,s,eq) ip, xl, lsr #1 | |
1036 adcs xl, xl, #0 | |
1037 adc xh, xh, r4, lsl #20 | |
1038 RETLDM "r4, r5, r6" | |
1039 | |
1040 @ Division by 0x1p*: shortcut a lot of code. | |
1041 LSYM(Ldv_1): | |
1042 and lr, lr, #0x80000000 | |
1043 orr xh, lr, xh, lsr #12 | |
1044 adds r4, r4, ip, lsr #1 | |
1045 do_it gt, tt | |
1046 COND(rsb,s,gt) r5, r4, ip | |
1047 orrgt xh, xh, r4, lsl #20 | |
1048 RETLDM "r4, r5, r6" gt | |
1049 | |
1050 orr xh, xh, #0x00100000 | |
1051 mov lr, #0 | |
1052 subs r4, r4, #1 | |
1053 b LSYM(Lml_u) | |
1054 | |
1055 @ Result might need to be denormalized: put remainder bits | |
1056 @ in lr for rounding considerations. | |
1057 LSYM(Ldv_u): | |
1058 orr lr, r5, r6 | |
1059 b LSYM(Lml_u) | |
1060 | |
1061 @ One or both arguments is either INF, NAN or zero. | |
1062 LSYM(Ldv_s): | |
1063 and r5, ip, yh, lsr #20 | |
1064 teq r4, ip | |
1065 do_it eq | |
1066 teqeq r5, ip | |
1067 beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN | |
1068 teq r4, ip | |
1069 bne 1f | |
1070 orrs r4, xl, xh, lsl #12 | |
1071 bne LSYM(Lml_n) @ NAN / <anything> -> NAN | |
1072 teq r5, ip | |
1073 bne LSYM(Lml_i) @ INF / <anything> -> INF | |
1074 mov xl, yl | |
1075 mov xh, yh | |
1076 b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN | |
1077 1: teq r5, ip | |
1078 bne 2f | |
1079 orrs r5, yl, yh, lsl #12 | |
1080 beq LSYM(Lml_z) @ <anything> / INF -> 0 | |
1081 mov xl, yl | |
1082 mov xh, yh | |
1083 b LSYM(Lml_n) @ <anything> / NAN -> NAN | |
1084 2: @ If both are nonzero, we need to normalize and resume above. | |
1085 orrs r6, xl, xh, lsl #1 | |
1086 do_it ne | |
1087 COND(orr,s,ne) r6, yl, yh, lsl #1 | |
1088 bne LSYM(Lml_d) | |
1089 @ One or both arguments are 0. | |
1090 orrs r4, xl, xh, lsl #1 | |
1091 bne LSYM(Lml_i) @ <non_zero> / 0 -> INF | |
1092 orrs r5, yl, yh, lsl #1 | |
1093 bne LSYM(Lml_z) @ 0 / <non_zero> -> 0 | |
1094 b LSYM(Lml_n) @ 0 / 0 -> NAN | |
1095 | |
1096 FUNC_END aeabi_ddiv | |
1097 FUNC_END divdf3 | |
1098 | |
1099 #endif /* L_muldivdf3 */ | |
1100 | |
1101 #ifdef L_arm_cmpdf2 | |
@ Double-precision comparison family: gtdf2/gedf2, ltdf2/ledf2 and
@ cmpdf2/nedf2/eqdf2.  Each entry loads ip with the value to return when
@ the operands are unordered (a NaN is involved), stashes it on the stack,
@ and falls into the common compare path at label 1.
1102 | |
1103 @ Note: only r0 (return value) and ip are clobbered here. | |
1104 | |
1105 ARM_FUNC_START gtdf2 | |
1106 ARM_FUNC_ALIAS gedf2 gtdf2 | |
1107 mov ip, #-1 | |
1108 b 1f | |
1109 | |
1110 ARM_FUNC_START ltdf2 | |
1111 ARM_FUNC_ALIAS ledf2 ltdf2 | |
1112 mov ip, #1 | |
1113 b 1f | |
1114 | |
1115 ARM_FUNC_START cmpdf2 | |
1116 ARM_FUNC_ALIAS nedf2 cmpdf2 | |
1117 ARM_FUNC_ALIAS eqdf2 cmpdf2 | |
1118 mov ip, #1 @ how should we specify unordered here? | |
1119 | |
47
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1120 1: str ip, [sp, #-4]! |
0 | 1121 |
1122 @ Trap any INF/NAN first. | |
1123 mov ip, xh, lsl #1 | |
1124 mvns ip, ip, asr #21 | |
1125 mov ip, yh, lsl #1 | |
1126 do_it ne | |
1127 COND(mvn,s,ne) ip, ip, asr #21 | |
1128 beq 3f | |
1129 | |
1130 @ Test for equality. | |
1131 @ Note that 0.0 is equal to -0.0. | |
47
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1132 2: add sp, sp, #4 |
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1133 orrs ip, xl, xh, lsl #1 @ if x == 0.0 or -0.0 |
0 | 1134 do_it eq, e |
1135 COND(orr,s,eq) ip, yl, yh, lsl #1 @ and y == 0.0 or -0.0 | |
1136 teqne xh, yh @ or xh == yh | |
1137 do_it eq, tt | |
1138 teqeq xl, yl @ and xl == yl | |
1139 moveq r0, #0 @ then equal. | |
1140 RETc(eq) | |
1141 | |
1142 @ Clear C flag | |
1143 cmn r0, #0 | |
1144 | |
1145 @ Compare sign, | |
1146 teq xh, yh | |
1147 | |
1148 @ Compare values if same sign | |
1149 do_it pl | |
1150 cmppl xh, yh | |
1151 do_it eq | |
1152 cmpeq xl, yl | |
1153 | |
1154 @ Result: | |
1155 do_it cs, e | |
1156 movcs r0, yh, asr #31 | |
1157 mvncc r0, yh, asr #31 | |
1158 orr r0, r0, #1 | |
1159 RET | |
1160 | |
1161 @ Look for a NAN. | |
1162 3: mov ip, xh, lsl #1 | |
1163 mvns ip, ip, asr #21 | |
1164 bne 4f | |
1165 orrs ip, xl, xh, lsl #12 | |
1166 bne 5f @ x is NAN | |
1167 4: mov ip, yh, lsl #1 | |
1168 mvns ip, ip, asr #21 | |
1169 bne 2b | |
1170 orrs ip, yl, yh, lsl #12 | |
1171 beq 2b @ y is not NAN | |
47
3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
kent <kent@cr.ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1172 5: ldr r0, [sp], #4 @ unordered return code |
0 | 1173 RET |
1174 | |
1175 FUNC_END gedf2 | |
1176 FUNC_END gtdf2 | |
1177 FUNC_END ledf2 | |
1178 FUNC_END ltdf2 | |
1179 FUNC_END nedf2 | |
1180 FUNC_END eqdf2 | |
1181 FUNC_END cmpdf2 | |
1182 | |
ARM_FUNC_START aeabi_cdrcmple

	@ Reversed flag-setting compare: swap the two double operands
	@ (r1:r0 <-> r3:r2) and fall into the common code below.
	mov	ip, r0
	mov	r0, r2
	mov	r2, ip
	mov	ip, r1
	mov	r1, r3
	mov	r3, ip
	b	6f

ARM_FUNC_START aeabi_cdcmpeq
ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq

	@ The status-returning routines are required to preserve all
	@ registers except ip, lr, and cpsr.
6:	do_push	{r0, lr}		@ save caller's r0 across the call
	ARM_CALL cmpdf2			@ r0 := -1 / 0 / +1 / unordered code
	@ Set the Z flag correctly, and the C flag unconditionally.
	cmp	r0, #0
	@ Clear the C flag if the return value was -1, indicating
	@ that the first operand was smaller than the second.
	do_it	mi
	cmnmi	r0, #0
	RETLDM	"r0"

	FUNC_END aeabi_cdcmple
	FUNC_END aeabi_cdcmpeq
	FUNC_END aeabi_cdrcmple
1211 | |
ARM_FUNC_START aeabi_dcmpeq

	@ r0 := 1 if x == y, else 0 (less, greater, or unordered).
	str	lr, [sp, #-8]!		@ 8-byte slot keeps sp aligned (AAPCS)
	ARM_CALL aeabi_cdcmple		@ sets Z iff the operands are equal
	do_it	eq, e
	moveq	r0, #1			@ Equal to.
	movne	r0, #0			@ Less than, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_dcmpeq
1222 | |
ARM_FUNC_START aeabi_dcmplt

	@ r0 := 1 if x < y, else 0 (equal, greater, or unordered).
	str	lr, [sp, #-8]!		@ 8-byte slot keeps sp aligned (AAPCS)
	ARM_CALL aeabi_cdcmple		@ clears C iff first operand is smaller
	do_it	cc, e
	movcc	r0, #1			@ Less than.
	movcs	r0, #0			@ Equal to, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_dcmplt
1233 | |
ARM_FUNC_START aeabi_dcmple

	@ r0 := 1 if x <= y, else 0 (greater or unordered).
	str	lr, [sp, #-8]!		@ 8-byte slot keeps sp aligned (AAPCS)
	ARM_CALL aeabi_cdcmple		@ ls = (C clear) or (Z set)
	do_it	ls, e
	movls	r0, #1			@ Less than or equal to.
	movhi	r0, #0			@ Greater than or unordered.
	RETLDM

	FUNC_END aeabi_dcmple
1244 | |
ARM_FUNC_START aeabi_dcmpge

	@ r0 := 1 if x >= y, else 0 (less or unordered).
	@ Implemented as (y <= x) via the reversed compare.
	str	lr, [sp, #-8]!		@ 8-byte slot keeps sp aligned (AAPCS)
	ARM_CALL aeabi_cdrcmple
	do_it	ls, e
	movls	r0, #1			@ Operand 2 is less than or equal to operand 1.
	movhi	r0, #0			@ Operand 2 greater than operand 1, or unordered.
	RETLDM

	FUNC_END aeabi_dcmpge
1255 | |
ARM_FUNC_START aeabi_dcmpgt

	@ r0 := 1 if x > y, else 0 (less, equal, or unordered).
	@ Implemented as (y < x) via the reversed compare.
	str	lr, [sp, #-8]!		@ 8-byte slot keeps sp aligned (AAPCS)
	ARM_CALL aeabi_cdrcmple
	do_it	cc, e
	movcc	r0, #1			@ Operand 2 is less than operand 1.
	movcs	r0, #0			@ Operand 2 is greater than or equal to operand 1,
					@ or they are unordered.
	RETLDM

	FUNC_END aeabi_dcmpgt
1267 | |
1268 #endif /* L_cmpdf2 */ | |
1269 | |
1270 #ifdef L_arm_unorddf2 | |
1271 | |
ARM_FUNC_START unorddf2
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2

	@ r0 := 1 if either argument is a NAN, 0 otherwise.
	@ NAN = exponent all ones AND nonzero mantissa.
	mov	ip, xh, lsl #1		@ discard x's sign bit
	mvns	ip, ip, asr #21		@ Z set iff x's exponent is all ones
	bne	1f
	orrs	ip, xl, xh, lsl #12	@ nonzero mantissa -> NAN
	bne	3f			@ x is NAN
1:	mov	ip, yh, lsl #1
	mvns	ip, ip, asr #21
	bne	2f
	orrs	ip, yl, yh, lsl #12
	bne	3f			@ y is NAN
2:	mov	r0, #0			@ arguments are ordered.
	RET

3:	mov	r0, #1			@ arguments are unordered.
	RET

	FUNC_END aeabi_dcmpun
	FUNC_END unorddf2
1293 | |
1294 #endif /* L_unorddf2 */ | |
1295 | |
1296 #ifdef L_arm_fixdfsi | |
1297 | |
ARM_FUNC_START fixdfsi
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi

	@ Convert double (xh:xl) to signed 32-bit int, truncating toward zero.
	@ Out of range saturates to 0x7fffffff / 0x80000000; NAN returns 0.
	@ check exponent range.
	mov	r2, xh, lsl #1		@ exponent+mantissa, sign dropped
	adds	r2, r2, #(1 << 21)	@ C set iff exponent was all ones
	bcs	2f			@ value is INF or NAN
	bpl	1f			@ value is too small (|x| < 1)
	mov	r3, #(0xfffffc00 + 31)
	subs	r2, r3, r2, asr #21	@ r2 := 31 - unbiased exponent
	bls	3f			@ value is too large

	@ scale value
	mov	r3, xh, lsl #11		@ high mantissa bits to the top
	orr	r3, r3, #0x80000000	@ make the implicit leading 1 explicit
	orr	r3, r3, xl, lsr #21	@ merge in the low mantissa bits
	tst	xh, #0x80000000		@ the sign bit
	shift1	lsr, r0, r3, r2		@ truncating shift down to integer
	do_it	ne
	rsbne	r0, r0, #0		@ negate if x was negative
	RET

1:	mov	r0, #0
	RET

2:	orrs	xl, xl, xh, lsl #12	@ nonzero mantissa?
	bne	4f			@ x is NAN.
3:	ands	r0, xh, #0x80000000	@ the sign bit (0x80000000 if negative)
	do_it	eq
	moveq	r0, #0x7fffffff		@ maximum signed positive si
	RET

4:	mov	r0, #0			@ How should we convert NAN?
	RET

	FUNC_END aeabi_d2iz
	FUNC_END fixdfsi
1335 | |
1336 #endif /* L_fixdfsi */ | |
1337 | |
1338 #ifdef L_arm_fixunsdfsi | |
1339 | |
ARM_FUNC_START fixunsdfsi
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi

	@ Convert double (xh:xl) to unsigned 32-bit int, truncating toward zero.
	@ Negative and too-small values return 0; too-large saturates to
	@ 0xffffffff; NAN returns 0.
	@ check exponent range.
	movs	r2, xh, lsl #1		@ C := sign bit
	bcs	1f			@ value is negative
	adds	r2, r2, #(1 << 21)	@ C set iff exponent was all ones
	bcs	2f			@ value is INF or NAN
	bpl	1f			@ value is too small (|x| < 1)
	mov	r3, #(0xfffffc00 + 31)
	subs	r2, r3, r2, asr #21	@ r2 := 31 - unbiased exponent
	bmi	3f			@ value is too large

	@ scale value
	mov	r3, xh, lsl #11		@ high mantissa bits to the top
	orr	r3, r3, #0x80000000	@ make the implicit leading 1 explicit
	orr	r3, r3, xl, lsr #21	@ merge in the low mantissa bits
	shift1	lsr, r0, r3, r2		@ truncating shift down to integer
	RET

1:	mov	r0, #0
	RET

2:	orrs	xl, xl, xh, lsl #12	@ nonzero mantissa?
	bne	4f			@ value is NAN.
3:	mov	r0, #0xffffffff		@ maximum unsigned si
	RET

4:	mov	r0, #0			@ How should we convert NAN?
	RET

	FUNC_END aeabi_d2uiz
	FUNC_END fixunsdfsi
1373 | |
1374 #endif /* L_fixunsdfsi */ | |
1375 | |
1376 #ifdef L_arm_truncdfsf2 | |
1377 | |
ARM_FUNC_START truncdfsf2
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2

	@ Convert double (xh:xl) to single precision (r0), rounding to
	@ nearest-even.  Handles overflow to INF, underflow to signed zero,
	@ gradual underflow to denormals, and NAN propagation.
	@ check exponent range.
	mov	r2, xh, lsl #1		@ exponent+mantissa, sign dropped
	subs	r3, r2, #((1023 - 127) << 21)	@ rebias exponent for single
	do_it	cs, t
	COND(sub,s,cs)	ip, r3, #(1 << 21)
	COND(rsb,s,cs)	ip, ip, #(254 << 21)	@ range-check rebiased exponent
	bls	2f			@ value is out of range

1:	@ shift and round mantissa
	and	ip, xh, #0x80000000	@ keep the sign bit
	mov	r2, xl, lsl #3		@ r2 := bits shifted out, for rounding
	orr	xl, ip, xl, lsr #29
	cmp	r2, #0x80000000		@ C set iff round bit set; Z iff exact tie
	adc	r0, xl, r3, lsl #2	@ pack exponent+mantissa, round up via carry
	do_it	eq
	biceq	r0, r0, #1		@ exact tie: round to even
	RET

2:	@ either overflow or underflow
	tst	xh, #0x40000000
	bne	3f			@ overflow

	@ check if denormalized value is possible
	adds	r2, r3, #(23 << 21)
	do_it	lt, t
	andlt	r0, xh, #0x80000000	@ too small, return signed 0.
	RETc(lt)

	@ denormalize value so we can resume with the code above afterwards.
	orr	xh, xh, #0x00100000	@ make the implicit 1 explicit
	mov	r2, r2, lsr #21
	rsb	r2, r2, #24		@ r2 := right-shift count
	rsb	ip, r2, #32		@ ip := complementary left-shift count
#if defined(__thumb2__)
	lsls	r3, xl, ip
#else
	movs	r3, xl, lsl ip		@ any bits shifted out entirely?
#endif
	shift1	lsr, xl, xl, r2
	do_it	ne
	orrne	xl, xl, #1		@ fold r3 for rounding considerations.
	mov	r3, xh, lsl #11
	mov	r3, r3, lsr #11		@ isolate the high mantissa bits
	shiftop orr xl xl r3 lsl ip ip	@ merge them into the shifted low word
	shift1	lsr, r3, r3, r2
	mov	r3, r3, lsl #1
	b	1b

3:	@ check for NAN
	mvns	r3, r2, asr #21		@ Z set iff exponent is all ones
	bne	5f			@ simple overflow
	orrs	r3, xl, xh, lsl #12	@ nonzero mantissa -> NAN
	do_it	ne, tt
	movne	r0, #0x7f000000
	orrne	r0, r0, #0x00c00000	@ 0x7fc00000: quiet single-precision NAN
	RETc(ne)			@ return NAN

5:	@ return INF with sign
	and	r0, xh, #0x80000000
	orr	r0, r0, #0x7f000000
	orr	r0, r0, #0x00800000	@ 0x7f800000: single-precision INF
	RET

	FUNC_END aeabi_d2f
	FUNC_END truncdfsf2
1446 | |
1447 #endif /* L_truncdfsf2 */ |