Mercurial > hg > CbC > CbC_gcc
annotate gcc/config/arm/ieee754-sf.S @ 47:3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 07 Feb 2010 17:44:34 +0900 |
parents | a06113de4d67 |
children |
rev | line source |
---|---|
0 | 1 /* ieee754-sf.S single-precision floating point support for ARM |
2 | |
3 Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc. | |
4 Contributed by Nicolas Pitre (nico@cam.org) | |
5 | |
6 This file is free software; you can redistribute it and/or modify it | |
7 under the terms of the GNU General Public License as published by the | |
8 Free Software Foundation; either version 3, or (at your option) any | |
9 later version. | |
10 | |
11 This file is distributed in the hope that it will be useful, but | |
12 WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 General Public License for more details. | |
15 | |
16 Under Section 7 of GPL version 3, you are granted additional | |
17 permissions described in the GCC Runtime Library Exception, version | |
18 3.1, as published by the Free Software Foundation. | |
19 | |
20 You should have received a copy of the GNU General Public License and | |
21 a copy of the GCC Runtime Library Exception along with this program; | |
22 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 <http://www.gnu.org/licenses/>. */ | |
24 | |
25 /* | |
26 * Notes: | |
27 * | |
28 * The goal of this code is to be as fast as possible. This is | |
29 * not meant to be easy to understand for the casual reader. | |
30 * | |
31 * Only the default rounding mode is intended for best performances. | |
32 * Exceptions aren't supported yet, but that can be added quite easily | |
33 * if necessary without impacting performances. | |
34 */ | |
35 | |
36 #ifdef L_arm_negsf2 | |
37 | |
@ negsf2 / __aeabi_fneg: single-precision negation.
@ In:  r0 = float operand (IEEE-754 single carried in an integer register, soft-float ABI)
@ Out: r0 = negated value; only the sign bit is flipped, so this is also
@      correct for zeroes, INF and NAN encodings.
38 ARM_FUNC_START negsf2 | |
39 ARM_FUNC_ALIAS aeabi_fneg negsf2 | |
40 | |
41 eor r0, r0, #0x80000000 @ flip sign bit | |
42 RET | |
43 | |
44 FUNC_END aeabi_fneg | |
45 FUNC_END negsf2 | |
46 | |
47 #endif | |
48 | |
49 #ifdef L_arm_addsubsf3 | |
50 | |
@ NOTE(review): do_it / COND / shiftop / shift1 / RETc are ARM-vs-Thumb-2
@ portability macros — presumably defined in lib1funcs.asm; confirm there
@ before changing any conditional sequence below.
@
@ aeabi_frsub: reversed subtract, returns (r1 - r0). Implemented by negating
@ the first argument and falling into the common add path at label 1:.
51 ARM_FUNC_START aeabi_frsub | |
52 | |
53 eor r0, r0, #0x80000000 @ flip sign bit of first arg | |
54 b 1f | |
55 | |
@ subsf3 / __aeabi_fsub: returns (r0 - r1), by negating the second
@ argument and falling into the add path.
56 ARM_FUNC_START subsf3 | |
57 ARM_FUNC_ALIAS aeabi_fsub subsf3 | |
58 | |
59 eor r1, r1, #0x80000000 @ flip sign bit of second arg | |
60 #if defined(__INTERWORKING_STUBS__) | |
61 b 1f @ Skip Thumb-code prologue | |
62 #endif | |
63 | |
@ addsf3 / __aeabi_fadd: single-precision addition.
@ In:  r0, r1 = float operands (soft-float ABI). Out: r0 = r0 + r1.
@ Clobbers r2, r3, ip and flags.
64 ARM_FUNC_START addsf3 | |
65 ARM_FUNC_ALIAS aeabi_fadd addsf3 | |
66 | |
67 1: @ Look for zeroes, equal values, INF, or NAN. | |
68 movs r2, r0, lsl #1 | |
69 do_it ne, ttt | |
70 COND(mov,s,ne) r3, r1, lsl #1 | |
71 teqne r2, r3 | |
72 COND(mvn,s,ne) ip, r2, asr #24 | |
73 COND(mvn,s,ne) ip, r3, asr #24 | |
74 beq LSYM(Lad_s) | |
75 | |
76 @ Compute exponent difference. Make largest exponent in r2, | |
77 @ corresponding arg in r0, and positive exponent difference in r3. | |
78 mov r2, r2, lsr #24 | |
79 rsbs r3, r2, r3, lsr #24 | |
80 do_it gt, ttt | |
81 addgt r2, r2, r3 | |
@ Three-eor register swap of r0/r1 (no scratch register needed).
82 eorgt r1, r0, r1 | |
83 eorgt r0, r1, r0 | |
84 eorgt r1, r0, r1 | |
85 do_it lt | |
86 rsblt r3, r3, #0 | |
87 | |
88 @ If exponent difference is too large, return largest argument | |
89 @ already in r0. We need up to 25 bit to handle proper rounding | |
90 @ of 0x1p25 - 1.1. | |
91 cmp r3, #25 | |
92 do_it hi | |
93 RETc(hi) | |
94 | |
95 @ Convert mantissa to signed integer. | |
96 tst r0, #0x80000000 | |
97 orr r0, r0, #0x00800000 | |
98 bic r0, r0, #0xff000000 | |
99 do_it ne | |
100 rsbne r0, r0, #0 | |
101 tst r1, #0x80000000 | |
102 orr r1, r1, #0x00800000 | |
103 bic r1, r1, #0xff000000 | |
104 do_it ne | |
105 rsbne r1, r1, #0 | |
106 | |
107 @ If exponent == difference, one or both args were denormalized. | |
108 @ Since this is not common case, rescale them off line. | |
109 teq r2, r3 | |
110 beq LSYM(Lad_d) | |
111 LSYM(Lad_x): | |
112 | |
113 @ Compensate for the exponent overlapping the mantissa MSB added later | |
114 sub r2, r2, #1 | |
115 | |
116 @ Shift and add second arg to first arg in r0. | |
117 @ Keep leftover bits into r1. | |
118 shiftop adds r0 r0 r1 asr r3 ip | |
119 rsb r3, r3, #32 | |
120 shift1 lsl, r1, r1, r3 | |
121 | |
122 @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above) | |
123 and r3, r0, #0x80000000 | |
124 bpl LSYM(Lad_p) | |
@ Negate the 64-bit quantity r0:r1 (two's complement across both words).
125 #if defined(__thumb2__) | |
126 negs r1, r1 | |
127 sbc r0, r0, r0, lsl #1 | |
128 #else | |
129 rsbs r1, r1, #0 | |
130 rsc r0, r0, #0 | |
131 #endif | |
132 | |
133 @ Determine how to normalize the result. | |
134 LSYM(Lad_p): | |
135 cmp r0, #0x00800000 | |
136 bcc LSYM(Lad_a) | |
137 cmp r0, #0x01000000 | |
138 bcc LSYM(Lad_e) | |
139 | |
140 @ Result needs to be shifted right. | |
141 movs r0, r0, lsr #1 | |
142 mov r1, r1, rrx | |
143 add r2, r2, #1 | |
144 | |
145 @ Make sure we did not bust our exponent. | |
146 cmp r2, #254 | |
147 bhs LSYM(Lad_o) | |
148 | |
149 @ Our result is now properly aligned into r0, remaining bits in r1. | |
150 @ Pack final result together. | |
151 @ Round with MSB of r1. If halfway between two numbers, round towards | |
152 @ LSB of r0 = 0. | |
153 LSYM(Lad_e): | |
154 cmp r1, #0x80000000 | |
155 adc r0, r0, r2, lsl #23 | |
156 do_it eq | |
157 biceq r0, r0, #1 | |
158 orr r0, r0, r3 | |
159 RET | |
160 | |
161 @ Result must be shifted left and exponent adjusted. | |
162 LSYM(Lad_a): | |
163 movs r1, r1, lsl #1 | |
164 adc r0, r0, r0 | |
165 tst r0, #0x00800000 | |
166 sub r2, r2, #1 | |
167 bne LSYM(Lad_e) | |
168 | |
169 @ No rounding necessary since r1 will always be 0 at this point. | |
170 LSYM(Lad_l): | |
171 | |
@ Normalize r0: pre-ARMv5 uses a binary-search shift cascade; ARMv5+ uses clz.
172 #if __ARM_ARCH__ < 5 | |
173 | |
174 movs ip, r0, lsr #12 | |
175 moveq r0, r0, lsl #12 | |
176 subeq r2, r2, #12 | |
177 tst r0, #0x00ff0000 | |
178 moveq r0, r0, lsl #8 | |
179 subeq r2, r2, #8 | |
180 tst r0, #0x00f00000 | |
181 moveq r0, r0, lsl #4 | |
182 subeq r2, r2, #4 | |
183 tst r0, #0x00c00000 | |
184 moveq r0, r0, lsl #2 | |
185 subeq r2, r2, #2 | |
186 cmp r0, #0x00800000 | |
187 movcc r0, r0, lsl #1 | |
188 sbcs r2, r2, #0 | |
189 | |
190 #else | |
191 | |
192 clz ip, r0 | |
193 sub ip, ip, #8 | |
194 subs r2, r2, ip | |
195 shift1 lsl, r0, r0, ip | |
196 | |
197 #endif | |
198 | |
199 @ Final result with sign | |
200 @ If exponent negative, denormalize result. | |
201 do_it ge, et | |
202 addge r0, r0, r2, lsl #23 | |
203 rsblt r2, r2, #0 | |
204 orrge r0, r0, r3 | |
205 #if defined(__thumb2__) | |
206 do_it lt, t | |
207 lsrlt r0, r0, r2 | |
208 orrlt r0, r3, r0 | |
209 #else | |
210 orrlt r0, r3, r0, lsr r2 | |
211 #endif | |
212 RET | |
213 | |
214 @ Fixup and adjust bit position for denormalized arguments. | |
215 @ Note that r2 must not remain equal to 0. | |
216 LSYM(Lad_d): | |
217 teq r2, #0 | |
218 eor r1, r1, #0x00800000 | |
219 do_it eq, te | |
220 eoreq r0, r0, #0x00800000 | |
221 addeq r2, r2, #1 | |
222 subne r3, r3, #1 | |
223 b LSYM(Lad_x) | |
224 | |
@ Special-case dispatch: zeroes, equal magnitudes, INF or NAN detected above.
225 LSYM(Lad_s): | |
226 mov r3, r1, lsl #1 | |
227 | |
228 mvns ip, r2, asr #24 | |
229 do_it ne | |
230 COND(mvn,s,ne) ip, r3, asr #24 | |
231 beq LSYM(Lad_i) | |
232 | |
233 teq r2, r3 | |
234 beq 1f | |
235 | |
236 @ Result is x + 0.0 = x or 0.0 + y = y. | |
237 teq r2, #0 | |
238 do_it eq | |
239 moveq r0, r1 | |
240 RET | |
241 | |
242 1: teq r0, r1 | |
243 | |
244 @ Result is x - x = 0. | |
245 do_it ne, t | |
246 movne r0, #0 | |
247 RETc(ne) | |
248 | |
249 @ Result is x + x = 2x. | |
250 tst r2, #0xff000000 | |
251 bne 2f | |
252 movs r0, r0, lsl #1 | |
253 do_it cs | |
254 orrcs r0, r0, #0x80000000 | |
255 RET | |
256 2: adds r2, r2, #(2 << 24) | |
257 do_it cc, t | |
258 addcc r0, r0, #(1 << 23) | |
259 RETc(cc) | |
260 and r3, r0, #0x80000000 | |
261 | |
262 @ Overflow: return INF. | |
263 LSYM(Lad_o): | |
264 orr r0, r3, #0x7f000000 | |
265 orr r0, r0, #0x00800000 | |
266 RET | |
267 | |
268 @ At least one of r0/r1 is INF/NAN. | |
269 @ if r0 != INF/NAN: return r1 (which is INF/NAN) | |
270 @ if r1 != INF/NAN: return r0 (which is INF/NAN) | |
271 @ if r0 or r1 is NAN: return NAN | |
272 @ if opposite sign: return NAN | |
273 @ otherwise return r0 (which is INF or -INF) | |
274 LSYM(Lad_i): | |
275 mvns r2, r2, asr #24 | |
276 do_it ne, et | |
277 movne r0, r1 | |
278 COND(mvn,s,eq) r3, r3, asr #24 | |
279 movne r1, r0 | |
280 movs r2, r0, lsl #9 | |
281 do_it eq, te | |
282 COND(mov,s,eq) r3, r1, lsl #9 | |
283 teqeq r0, r1 | |
284 orrne r0, r0, #0x00400000 @ quiet NAN | |
285 RET | |
286 | |
287 FUNC_END aeabi_frsub | |
288 FUNC_END aeabi_fadd | |
289 FUNC_END addsf3 | |
290 FUNC_END aeabi_fsub | |
291 FUNC_END subsf3 | |
292 | |
@ floatunsisf / __aeabi_ui2f: convert unsigned 32-bit int (r0) to float (r0).
@ r3 = 0 means "no sign bit" on the shared path below.
293 ARM_FUNC_START floatunsisf | |
294 ARM_FUNC_ALIAS aeabi_ui2f floatunsisf | |
295 | |
296 mov r3, #0 | |
297 b 1f | |
298 | |
@ floatsisf / __aeabi_i2f: convert signed 32-bit int (r0) to float (r0).
@ Sign is saved in r3 and r0 is made positive before the shared path.
299 ARM_FUNC_START floatsisf | |
300 ARM_FUNC_ALIAS aeabi_i2f floatsisf | |
301 | |
302 ands r3, r0, #0x80000000 | |
303 do_it mi | |
304 rsbmi r0, r0, #0 | |
305 | |
306 1: movs ip, r0 | |
307 do_it eq | |
308 RETc(eq) | |
309 | |
310 @ Add initial exponent to sign | |
311 orr r3, r3, #((127 + 23) << 23) | |
312 | |
@ Set up ah:al and tail-jump into the 64-bit conversion path (label 2: in
@ the floatdisf section below) which does the normalization and rounding.
313 .ifnc ah, r0 | |
314 mov ah, r0 | |
315 .endif | |
316 mov al, #0 | |
317 b 2f | |
318 | |
319 FUNC_END aeabi_i2f | |
320 FUNC_END floatsisf | |
321 FUNC_END aeabi_ui2f | |
322 FUNC_END floatunsisf | |
323 | |
@ floatundisf / __aeabi_ul2f: convert unsigned 64-bit int to float.
@ NOTE(review): ah/al are the high/low word register aliases of the 64-bit
@ argument — presumably mapped to r0/r1 per endianness in lib1funcs.asm;
@ confirm before editing.
324 ARM_FUNC_START floatundisf | |
325 ARM_FUNC_ALIAS aeabi_ul2f floatundisf | |
326 | |
327 orrs r2, r0, r1 | |
328 #if !defined (__VFP_FP__) && !defined(__SOFTFP__) | |
329 do_it eq, t | |
330 mvfeqs f0, #0.0 @ legacy FPA: also return 0.0 in f0 | |
331 #else | |
332 do_it eq | |
333 #endif | |
334 RETc(eq) | |
335 | |
336 mov r3, #0 | |
337 b 1f | |
338 | |
@ floatdisf / __aeabi_l2f: convert signed 64-bit int to float.
339 ARM_FUNC_START floatdisf | |
340 ARM_FUNC_ALIAS aeabi_l2f floatdisf | |
341 | |
342 orrs r2, r0, r1 | |
343 #if !defined (__VFP_FP__) && !defined(__SOFTFP__) | |
344 do_it eq, t | |
345 mvfeqs f0, #0.0 | |
346 #else | |
347 do_it eq | |
348 #endif | |
349 RETc(eq) | |
350 | |
351 ands r3, ah, #0x80000000 @ sign bit in r3 | |
352 bpl 1f | |
@ Negate the 64-bit value ah:al to get its absolute value.
353 #if defined(__thumb2__) | |
354 negs al, al | |
355 sbc ah, ah, ah, lsl #1 | |
356 #else | |
357 rsbs al, al, #0 | |
358 rsc ah, ah, #0 | |
359 #endif | |
360 1: | |
361 #if !defined (__VFP_FP__) && !defined(__SOFTFP__) | |
362 @ For hard FPA code we want to return via the tail below so that | |
363 @ we can return the result in f0 as well as in r0 for backwards | |
364 @ compatibility. | |
365 str lr, [sp, #-8]! | |
366 adr lr, LSYM(f0_ret) | |
367 #endif | |
368 | |
@ ip = most significant non-zero word; if the high word is 0, shift the
@ value up by one word and correct the exponent below.
369 movs ip, ah | |
370 do_it eq, tt | |
371 moveq ip, al | |
372 moveq ah, al | |
373 moveq al, #0 | |
374 | |
375 @ Add initial exponent to sign | |
376 orr r3, r3, #((127 + 23 + 32) << 23) | |
377 do_it eq | |
378 subeq r3, r3, #(32 << 23) | |
379 2: sub r3, r3, #(1 << 23) | |
380 | |
@ Count leading zeroes of ip into r2 (binary search pre-ARMv5, clz after).
381 #if __ARM_ARCH__ < 5 | |
382 | |
383 mov r2, #23 | |
384 cmp ip, #(1 << 16) | |
385 do_it hs, t | |
386 movhs ip, ip, lsr #16 | |
387 subhs r2, r2, #16 | |
388 cmp ip, #(1 << 8) | |
389 do_it hs, t | |
390 movhs ip, ip, lsr #8 | |
391 subhs r2, r2, #8 | |
392 cmp ip, #(1 << 4) | |
393 do_it hs, t | |
394 movhs ip, ip, lsr #4 | |
395 subhs r2, r2, #4 | |
396 cmp ip, #(1 << 2) | |
397 do_it hs, e | |
398 subhs r2, r2, #2 | |
399 sublo r2, r2, ip, lsr #1 | |
400 subs r2, r2, ip, lsr #3 | |
401 | |
402 #else | |
403 | |
404 clz r2, ip | |
405 subs r2, r2, #8 | |
406 | |
407 #endif | |
408 | |
409 sub r3, r3, r2, lsl #23 | |
410 blt 3f | |
411 | |
412 shiftop add r3 r3 ah lsl r2 ip | |
413 shift1 lsl, ip, al, r2 | |
414 rsb r2, r2, #32 | |
415 cmp ip, #0x80000000 | |
416 shiftop adc r0 r3 al lsr r2 r2 | |
417 do_it eq | |
418 biceq r0, r0, #1 @ round to even on an exact tie | |
419 RET | |
420 | |
421 3: add r2, r2, #32 | |
422 shift1 lsl, ip, ah, r2 | |
423 rsb r2, r2, #32 | |
424 orrs al, al, ip, lsl #1 | |
425 shiftop adc r0 r3 ah lsr r2 r2 | |
426 do_it eq | |
427 biceq r0, r0, ip, lsr #31 | |
428 RET | |
429 | |
430 #if !defined (__VFP_FP__) && !defined(__SOFTFP__) | |
431 | |
@ FPA tail: copy the integer result through the stack into f0.
432 LSYM(f0_ret): | |
433 str r0, [sp, #-4]! | |
434 ldfs f0, [sp], #4 | |
435 RETLDM | |
436 | |
437 #endif | |
438 | |
439 FUNC_END floatdisf | |
440 FUNC_END aeabi_l2f | |
441 FUNC_END floatundisf | |
442 FUNC_END aeabi_ul2f | |
443 | |
444 #endif /* L_addsubsf3 */ | |
445 | |
446 #ifdef L_arm_muldivsf3 | |
447 | |
@ mulsf3 / __aeabi_fmul: single-precision multiplication.
@ In:  r0, r1 = float operands (soft-float ABI). Out: r0 = r0 * r1.
@ Clobbers r2, r3, ip and flags.
448 ARM_FUNC_START mulsf3 | |
449 ARM_FUNC_ALIAS aeabi_fmul mulsf3 | |
450 | |
451 @ Mask out exponents, trap any zero/denormal/INF/NAN. | |
452 mov ip, #0xff | |
453 ands r2, ip, r0, lsr #23 | |
454 do_it ne, tt | |
455 COND(and,s,ne) r3, ip, r1, lsr #23 | |
456 teqne r2, ip | |
457 teqne r3, ip | |
458 beq LSYM(Lml_s) | |
459 LSYM(Lml_x): | |
460 | |
461 @ Add exponents together | |
462 add r2, r2, r3 | |
463 | |
464 @ Determine final sign. | |
465 eor ip, r0, r1 | |
466 | |
467 @ Convert mantissa to unsigned integer. | |
468 @ If power of two, branch to a separate path. | |
469 @ Make up for final alignment. | |
470 movs r0, r0, lsl #9 | |
471 do_it ne | |
472 COND(mov,s,ne) r1, r1, lsl #9 | |
473 beq LSYM(Lml_1) | |
474 mov r3, #0x08000000 | |
475 orr r0, r3, r0, lsr #5 | |
476 orr r1, r3, r1, lsr #5 | |
477 | |
@ 32x32->64 multiply: synthesized from four 16x16 products pre-ARMv4,
@ a single umull on ARMv4+.
478 #if __ARM_ARCH__ < 4 | |
479 | |
480 @ Put sign bit in r3, which will be restored into r0 later. | |
481 and r3, ip, #0x80000000 | |
482 | |
483 @ Well, no way to make it shorter without the umull instruction. | |
484 do_push {r3, r4, r5} | |
485 mov r4, r0, lsr #16 | |
486 mov r5, r1, lsr #16 | |
487 bic r0, r0, r4, lsl #16 | |
488 bic r1, r1, r5, lsl #16 | |
489 mul ip, r4, r5 | |
490 mul r3, r0, r1 | |
491 mul r0, r5, r0 | |
492 mla r0, r4, r1, r0 | |
493 adds r3, r3, r0, lsl #16 | |
494 adc r1, ip, r0, lsr #16 | |
495 do_pop {r0, r4, r5} | |
496 | |
497 #else | |
498 | |
499 @ The actual multiplication. | |
500 umull r3, r1, r0, r1 | |
501 | |
502 @ Put final sign in r0. | |
503 and r0, ip, #0x80000000 | |
504 | |
505 #endif | |
506 | |
507 @ Adjust result upon the MSB position. | |
508 cmp r1, #(1 << 23) | |
509 do_it cc, tt | |
510 movcc r1, r1, lsl #1 | |
511 orrcc r1, r1, r3, lsr #31 | |
512 movcc r3, r3, lsl #1 | |
513 | |
514 @ Add sign to result. | |
515 orr r0, r0, r1 | |
516 | |
517 @ Apply exponent bias, check for under/overflow. | |
518 sbc r2, r2, #127 | |
519 cmp r2, #(254 - 1) | |
520 bhi LSYM(Lml_u) | |
521 | |
522 @ Round the result, merge final exponent. | |
523 cmp r3, #0x80000000 | |
524 adc r0, r0, r2, lsl #23 | |
525 do_it eq | |
526 biceq r0, r0, #1 | |
527 RET | |
528 | |
529 @ Multiplication by 0x1p*: let's shortcut a lot of code. | |
530 LSYM(Lml_1): | |
531 teq r0, #0 | |
532 and ip, ip, #0x80000000 | |
533 do_it eq | |
534 moveq r1, r1, lsl #9 | |
535 orr r0, ip, r0, lsr #9 | |
536 orr r0, r0, r1, lsr #9 | |
537 subs r2, r2, #127 | |
538 do_it gt, tt | |
539 COND(rsb,s,gt) r3, r2, #255 | |
540 orrgt r0, r0, r2, lsl #23 | |
541 RETc(gt) | |
542 | |
543 @ Under/overflow: fix things up for the code below. | |
544 orr r0, r0, #0x00800000 | |
545 mov r3, #0 | |
546 subs r2, r2, #1 | |
547 | |
548 LSYM(Lml_u): | |
549 @ Overflow? | |
550 bgt LSYM(Lml_o) | |
551 | |
552 @ Check if denormalized result is possible, otherwise return signed 0. | |
553 cmn r2, #(24 + 1) | |
554 do_it le, t | |
555 bicle r0, r0, #0x7fffffff | |
556 RETc(le) | |
557 | |
558 @ Shift value right, round, etc. | |
559 rsb r2, r2, #0 | |
560 movs r1, r0, lsl #1 | |
561 shift1 lsr, r1, r1, r2 | |
562 rsb r2, r2, #32 | |
563 shift1 lsl, ip, r0, r2 | |
564 movs r0, r1, rrx | |
565 adc r0, r0, #0 | |
566 orrs r3, r3, ip, lsl #1 | |
567 do_it eq | |
568 biceq r0, r0, ip, lsr #31 | |
569 RET | |
570 | |
571 @ One or both arguments are denormalized. | |
572 @ Scale them leftwards and preserve sign bit. | |
573 LSYM(Lml_d): | |
574 teq r2, #0 | |
575 and ip, r0, #0x80000000 | |
576 1: do_it eq, tt | |
577 moveq r0, r0, lsl #1 | |
578 tsteq r0, #0x00800000 | |
579 subeq r2, r2, #1 | |
580 beq 1b | |
581 orr r0, r0, ip | |
582 teq r3, #0 | |
583 and ip, r1, #0x80000000 | |
584 2: do_it eq, tt | |
585 moveq r1, r1, lsl #1 | |
586 tsteq r1, #0x00800000 | |
587 subeq r3, r3, #1 | |
588 beq 2b | |
589 orr r1, r1, ip | |
590 b LSYM(Lml_x) | |
591 | |
592 LSYM(Lml_s): | |
593 @ Isolate the INF and NAN cases away | |
594 and r3, ip, r1, lsr #23 | |
595 teq r2, ip | |
596 do_it ne | |
597 teqne r3, ip | |
598 beq 1f | |
599 | |
600 @ Here, one or more arguments are either denormalized or zero. | |
601 bics ip, r0, #0x80000000 | |
602 do_it ne | |
603 COND(bic,s,ne) ip, r1, #0x80000000 | |
604 bne LSYM(Lml_d) | |
605 | |
606 @ Result is 0, but determine sign anyway. | |
607 LSYM(Lml_z): | |
608 eor r0, r0, r1 | |
609 bic r0, r0, #0x7fffffff | |
610 RET | |
611 | |
612 1: @ One or both args are INF or NAN. | |
613 teq r0, #0x0 | |
614 do_it ne, ett | |
615 teqne r0, #0x80000000 | |
616 moveq r0, r1 | |
617 teqne r1, #0x0 | |
618 teqne r1, #0x80000000 | |
619 beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN | |
620 teq r2, ip | |
621 bne 1f | |
622 movs r2, r0, lsl #9 | |
623 bne LSYM(Lml_n) @ NAN * <anything> -> NAN | |
624 1: teq r3, ip | |
625 bne LSYM(Lml_i) | |
626 movs r3, r1, lsl #9 | |
627 do_it ne | |
628 movne r0, r1 | |
629 bne LSYM(Lml_n) @ <anything> * NAN -> NAN | |
630 | |
631 @ Result is INF, but we need to determine its sign. | |
632 LSYM(Lml_i): | |
633 eor r0, r0, r1 | |
634 | |
635 @ Overflow: return INF (sign already in r0). | |
636 LSYM(Lml_o): | |
637 and r0, r0, #0x80000000 | |
638 orr r0, r0, #0x7f000000 | |
639 orr r0, r0, #0x00800000 | |
640 RET | |
641 | |
642 @ Return a quiet NAN. | |
643 LSYM(Lml_n): | |
644 orr r0, r0, #0x7f000000 | |
645 orr r0, r0, #0x00c00000 | |
646 RET | |
647 | |
648 FUNC_END aeabi_fmul | |
649 FUNC_END mulsf3 | |
650 | |
@ divsf3 / __aeabi_fdiv: single-precision division.
@ In:  r0 = dividend, r1 = divisor (soft-float ABI). Out: r0 = r0 / r1.
@ Clobbers r2, r3, ip and flags.
@ NOTE: the under/overflow, INF, zero and NAN exits branch into the Lml_*
@ labels defined in the mulsf3 section above — both live in L_arm_muldivsf3.
651 ARM_FUNC_START divsf3 | |
652 ARM_FUNC_ALIAS aeabi_fdiv divsf3 | |
653 | |
654 @ Mask out exponents, trap any zero/denormal/INF/NAN. | |
655 mov ip, #0xff | |
656 ands r2, ip, r0, lsr #23 | |
657 do_it ne, tt | |
658 COND(and,s,ne) r3, ip, r1, lsr #23 | |
659 teqne r2, ip | |
660 teqne r3, ip | |
661 beq LSYM(Ldv_s) | |
662 LSYM(Ldv_x): | |
663 | |
664 @ Subtract divisor exponent from dividend's | |
665 sub r2, r2, r3 | |
666 | |
667 @ Preserve final sign into ip. | |
668 eor ip, r0, r1 | |
669 | |
670 @ Convert mantissa to unsigned integer. | |
671 @ Dividend -> r3, divisor -> r1. | |
672 movs r1, r1, lsl #9 | |
673 mov r0, r0, lsl #9 | |
674 beq LSYM(Ldv_1) | |
675 mov r3, #0x10000000 | |
676 orr r1, r3, r1, lsr #4 | |
677 orr r3, r3, r0, lsr #4 | |
678 | |
679 @ Initialize r0 (result) with final sign bit. | |
680 and r0, ip, #0x80000000 | |
681 | |
682 @ Ensure result will land to known bit position. | |
683 @ Apply exponent bias accordingly. | |
684 cmp r3, r1 | |
685 do_it cc | |
686 movcc r3, r3, lsl #1 | |
687 adc r2, r2, #(127 - 2) | |
688 | |
689 @ The actual division loop. | |
@ Radix-16 restoring division: four trial subtractions per iteration,
@ ip walks the quotient bit position down four bits each pass.
690 mov ip, #0x00800000 | |
691 1: cmp r3, r1 | |
692 do_it cs, t | |
693 subcs r3, r3, r1 | |
694 orrcs r0, r0, ip | |
695 cmp r3, r1, lsr #1 | |
696 do_it cs, t | |
697 subcs r3, r3, r1, lsr #1 | |
698 orrcs r0, r0, ip, lsr #1 | |
699 cmp r3, r1, lsr #2 | |
700 do_it cs, t | |
701 subcs r3, r3, r1, lsr #2 | |
702 orrcs r0, r0, ip, lsr #2 | |
703 cmp r3, r1, lsr #3 | |
704 do_it cs, t | |
705 subcs r3, r3, r1, lsr #3 | |
706 orrcs r0, r0, ip, lsr #3 | |
707 movs r3, r3, lsl #4 | |
708 do_it ne | |
709 COND(mov,s,ne) ip, ip, lsr #4 | |
710 bne 1b | |
711 | |
712 @ Check exponent for under/overflow. | |
713 cmp r2, #(254 - 1) | |
714 bhi LSYM(Lml_u) | |
715 | |
716 @ Round the result, merge final exponent. | |
717 cmp r3, r1 | |
718 adc r0, r0, r2, lsl #23 | |
719 do_it eq | |
720 biceq r0, r0, #1 | |
721 RET | |
722 | |
723 @ Division by 0x1p*: let's shortcut a lot of code. | |
724 LSYM(Ldv_1): | |
725 and ip, ip, #0x80000000 | |
726 orr r0, ip, r0, lsr #9 | |
727 adds r2, r2, #127 | |
728 do_it gt, tt | |
729 COND(rsb,s,gt) r3, r2, #255 | |
730 orrgt r0, r0, r2, lsl #23 | |
731 RETc(gt) | |
732 | |
733 orr r0, r0, #0x00800000 | |
734 mov r3, #0 | |
735 subs r2, r2, #1 | |
736 b LSYM(Lml_u) | |
737 | |
738 @ One or both arguments are denormalized. | |
739 @ Scale them leftwards and preserve sign bit. | |
740 LSYM(Ldv_d): | |
741 teq r2, #0 | |
742 and ip, r0, #0x80000000 | |
743 1: do_it eq, tt | |
744 moveq r0, r0, lsl #1 | |
745 tsteq r0, #0x00800000 | |
746 subeq r2, r2, #1 | |
747 beq 1b | |
748 orr r0, r0, ip | |
749 teq r3, #0 | |
750 and ip, r1, #0x80000000 | |
751 2: do_it eq, tt | |
752 moveq r1, r1, lsl #1 | |
753 tsteq r1, #0x00800000 | |
754 subeq r3, r3, #1 | |
755 beq 2b | |
756 orr r1, r1, ip | |
757 b LSYM(Ldv_x) | |
758 | |
759 @ One or both arguments are either INF, NAN, zero or denormalized. | |
760 LSYM(Ldv_s): | |
761 and r3, ip, r1, lsr #23 | |
762 teq r2, ip | |
763 bne 1f | |
764 movs r2, r0, lsl #9 | |
765 bne LSYM(Lml_n) @ NAN / <anything> -> NAN | |
766 teq r3, ip | |
767 bne LSYM(Lml_i) @ INF / <anything> -> INF | |
768 mov r0, r1 | |
769 b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN | |
770 1: teq r3, ip | |
771 bne 2f | |
772 movs r3, r1, lsl #9 | |
773 beq LSYM(Lml_z) @ <anything> / INF -> 0 | |
774 mov r0, r1 | |
775 b LSYM(Lml_n) @ <anything> / NAN -> NAN | |
776 2: @ If both are nonzero, we need to normalize and resume above. | |
777 bics ip, r0, #0x80000000 | |
778 do_it ne | |
779 COND(bic,s,ne) ip, r1, #0x80000000 | |
780 bne LSYM(Ldv_d) | |
781 @ One or both arguments are zero. | |
782 bics r2, r0, #0x80000000 | |
783 bne LSYM(Lml_i) @ <non_zero> / 0 -> INF | |
784 bics r3, r1, #0x80000000 | |
785 bne LSYM(Lml_z) @ 0 / <non_zero> -> 0 | |
786 b LSYM(Lml_n) @ 0 / 0 -> NAN | |
787 | |
788 FUNC_END aeabi_fdiv | |
789 FUNC_END divsf3 | |
790 | |
791 #endif /* L_muldivsf3 */ | |
792 | |
793 #ifdef L_arm_cmpsf2 | |
794 | |
795 @ The return value in r0 is | |
796 @ | |
797 @ 0 if the operands are equal | |
798 @ 1 if the first operand is greater than the second, or | |
799 @ the operands are unordered and the operation is | |
800 @ CMP, LT, LE, NE, or EQ. | |
801 @ -1 if the first operand is less than the second, or | |
802 @ the operands are unordered and the operation is GT | |
803 @ or GE. | |
804 @ | |
805 @ The Z flag will be set iff the operands are equal. | |
806 @ | |
807 @ The following registers are clobbered by this function: | |
808 @ ip, r0, r1, r2, r3 | |
809 | |
@ Each entry point loads ip with the value to return when the operands are
@ unordered (NAN involved); it is spilled to the stack at label 1: below and
@ reloaded into r0 on the NAN path.
810 ARM_FUNC_START gtsf2 | |
811 ARM_FUNC_ALIAS gesf2 gtsf2 | |
812 mov ip, #-1 | |
813 b 1f | |
814 | |
815 ARM_FUNC_START ltsf2 | |
816 ARM_FUNC_ALIAS lesf2 ltsf2 | |
817 mov ip, #1 | |
818 b 1f | |
819 | |
820 ARM_FUNC_START cmpsf2 | |
821 ARM_FUNC_ALIAS nesf2 cmpsf2 | |
822 ARM_FUNC_ALIAS eqsf2 cmpsf2 | |
823 mov ip, #1 @ how should we specify unordered here? | |
824 | |
47 | 825 1: str ip, [sp, #-4]! |
0 | 826 | |
827 @ Trap any INF/NAN first. | |
@ r2/r3 = operands with the sign bit shifted out (lsl #1) for the
@ magnitude/exponent tests below.
828 mov r2, r0, lsl #1 | |
829 mov r3, r1, lsl #1 | |
830 mvns ip, r2, asr #24 | |
831 do_it ne | |
832 COND(mvn,s,ne) ip, r3, asr #24 | |
833 beq 3f | |
834 | |
835 @ Compare values. | |
836 @ Note that 0.0 is equal to -0.0. | |
47 | 837 2: add sp, sp, #4 |
47 | 838 orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag |
0 | 839 do_it ne |
840 teqne r0, r1 @ if not 0 compare sign | |
841 do_it pl | |
842 COND(sub,s,pl) r0, r2, r3 @ if same sign compare values, set r0 | |
843 | |
844 @ Result: | |
@ hi/lo reflect the unsigned magnitude comparison above; the sign of r1
@ (asr #31) selects between +1 and -1 via the orr of bit 0 below.
845 do_it hi | |
846 movhi r0, r1, asr #31 | |
847 do_it lo | |
848 mvnlo r0, r1, asr #31 | |
849 do_it ne | |
850 orrne r0, r0, #1 | |
851 RET | |
852 | |
853 @ Look for a NAN. | |
854 3: mvns ip, r2, asr #24 | |
855 bne 4f | |
856 movs ip, r0, lsl #9 | |
857 bne 5f @ r0 is NAN | |
858 4: mvns ip, r3, asr #24 | |
859 bne 2b | |
860 movs ip, r1, lsl #9 | |
861 beq 2b @ r1 is not NAN | |
47 | 862 5: ldr r0, [sp], #4 @ return unordered code. |
0 | 863 RET |
864 | |
@ All six comparison entry points share the body above.
865 FUNC_END gesf2 | |
866 FUNC_END gtsf2 | |
867 FUNC_END lesf2 | |
868 FUNC_END ltsf2 | |
869 FUNC_END nesf2 | |
870 FUNC_END eqsf2 | |
871 FUNC_END cmpsf2 | |
872 | |
@ aeabi_cfrcmple: flags-returning reversed compare — swaps r0/r1 and
@ falls into the common flags-returning body at label 6:.
873 ARM_FUNC_START aeabi_cfrcmple | |
874 | |
875 mov ip, r0 | |
876 mov r0, r1 | |
877 mov r1, ip | |
878 b 6f | |
879 | |
@ aeabi_cfcmpeq / aeabi_cfcmple: compare r0 with r1 and report the result
@ in the CPSR flags (Z for equal, C for greater-or-equal/unordered).
880 ARM_FUNC_START aeabi_cfcmpeq | |
881 ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq | |
882 | |
883 @ The status-returning routines are required to preserve all | |
884 @ registers except ip, lr, and cpsr. | |
885 6: do_push {r0, r1, r2, r3, lr} | |
886 ARM_CALL cmpsf2 | |
887 @ Set the Z flag correctly, and the C flag unconditionally. | |
888 cmp r0, #0 | |
889 @ Clear the C flag if the return value was -1, indicating | |
890 @ that the first operand was smaller than the second. | |
891 do_it mi | |
892 cmnmi r0, #0 | |
893 RETLDM "r0, r1, r2, r3" | |
894 | |
895 FUNC_END aeabi_cfcmple | |
896 FUNC_END aeabi_cfcmpeq | |
897 FUNC_END aeabi_cfrcmple | |
898 | |
@ aeabi_fcmpeq: boolean (r0 == r1) built on the flags from aeabi_cfcmple.
899 ARM_FUNC_START aeabi_fcmpeq | |
900 | |
901 str lr, [sp, #-8]! | |
902 ARM_CALL aeabi_cfcmple | |
903 do_it eq, e | |
904 moveq r0, #1 @ Equal to. | |
905 movne r0, #0 @ Less than, greater than, or unordered. | |
906 RETLDM | |
907 | |
908 FUNC_END aeabi_fcmpeq | |
909 | |
@ aeabi_fcmplt: boolean (r0 < r1) built on the flags from aeabi_cfcmple.
910 ARM_FUNC_START aeabi_fcmplt | |
911 | |
912 str lr, [sp, #-8]! | |
913 ARM_CALL aeabi_cfcmple | |
914 do_it cc, e | |
915 movcc r0, #1 @ Less than. | |
916 movcs r0, #0 @ Equal to, greater than, or unordered. | |
917 RETLDM | |
918 | |
919 FUNC_END aeabi_fcmplt | |
920 | |
@ aeabi_fcmple: boolean (r0 <= r1) built on the flags from aeabi_cfcmple.
921 ARM_FUNC_START aeabi_fcmple | |
922 | |
923 str lr, [sp, #-8]! | |
924 ARM_CALL aeabi_cfcmple | |
925 do_it ls, e | |
926 movls r0, #1 @ Less than or equal to. | |
927 movhi r0, #0 @ Greater than or unordered. | |
928 RETLDM | |
929 | |
930 FUNC_END aeabi_fcmple | |
931 | |
@ aeabi_fcmpge: boolean (r0 >= r1), using the reversed compare helper.
932 ARM_FUNC_START aeabi_fcmpge | |
933 | |
934 str lr, [sp, #-8]! | |
935 ARM_CALL aeabi_cfrcmple | |
936 do_it ls, e | |
937 movls r0, #1 @ Operand 2 is less than or equal to operand 1. | |
938 movhi r0, #0 @ Operand 2 greater than operand 1, or unordered. | |
939 RETLDM | |
940 | |
941 FUNC_END aeabi_fcmpge | |
942 | |
@ aeabi_fcmpgt: boolean (r0 > r1), using the reversed compare helper.
943 ARM_FUNC_START aeabi_fcmpgt | |
944 | |
945 str lr, [sp, #-8]! | |
946 ARM_CALL aeabi_cfrcmple | |
947 do_it cc, e | |
948 movcc r0, #1 @ Operand 2 is less than operand 1. | |
949 movcs r0, #0 @ Operand 2 is greater than or equal to operand 1, | |
950 @ or they are unordered. | |
951 RETLDM | |
952 | |
953 FUNC_END aeabi_fcmpgt | |
954 | |
955 #endif /* L_cmpsf2 */ | |
956 | |
957 #ifdef L_arm_unordsf2 | |
958 | |
@ unordsf2 / __aeabi_fcmpun: returns 1 in r0 if either operand is NAN
@ (unordered), 0 otherwise. A NAN has all-ones exponent and a non-zero
@ mantissa, hence the asr #24 / lsl #9 tests below.
959 ARM_FUNC_START unordsf2 | |
960 ARM_FUNC_ALIAS aeabi_fcmpun unordsf2 | |
961 | |
962 mov r2, r0, lsl #1 | |
963 mov r3, r1, lsl #1 | |
964 mvns ip, r2, asr #24 | |
965 bne 1f | |
966 movs ip, r0, lsl #9 | |
967 bne 3f @ r0 is NAN | |
968 1: mvns ip, r3, asr #24 | |
969 bne 2f | |
970 movs ip, r1, lsl #9 | |
971 bne 3f @ r1 is NAN | |
972 2: mov r0, #0 @ arguments are ordered. | |
973 RET | |
974 3: mov r0, #1 @ arguments are unordered. | |
975 RET | |
976 | |
977 FUNC_END aeabi_fcmpun | |
978 FUNC_END unordsf2 | |
979 | |
980 #endif /* L_unordsf2 */ | |
981 | |
982 #ifdef L_arm_fixsfsi | |
983 | |
@ fixsfsi / __aeabi_f2iz: convert float (r0) to signed 32-bit int (r0),
@ truncating toward zero. Out-of-range values saturate to 0x7fffffff /
@ 0x80000000 by sign; NAN converts to 0.
984 ARM_FUNC_START fixsfsi | |
985 ARM_FUNC_ALIAS aeabi_f2iz fixsfsi | |
986 | |
987 @ check exponent range. | |
988 mov r2, r0, lsl #1 | |
989 cmp r2, #(127 << 24) | |
990 bcc 1f @ value is too small | |
991 mov r3, #(127 + 31) | |
992 subs r2, r3, r2, lsr #24 | |
993 bls 2f @ value is too large | |
994 | |
995 @ scale value | |
996 mov r3, r0, lsl #8 | |
997 orr r3, r3, #0x80000000 | |
998 tst r0, #0x80000000 @ the sign bit | |
999 shift1 lsr, r0, r3, r2 | |
1000 do_it ne | |
1001 rsbne r0, r0, #0 | |
1002 RET | |
1003 | |
1004 1: mov r0, #0 | |
1005 RET | |
1006 | |
1007 2: cmp r2, #(127 + 31 - 0xff) | |
1008 bne 3f | |
1009 movs r2, r0, lsl #9 | |
1010 bne 4f @ r0 is NAN. | |
@ Saturate: 0x7fffffff for positive overflow, 0x80000000 for negative.
1011 3: ands r0, r0, #0x80000000 @ the sign bit | |
1012 do_it eq | |
1013 moveq r0, #0x7fffffff @ the maximum signed positive si | |
1014 RET | |
1015 | |
1016 4: mov r0, #0 @ What should we convert NAN to? | |
1017 RET | |
1018 | |
1019 FUNC_END aeabi_f2iz | |
1020 FUNC_END fixsfsi | |
1021 | |
1022 #endif /* L_fixsfsi */ | |
1023 | |
1024 #ifdef L_arm_fixunssfsi | |
1025 | |
@ fixunssfsi / __aeabi_f2uiz: convert float (r0) to unsigned 32-bit int
@ (r0), truncating toward zero. Negative and too-small values convert to
@ 0; positive overflow saturates to 0xffffffff; NAN converts to 0.
1026 ARM_FUNC_START fixunssfsi | |
1027 ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi | |
1028 | |
1029 @ check exponent range. | |
1030 movs r2, r0, lsl #1 | |
1031 bcs 1f @ value is negative | |
1032 cmp r2, #(127 << 24) | |
1033 bcc 1f @ value is too small | |
1034 mov r3, #(127 + 31) | |
1035 subs r2, r3, r2, lsr #24 | |
1036 bmi 2f @ value is too large | |
1037 | |
1038 @ scale the value | |
1039 mov r3, r0, lsl #8 | |
1040 orr r3, r3, #0x80000000 | |
1041 shift1 lsr, r0, r3, r2 | |
1042 RET | |
1043 | |
1044 1: mov r0, #0 | |
1045 RET | |
1046 | |
1047 2: cmp r2, #(127 + 31 - 0xff) | |
1048 bne 3f | |
1049 movs r2, r0, lsl #9 | |
1050 bne 4f @ r0 is NAN. | |
1051 3: mov r0, #0xffffffff @ maximum unsigned si | |
1052 RET | |
1053 | |
1054 4: mov r0, #0 @ What should we convert NAN to? | |
1055 RET | |
1056 | |
1057 FUNC_END aeabi_f2uiz | |
1058 FUNC_END fixunssfsi | |
1059 | |
1060 #endif /* L_fixunssfsi */ |