/* ieee754-sf.S single-precision floating point support for ARM

   Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Nicolas Pitre (nico@cam.org)

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/*
 * Notes:
 *
 * The goal of this code is to be as fast as possible.  This is
 * not meant to be easy to understand for the casual reader.
 *
 * Only the default rounding mode (round to nearest, ties to even) is
 * supported, for best performance.  Exceptions aren't supported yet,
 * but they could be added quite easily if necessary without hurting
 * performance.
 */

#ifdef L_arm_negsf2

ARM_FUNC_START negsf2
ARM_FUNC_ALIAS aeabi_fneg negsf2

	eor	r0, r0, #0x80000000	@ flip sign bit
	RET

	FUNC_END aeabi_fneg
	FUNC_END negsf2

#endif
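
@ For reference: IEEE 754 negation only flips the sign bit, which is
@ also correct for zeroes, INFs and NANs.  A rough C sketch of the
@ routine above (hypothetical helper, not part of this file):
@
@	static inline float fneg_sketch(float x)
@	{
@	  union { float f; unsigned int u; } v = { x };
@	  v.u ^= 0x80000000u;	// flip the sign bit only
@	  return v.f;
@	}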

#ifdef L_arm_addsubsf3

ARM_FUNC_START aeabi_frsub

	eor	r0, r0, #0x80000000	@ flip sign bit of first arg
	b	1f

ARM_FUNC_START subsf3
ARM_FUNC_ALIAS aeabi_fsub subsf3

	eor	r1, r1, #0x80000000	@ flip sign bit of second arg
#if defined(__INTERWORKING_STUBS__)
	b	1f			@ Skip Thumb-code prologue
#endif

ARM_FUNC_START addsf3
ARM_FUNC_ALIAS aeabi_fadd addsf3

1:	@ Look for zeroes, equal values, INF, or NAN.
	movs	r2, r0, lsl #1
	do_it	ne, ttt
	COND(mov,s,ne)	r3, r1, lsl #1
	teqne	r2, r3
	COND(mvn,s,ne)	ip, r2, asr #24
	COND(mvn,s,ne)	ip, r3, asr #24
	beq	LSYM(Lad_s)

	@ Compute exponent difference.  Make largest exponent in r2,
	@ corresponding arg in r0, and positive exponent difference in r3.
	mov	r2, r2, lsr #24
	rsbs	r3, r2, r3, lsr #24
	do_it	gt, ttt
	addgt	r2, r2, r3
	eorgt	r1, r0, r1
	eorgt	r0, r1, r0
	eorgt	r1, r0, r1
	do_it	lt
	rsblt	r3, r3, #0
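
	@ The three conditional EORs above are the classic XOR swap: they
	@ exchange r0 and r1 without a scratch register.  A rough C sketch
	@ of this step (illustrative names only):
	@
	@	if (d > 0) {		// second arg has the larger exponent
	@	  b ^= a; a ^= b; b ^= a;	// swap a and b
	@	} else {
	@	  d = -d;		// make the exponent difference positive
	@	}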

	@ If exponent difference is too large, return largest argument
	@ already in r0.  We need up to 25 bits to handle proper rounding
	@ of 0x1p25 - 1.1.
	cmp	r3, #25
	do_it	hi
	RETc(hi)

	@ Convert mantissa to signed integer.
	tst	r0, #0x80000000
	orr	r0, r0, #0x00800000
	bic	r0, r0, #0xff000000
	do_it	ne
	rsbne	r0, r0, #0
	tst	r1, #0x80000000
	orr	r1, r1, #0x00800000
	bic	r1, r1, #0xff000000
	do_it	ne
	rsbne	r1, r1, #0

	@ If exponent == difference, one or both args were denormalized.
	@ Since this is not a common case, rescale them off line.
	teq	r2, r3
	beq	LSYM(Lad_d)
LSYM(Lad_x):

	@ Compensate for the exponent overlapping the mantissa MSB added later
	sub	r2, r2, #1

	@ Shift and add second arg to first arg in r0.
	@ Keep leftover bits in r1.
	shiftop adds r0 r0 r1 asr r3 ip
	rsb	r3, r3, #32
	shift1	lsl, r1, r1, r3
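
	@ A rough C sketch of the shift-and-add step above, with the signed
	@ mantissas in a and b and the positive exponent difference in d
	@ (illustrative names, not actual registers):
	@
	@	a += b >> d;			// arithmetic shift: aligned add
	@	rest = (unsigned) b << (32 - d); // bits shifted out, for rounding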

	@ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
	and	r3, r0, #0x80000000
	bpl	LSYM(Lad_p)
#if defined(__thumb2__)
	negs	r1, r1
	sbc	r0, r0, r0, lsl #1
#else
	rsbs	r1, r1, #0
	rsc	r0, r0, #0
#endif

	@ Determine how to normalize the result.
LSYM(Lad_p):
	cmp	r0, #0x00800000
	bcc	LSYM(Lad_a)
	cmp	r0, #0x01000000
	bcc	LSYM(Lad_e)

	@ Result needs to be shifted right.
	movs	r0, r0, lsr #1
	mov	r1, r1, rrx
	add	r2, r2, #1

	@ Make sure we did not bust our exponent.
	cmp	r2, #254
	bhs	LSYM(Lad_o)

	@ Our result is now properly aligned into r0, remaining bits in r1.
	@ Pack final result together.
	@ Round with MSB of r1.  If halfway between two numbers, round towards
	@ LSB of r0 = 0.
LSYM(Lad_e):
	cmp	r1, #0x80000000
	adc	r0, r0, r2, lsl #23
	do_it	eq
	biceq	r0, r0, #1
	orr	r0, r0, r3
	RET
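
	@ The cmp/adc/biceq sequence above rounds to nearest, ties to even,
	@ while merging in the exponent.  A rough C sketch (illustrative
	@ names only):
	@
	@	r = m + (rest >= 0x80000000u);	// round up on the MSB of rest
	@	if (rest == 0x80000000u)	// exact tie:
	@	  r &= ~1u;			//   force the LSB to 0 (even)
	@	// the exponent is merged in the same adc via (exp << 23)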

	@ Result must be shifted left and exponent adjusted.
LSYM(Lad_a):
	movs	r1, r1, lsl #1
	adc	r0, r0, r0
	tst	r0, #0x00800000
	sub	r2, r2, #1
	bne	LSYM(Lad_e)

	@ No rounding necessary since r1 will always be 0 at this point.
LSYM(Lad_l):

#if __ARM_ARCH__ < 5

	movs	ip, r0, lsr #12
	moveq	r0, r0, lsl #12
	subeq	r2, r2, #12
	tst	r0, #0x00ff0000
	moveq	r0, r0, lsl #8
	subeq	r2, r2, #8
	tst	r0, #0x00f00000
	moveq	r0, r0, lsl #4
	subeq	r2, r2, #4
	tst	r0, #0x00c00000
	moveq	r0, r0, lsl #2
	subeq	r2, r2, #2
	cmp	r0, #0x00800000
	movcc	r0, r0, lsl #1
	sbcs	r2, r2, #0

#else

	clz	ip, r0
	sub	ip, ip, #8
	subs	r2, r2, ip
	shift1	lsl, r0, r0, ip

#endif
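
	@ On pre-ARMv5 cores without clz, the block above derives the shift
	@ with a binary search on the leading-zero count.  A rough C sketch
	@ of that fallback (illustrative names only):
	@
	@	if (!(m >> 12))        { m <<= 12; exp -= 12; }
	@	if (!(m & 0x00ff0000)) { m <<= 8;  exp -= 8;  }
	@	if (!(m & 0x00f00000)) { m <<= 4;  exp -= 4;  }
	@	if (!(m & 0x00c00000)) { m <<= 2;  exp -= 2;  }
	@	if (m < 0x00800000)    { m <<= 1;  exp -= 1;  }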

	@ Final result with sign
	@ If exponent negative, denormalize result.
	do_it	ge, et
	addge	r0, r0, r2, lsl #23
	rsblt	r2, r2, #0
	orrge	r0, r0, r3
#if defined(__thumb2__)
	do_it	lt, t
	lsrlt	r0, r0, r2
	orrlt	r0, r3, r0
#else
	orrlt	r0, r3, r0, lsr r2
#endif
	RET

	@ Fixup and adjust bit position for denormalized arguments.
	@ Note that r2 must not remain equal to 0.
LSYM(Lad_d):
	teq	r2, #0
	eor	r1, r1, #0x00800000
	do_it	eq, te
	eoreq	r0, r0, #0x00800000
	addeq	r2, r2, #1
	subne	r3, r3, #1
	b	LSYM(Lad_x)

LSYM(Lad_s):
	mov	r3, r1, lsl #1

	mvns	ip, r2, asr #24
	do_it	ne
	COND(mvn,s,ne)	ip, r3, asr #24
	beq	LSYM(Lad_i)

	teq	r2, r3
	beq	1f

	@ Result is x + 0.0 = x or 0.0 + y = y.
	teq	r2, #0
	do_it	eq
	moveq	r0, r1
	RET

1:	teq	r0, r1

	@ Result is x - x = 0.
	do_it	ne, t
	movne	r0, #0
	RETc(ne)

	@ Result is x + x = 2x.
	tst	r2, #0xff000000
	bne	2f
	movs	r0, r0, lsl #1
	do_it	cs
	orrcs	r0, r0, #0x80000000
	RET
2:	adds	r2, r2, #(2 << 24)
	do_it	cc, t
	addcc	r0, r0, #(1 << 23)
	RETc(cc)
	and	r3, r0, #0x80000000

	@ Overflow: return INF.
LSYM(Lad_o):
	orr	r0, r3, #0x7f000000
	orr	r0, r0, #0x00800000
	RET

	@ At least one of r0/r1 is INF/NAN.
	@   if r0 != INF/NAN: return r1 (which is INF/NAN)
	@   if r1 != INF/NAN: return r0 (which is INF/NAN)
	@   if r0 or r1 is NAN: return NAN
	@   if opposite sign: return NAN
	@   otherwise return r0 (which is INF or -INF)
LSYM(Lad_i):
	mvns	r2, r2, asr #24
	do_it	ne, et
	movne	r0, r1
	COND(mvn,s,eq)	r3, r3, asr #24
	movne	r1, r0
	movs	r2, r0, lsl #9
	do_it	eq, te
	COND(mov,s,eq)	r3, r1, lsl #9
	teqeq	r0, r1
	orrne	r0, r0, #0x00400000	@ quiet NAN
	RET

	FUNC_END aeabi_frsub
	FUNC_END aeabi_fadd
	FUNC_END addsf3
	FUNC_END aeabi_fsub
	FUNC_END subsf3

ARM_FUNC_START floatunsisf
ARM_FUNC_ALIAS aeabi_ui2f floatunsisf

	mov	r3, #0
	b	1f

ARM_FUNC_START floatsisf
ARM_FUNC_ALIAS aeabi_i2f floatsisf

	ands	r3, r0, #0x80000000
	do_it	mi
	rsbmi	r0, r0, #0

1:	movs	ip, r0
	do_it	eq
	RETc(eq)

	@ Add initial exponent to sign
	orr	r3, r3, #((127 + 23) << 23)

	.ifnc	ah, r0
	mov	ah, r0
	.endif
	mov	al, #0
	b	2f

	FUNC_END aeabi_i2f
	FUNC_END floatsisf
	FUNC_END aeabi_ui2f
	FUNC_END floatunsisf
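
@ The int-to-float entry points above only set up the sign, a starting
@ exponent of 127 + 23 (i.e. value = mantissa * 2^(exp - 127 - 23)),
@ and the mantissa in ah:al, then fall into the shared normalization
@ tail of floatdisf below.  A rough C sketch of the overall idea for
@ the 32-bit signed case, ignoring the round-to-nearest-even step the
@ real code performs when bits are shifted out (hypothetical helper,
@ not part of this file):
@
@	static inline float i2f_sketch(int x)
@	{
@	  unsigned int sign = x < 0 ? 0x80000000u : 0;
@	  unsigned int m = x < 0 ? -(unsigned int) x : (unsigned int) x;
@	  int exp = 127 + 23;	// biased exponent for m * 2^0
@	  if (m == 0) return 0.0f;
@	  while (m >= 1u << 24) { m >>= 1; exp++; }	// truncates here
@	  while (m < 1u << 23) { m <<= 1; exp--; }	// normalize
@	  union { unsigned int u; float f; } v =
@	    { sign | ((unsigned int) exp << 23) | (m & 0x007fffffu) };
@	  return v.f;
@	}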

ARM_FUNC_START floatundisf
ARM_FUNC_ALIAS aeabi_ul2f floatundisf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	do_it	eq, t
	mvfeqs	f0, #0.0
#else
	do_it	eq
#endif
	RETc(eq)

	mov	r3, #0
	b	1f

ARM_FUNC_START floatdisf
ARM_FUNC_ALIAS aeabi_l2f floatdisf

	orrs	r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	do_it	eq, t
	mvfeqs	f0, #0.0
#else
	do_it	eq
#endif
	RETc(eq)

	ands	r3, ah, #0x80000000	@ sign bit in r3
	bpl	1f
#if defined(__thumb2__)
	negs	al, al
	sbc	ah, ah, ah, lsl #1
#else
	rsbs	al, al, #0
	rsc	ah, ah, #0
#endif
1:
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
	@ For hard FPA code we want to return via the tail below so that
	@ we can return the result in f0 as well as in r0 for backwards
	@ compatibility.
	str	lr, [sp, #-8]!
	adr	lr, LSYM(f0_ret)
#endif

	movs	ip, ah
	do_it	eq, tt
	moveq	ip, al
	moveq	ah, al
	moveq	al, #0

	@ Add initial exponent to sign
	orr	r3, r3, #((127 + 23 + 32) << 23)
	do_it	eq
	subeq	r3, r3, #(32 << 23)
2:	sub	r3, r3, #(1 << 23)

#if __ARM_ARCH__ < 5

	mov	r2, #23
	cmp	ip, #(1 << 16)
	do_it	hs, t
	movhs	ip, ip, lsr #16
	subhs	r2, r2, #16
	cmp	ip, #(1 << 8)
	do_it	hs, t
	movhs	ip, ip, lsr #8
	subhs	r2, r2, #8
	cmp	ip, #(1 << 4)
	do_it	hs, t
	movhs	ip, ip, lsr #4
	subhs	r2, r2, #4
	cmp	ip, #(1 << 2)
	do_it	hs, e
	subhs	r2, r2, #2
	sublo	r2, r2, ip, lsr #1
	subs	r2, r2, ip, lsr #3

#else

	clz	r2, ip
	subs	r2, r2, #8

#endif

	sub	r3, r3, r2, lsl #23
	blt	3f

	shiftop add r3 r3 ah lsl r2 ip
	shift1	lsl, ip, al, r2
	rsb	r2, r2, #32
	cmp	ip, #0x80000000
	shiftop adc r0 r3 al lsr r2 r2
	do_it	eq
	biceq	r0, r0, #1
	RET

3:	add	r2, r2, #32
	shift1	lsl, ip, ah, r2
	rsb	r2, r2, #32
	orrs	al, al, ip, lsl #1
	shiftop adc r0 r3 ah lsr r2 r2
	do_it	eq
	biceq	r0, r0, ip, lsr #31
	RET

#if !defined (__VFP_FP__) && !defined(__SOFTFP__)

LSYM(f0_ret):
	str	r0, [sp, #-4]!
	ldfs	f0, [sp], #4
	RETLDM

#endif

	FUNC_END floatdisf
	FUNC_END aeabi_l2f
	FUNC_END floatundisf
	FUNC_END aeabi_ul2f

#endif /* L_arm_addsubsf3 */

#ifdef L_arm_muldivsf3

ARM_FUNC_START mulsf3
ARM_FUNC_ALIAS aeabi_fmul mulsf3

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	ands	r2, ip, r0, lsr #23
	do_it	ne, tt
	COND(and,s,ne)	r3, ip, r1, lsr #23
	teqne	r2, ip
	teqne	r3, ip
	beq	LSYM(Lml_s)
LSYM(Lml_x):

	@ Add exponents together
	add	r2, r2, r3

	@ Determine final sign.
	eor	ip, r0, r1

	@ Convert mantissa to unsigned integer.
	@ If power of two, branch to a separate path.
	@ Make up for final alignment.
	movs	r0, r0, lsl #9
	do_it	ne
	COND(mov,s,ne)	r1, r1, lsl #9
	beq	LSYM(Lml_1)
	mov	r3, #0x08000000
	orr	r0, r3, r0, lsr #5
	orr	r1, r3, r1, lsr #5

#if __ARM_ARCH__ < 4

	@ Put sign bit in r3, which will be restored into r0 later.
	and	r3, ip, #0x80000000

	@ Well, no way to make it shorter without the umull instruction.
	do_push	{r3, r4, r5}
	mov	r4, r0, lsr #16
	mov	r5, r1, lsr #16
	bic	r0, r0, r4, lsl #16
	bic	r1, r1, r5, lsl #16
	mul	ip, r4, r5
	mul	r3, r0, r1
	mul	r0, r5, r0
	mla	r0, r4, r1, r0
	adds	r3, r3, r0, lsl #16
	adc	r1, ip, r0, lsr #16
	do_pop	{r0, r4, r5}

#else

	@ The actual multiplication.
	umull	r3, r1, r0, r1

	@ Put final sign in r0.
	and	r0, ip, #0x80000000

#endif
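
	@ The ARMv3 block above builds the 64-bit product from four
	@ 16x16->32 multiplies because umull is unavailable.  A rough C
	@ sketch of the scheme (illustrative names; here the cross-term
	@ sum cannot overflow 32 bits because both mantissas are below
	@ 2^28):
	@
	@	ahi = a >> 16;  alo = a & 0xffff;
	@	bhi = b >> 16;  blo = b & 0xffff;
	@	mid = alo * bhi + ahi * blo;	// cross terms
	@	lo  = alo * blo + (mid << 16);	// may carry out
	@	hi  = ahi * bhi + (mid >> 16) + (lo < (mid << 16));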

	@ Adjust result upon the MSB position.
	cmp	r1, #(1 << 23)
	do_it	cc, tt
	movcc	r1, r1, lsl #1
	orrcc	r1, r1, r3, lsr #31
	movcc	r3, r3, lsl #1

	@ Add sign to result.
	orr	r0, r0, r1

	@ Apply exponent bias, check for under/overflow.
	sbc	r2, r2, #127
	cmp	r2, #(254 - 1)
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	cmp	r3, #0x80000000
	adc	r0, r0, r2, lsl #23
	do_it	eq
	biceq	r0, r0, #1
	RET

	@ Multiplication by 0x1p*: let's shortcut a lot of code.
LSYM(Lml_1):
	teq	r0, #0
	and	ip, ip, #0x80000000
	do_it	eq
	moveq	r1, r1, lsl #9
	orr	r0, ip, r0, lsr #9
	orr	r0, r0, r1, lsr #9
	subs	r2, r2, #127
	do_it	gt, tt
	COND(rsb,s,gt)	r3, r2, #255
	orrgt	r0, r0, r2, lsl #23
	RETc(gt)

	@ Under/overflow: fix things up for the code below.
	orr	r0, r0, #0x00800000
	mov	r3, #0
	subs	r2, r2, #1

LSYM(Lml_u):
	@ Overflow?
	bgt	LSYM(Lml_o)

	@ Check if denormalized result is possible, otherwise return signed 0.
	cmn	r2, #(24 + 1)
	do_it	le, t
	bicle	r0, r0, #0x7fffffff
	RETc(le)

	@ Shift value right, round, etc.
	rsb	r2, r2, #0
	movs	r1, r0, lsl #1
	shift1	lsr, r1, r1, r2
	rsb	r2, r2, #32
	shift1	lsl, ip, r0, r2
	movs	r0, r1, rrx
	adc	r0, r0, #0
	orrs	r3, r3, ip, lsl #1
	do_it	eq
	biceq	r0, r0, ip, lsr #31
	RET

	@ One or both arguments are denormalized.
	@ Scale them leftwards and preserve sign bit.
LSYM(Lml_d):
	teq	r2, #0
	and	ip, r0, #0x80000000
1:	do_it	eq, tt
	moveq	r0, r0, lsl #1
	tsteq	r0, #0x00800000
	subeq	r2, r2, #1
	beq	1b
	orr	r0, r0, ip
	teq	r3, #0
	and	ip, r1, #0x80000000
2:	do_it	eq, tt
	moveq	r1, r1, lsl #1
	tsteq	r1, #0x00800000
	subeq	r3, r3, #1
	beq	2b
	orr	r1, r1, ip
	b	LSYM(Lml_x)

LSYM(Lml_s):
	@ Isolate the INF and NAN cases away
	and	r3, ip, r1, lsr #23
	teq	r2, ip
	do_it	ne
	teqne	r3, ip
	beq	1f

	@ Here, one or more arguments are either denormalized or zero.
	bics	ip, r0, #0x80000000
	do_it	ne
	COND(bic,s,ne)	ip, r1, #0x80000000
	bne	LSYM(Lml_d)

	@ Result is 0, but determine sign anyway.
LSYM(Lml_z):
	eor	r0, r0, r1
	bic	r0, r0, #0x7fffffff
	RET

1:	@ One or both args are INF or NAN.
	teq	r0, #0x0
	do_it	ne, ett
	teqne	r0, #0x80000000
	moveq	r0, r1
	teqne	r1, #0x0
	teqne	r1, #0x80000000
	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
	teq	r2, ip
	bne	1f
	movs	r2, r0, lsl #9
	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
1:	teq	r3, ip
	bne	LSYM(Lml_i)
	movs	r3, r1, lsl #9
	do_it	ne
	movne	r0, r1
	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN

	@ Result is INF, but we need to determine its sign.
LSYM(Lml_i):
	eor	r0, r0, r1

	@ Overflow: return INF (sign already in r0).
LSYM(Lml_o):
	and	r0, r0, #0x80000000
	orr	r0, r0, #0x7f000000
	orr	r0, r0, #0x00800000
	RET

	@ Return a quiet NAN.
LSYM(Lml_n):
	orr	r0, r0, #0x7f000000
	orr	r0, r0, #0x00c00000
	RET

	FUNC_END aeabi_fmul
	FUNC_END mulsf3

ARM_FUNC_START divsf3
ARM_FUNC_ALIAS aeabi_fdiv divsf3

	@ Mask out exponents, trap any zero/denormal/INF/NAN.
	mov	ip, #0xff
	ands	r2, ip, r0, lsr #23
	do_it	ne, tt
	COND(and,s,ne)	r3, ip, r1, lsr #23
	teqne	r2, ip
	teqne	r3, ip
	beq	LSYM(Ldv_s)
LSYM(Ldv_x):

	@ Subtract the divisor exponent from the dividend's.
	sub	r2, r2, r3

	@ Preserve the final sign in ip.
	eor	ip, r0, r1

	@ Convert mantissa to unsigned integer.
	@ Dividend -> r3, divisor -> r1.
	movs	r1, r1, lsl #9
	mov	r0, r0, lsl #9
	beq	LSYM(Ldv_1)
	mov	r3, #0x10000000
	orr	r1, r3, r1, lsr #4
	orr	r3, r3, r0, lsr #4

	@ Initialize r0 (result) with final sign bit.
	and	r0, ip, #0x80000000

	@ Ensure the result will land at a known bit position.
	@ Apply exponent bias accordingly.
	cmp	r3, r1
	do_it	cc
	movcc	r3, r3, lsl #1
	adc	r2, r2, #(127 - 2)

	@ The actual division loop.
	mov	ip, #0x00800000
1:	cmp	r3, r1
	do_it	cs, t
	subcs	r3, r3, r1
	orrcs	r0, r0, ip
	cmp	r3, r1, lsr #1
	do_it	cs, t
	subcs	r3, r3, r1, lsr #1
	orrcs	r0, r0, ip, lsr #1
	cmp	r3, r1, lsr #2
	do_it	cs, t
	subcs	r3, r3, r1, lsr #2
	orrcs	r0, r0, ip, lsr #2
	cmp	r3, r1, lsr #3
	do_it	cs, t
	subcs	r3, r3, r1, lsr #3
	orrcs	r0, r0, ip, lsr #3
	movs	r3, r3, lsl #4
	do_it	ne
	COND(mov,s,ne)	ip, ip, lsr #4
	bne	1b
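
	@ The loop above performs a restoring division, producing 4
	@ quotient bits per iteration; ip tracks the current quotient bit.
	@ A rough C sketch (illustrative names only):
	@
	@	for (bit = 1 << 23; bit && num; bit >>= 4, num <<= 4)
	@	  for (i = 0; i < 4; i++)
	@	    if (num >= den >> i) {
	@	      num -= den >> i;
	@	      quo |= bit >> i;
	@	    }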

	@ Check exponent for under/overflow.
	cmp	r2, #(254 - 1)
	bhi	LSYM(Lml_u)

	@ Round the result, merge final exponent.
	cmp	r3, r1
	adc	r0, r0, r2, lsl #23
	do_it	eq
	biceq	r0, r0, #1
	RET

	@ Division by 0x1p*: let's shortcut a lot of code.
LSYM(Ldv_1):
	and	ip, ip, #0x80000000
	orr	r0, ip, r0, lsr #9
	adds	r2, r2, #127
	do_it	gt, tt
	COND(rsb,s,gt)	r3, r2, #255
	orrgt	r0, r0, r2, lsl #23
	RETc(gt)

	orr	r0, r0, #0x00800000
	mov	r3, #0
	subs	r2, r2, #1
	b	LSYM(Lml_u)

	@ One or both arguments are denormalized.
	@ Scale them leftwards and preserve sign bit.
LSYM(Ldv_d):
	teq	r2, #0
	and	ip, r0, #0x80000000
1:	do_it	eq, tt
	moveq	r0, r0, lsl #1
	tsteq	r0, #0x00800000
	subeq	r2, r2, #1
	beq	1b
	orr	r0, r0, ip
	teq	r3, #0
	and	ip, r1, #0x80000000
2:	do_it	eq, tt
	moveq	r1, r1, lsl #1
	tsteq	r1, #0x00800000
	subeq	r3, r3, #1
	beq	2b
	orr	r1, r1, ip
	b	LSYM(Ldv_x)

	@ One or both arguments are either INF, NAN, zero or denormalized.
LSYM(Ldv_s):
	and	r3, ip, r1, lsr #23
	teq	r2, ip
	bne	1f
	movs	r2, r0, lsl #9
	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
	teq	r3, ip
	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
	mov	r0, r1
	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
1:	teq	r3, ip
	bne	2f
	movs	r3, r1, lsl #9
	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
	mov	r0, r1
	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
2:	@ If both are nonzero, we need to normalize and resume above.
	bics	ip, r0, #0x80000000
	do_it	ne
	COND(bic,s,ne)	ip, r1, #0x80000000
	bne	LSYM(Ldv_d)
	@ One or both arguments are zero.
	bics	r2, r0, #0x80000000
	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
	bics	r3, r1, #0x80000000
	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
	b	LSYM(Lml_n)		@ 0 / 0 -> NAN

	FUNC_END aeabi_fdiv
	FUNC_END divsf3

#endif /* L_arm_muldivsf3 */

#ifdef L_arm_cmpsf2

@ The return value in r0 is
@
@   0  if the operands are equal
@   1  if the first operand is greater than the second, or
@      the operands are unordered and the operation is
@      CMP, LT, LE, NE, or EQ.
@   -1 if the first operand is less than the second, or
@      the operands are unordered and the operation is GT
@      or GE.
@
@ The Z flag will be set iff the operands are equal.
@
@ The following registers are clobbered by this function:
@   ip, r0, r1, r2, r3
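
@ For illustration: with soft-float, GCC itself emits calls to these
@ routines for float comparisons, roughly as in this C view of the
@ contract above (illustrative snippet, not part of this file):
@
@	extern int __ltsf2(float, float);	// the ltsf2 entry below
@	extern int __eqsf2(float, float);	// alias of cmpsf2 below
@	// a <  b   compiles to   __ltsf2(a, b) < 0
@	// a == b   compiles to   __eqsf2(a, b) == 0
@	// unordered operands make both tests come out false
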
ARM_FUNC_START gtsf2
ARM_FUNC_ALIAS gesf2 gtsf2
	mov	ip, #-1
	b	1f

ARM_FUNC_START ltsf2
ARM_FUNC_ALIAS lesf2 ltsf2
	mov	ip, #1
	b	1f

ARM_FUNC_START cmpsf2
ARM_FUNC_ALIAS nesf2 cmpsf2
ARM_FUNC_ALIAS eqsf2 cmpsf2
	mov	ip, #1			@ how should we specify unordered here?

1:	str	ip, [sp, #-4]

	@ Trap any INF/NAN first.
	mov	r2, r0, lsl #1
	mov	r3, r1, lsl #1
	mvns	ip, r2, asr #24
	do_it	ne
	COND(mvn,s,ne)	ip, r3, asr #24
	beq	3f

	@ Compare values.
	@ Note that 0.0 is equal to -0.0.
2:	orrs	ip, r2, r3, lsr #1	@ test if both are 0, clear C flag
	do_it	ne
	teqne	r0, r1			@ if not 0 compare sign
	do_it	pl
	COND(sub,s,pl)	r0, r2, r3	@ if same sign compare values, set r0

	@ Result:
	do_it	hi
	movhi	r0, r1, asr #31
	do_it	lo
	mvnlo	r0, r1, asr #31
	do_it	ne
	orrne	r0, r0, #1
	RET

	@ Look for a NAN.
3:	mvns	ip, r2, asr #24
	bne	4f
	movs	ip, r0, lsl #9
	bne	5f			@ r0 is NAN
4:	mvns	ip, r3, asr #24
	bne	2b
	movs	ip, r1, lsl #9
	beq	2b			@ r1 is not NAN
5:	ldr	r0, [sp, #-4]		@ return unordered code.
	RET

	FUNC_END gesf2
	FUNC_END gtsf2
	FUNC_END lesf2
	FUNC_END ltsf2
	FUNC_END nesf2
	FUNC_END eqsf2
	FUNC_END cmpsf2

ARM_FUNC_START aeabi_cfrcmple

	mov	ip, r0
	mov	r0, r1
	mov	r1, ip
	b	6f

ARM_FUNC_START aeabi_cfcmpeq
ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq

	@ The status-returning routines are required to preserve all
	@ registers except ip, lr, and cpsr.
6:	do_push	{r0, r1, r2, r3, lr}
	ARM_CALL cmpsf2
	@ Set the Z flag correctly, and the C flag unconditionally.
	cmp	r0, #0
	@ Clear the C flag if the return value was -1, indicating
	@ that the first operand was smaller than the second.
	do_it	mi
	cmnmi	r0, #0
	RETLDM	"r0, r1, r2, r3"

	FUNC_END aeabi_cfcmple
	FUNC_END aeabi_cfcmpeq
	FUNC_END aeabi_cfrcmple

ARM_FUNC_START aeabi_fcmpeq

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cfcmple
	do_it	eq, e
	moveq	r0, #1	@ Equal to.
	movne	r0, #0	@ Less than, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_fcmpeq

ARM_FUNC_START aeabi_fcmplt

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cfcmple
	do_it	cc, e
	movcc	r0, #1	@ Less than.
	movcs	r0, #0	@ Equal to, greater than, or unordered.
	RETLDM

	FUNC_END aeabi_fcmplt

ARM_FUNC_START aeabi_fcmple

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cfcmple
	do_it	ls, e
	movls	r0, #1	@ Less than or equal to.
	movhi	r0, #0	@ Greater than or unordered.
	RETLDM

	FUNC_END aeabi_fcmple

ARM_FUNC_START aeabi_fcmpge

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cfrcmple
	do_it	ls, e
	movls	r0, #1	@ Operand 2 is less than or equal to operand 1.
	movhi	r0, #0	@ Operand 2 greater than operand 1, or unordered.
	RETLDM

	FUNC_END aeabi_fcmpge

ARM_FUNC_START aeabi_fcmpgt

	str	lr, [sp, #-8]!
	ARM_CALL aeabi_cfrcmple
	do_it	cc, e
	movcc	r0, #1	@ Operand 2 is less than operand 1.
	movcs	r0, #0	@ Operand 2 is greater than or equal to operand 1,
			@ or they are unordered.
	RETLDM

	FUNC_END aeabi_fcmpgt

#endif /* L_arm_cmpsf2 */

#ifdef L_arm_unordsf2

ARM_FUNC_START unordsf2
ARM_FUNC_ALIAS aeabi_fcmpun unordsf2

	mov	r2, r0, lsl #1
	mov	r3, r1, lsl #1
	mvns	ip, r2, asr #24
	bne	1f
	movs	ip, r0, lsl #9
	bne	3f			@ r0 is NAN
1:	mvns	ip, r3, asr #24
	bne	2f
	movs	ip, r1, lsl #9
	bne	3f			@ r1 is NAN
2:	mov	r0, #0			@ arguments are ordered.
	RET
3:	mov	r0, #1			@ arguments are unordered.
	RET

	FUNC_END aeabi_fcmpun
	FUNC_END unordsf2

#endif /* L_arm_unordsf2 */
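
@ The NAN test used throughout these routines looks for an all-ones
@ exponent with a nonzero mantissa.  A rough C sketch (hypothetical
@ helper, not part of this file):
@
@	static inline int isnan_sketch(float x)
@	{
@	  union { float f; unsigned int u; } v = { x };
@	  // drop the sign; NAN iff what remains exceeds the INF pattern
@	  return (v.u << 1) > 0xff000000u;
@	}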

#ifdef L_arm_fixsfsi

ARM_FUNC_START fixsfsi
ARM_FUNC_ALIAS aeabi_f2iz fixsfsi

	@ check exponent range.
	mov	r2, r0, lsl #1
	cmp	r2, #(127 << 24)
	bcc	1f			@ value is too small
	mov	r3, #(127 + 31)
	subs	r2, r3, r2, lsr #24
	bls	2f			@ value is too large

	@ scale value
	mov	r3, r0, lsl #8
	orr	r3, r3, #0x80000000
	tst	r0, #0x80000000		@ the sign bit
	shift1	lsr, r0, r3, r2
	do_it	ne
	rsbne	r0, r0, #0
	RET
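
	@ A rough C sketch of the truncating conversion above, ignoring
	@ the too-small/too-large/NAN special cases handled below
	@ (illustrative names; float_bits is a hypothetical helper):
	@
	@	unsigned int u = float_bits(x);		// raw float bits
	@	int exp = (u >> 23) & 0xff;		// biased exponent
	@	unsigned int m = (u << 8) | 0x80000000u; // mantissa, MSB at bit 31
	@	int r = (int) (m >> (127 + 31 - exp));	// shift count in [1, 31]
	@	return (u & 0x80000000u) ? -r : r;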

1:	mov	r0, #0
	RET

2:	cmp	r2, #(127 + 31 - 0xff)
	bne	3f
	movs	r2, r0, lsl #9
	bne	4f			@ r0 is NAN.
3:	ands	r0, r0, #0x80000000	@ the sign bit
	do_it	eq
	moveq	r0, #0x7fffffff		@ the maximum signed positive si
	RET

4:	mov	r0, #0			@ What should we convert NAN to?
	RET

	FUNC_END aeabi_f2iz
	FUNC_END fixsfsi

#endif /* L_arm_fixsfsi */

#ifdef L_arm_fixunssfsi

ARM_FUNC_START fixunssfsi
ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi

	@ check exponent range.
	movs	r2, r0, lsl #1
	bcs	1f			@ value is negative
	cmp	r2, #(127 << 24)
	bcc	1f			@ value is too small
	mov	r3, #(127 + 31)
	subs	r2, r3, r2, lsr #24
	bmi	2f			@ value is too large

	@ scale the value
	mov	r3, r0, lsl #8
	orr	r3, r3, #0x80000000
	shift1	lsr, r0, r3, r2
	RET

1:	mov	r0, #0
	RET

2:	cmp	r2, #(127 + 31 - 0xff)
	bne	3f
	movs	r2, r0, lsl #9
	bne	4f			@ r0 is NAN.
3:	mov	r0, #0xffffffff		@ maximum unsigned si
	RET

4:	mov	r0, #0			@ What should we convert NAN to?
	RET

	FUNC_END aeabi_f2uiz
	FUNC_END fixunssfsi

#endif /* L_arm_fixunssfsi */