0
|
1 /* Software floating-point emulation.
|
|
2 Definitions for IEEE Extended Precision.
|
|
3 Copyright (C) 1999,2006,2007 Free Software Foundation, Inc.
|
|
4 This file is part of the GNU C Library.
|
|
5 Contributed by Jakub Jelinek (jj@ultra.linux.cz).
|
|
6
|
|
7 The GNU C Library is free software; you can redistribute it and/or
|
|
8 modify it under the terms of the GNU Lesser General Public
|
|
9 License as published by the Free Software Foundation; either
|
|
10 version 2.1 of the License, or (at your option) any later version.
|
|
11
|
|
12 In addition to the permissions in the GNU Lesser General Public
|
|
13 License, the Free Software Foundation gives you unlimited
|
|
14 permission to link the compiled version of this file into
|
|
15 combinations with other programs, and to distribute those
|
|
16 combinations without any restriction coming from the use of this
|
|
17 file. (The Lesser General Public License restrictions do apply in
|
|
18 other respects; for example, they cover modification of the file,
|
|
19 and distribution when not linked into a combine executable.)
|
|
20
|
|
21 The GNU C Library is distributed in the hope that it will be useful,
|
|
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
24 Lesser General Public License for more details.
|
|
25
|
|
26 You should have received a copy of the GNU Lesser General Public
|
|
27 License along with the GNU C Library; if not, write to the Free
|
|
28 Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
|
|
29 MA 02110-1301, USA. */
|
|
30
|
|
31 #if _FP_W_TYPE_SIZE < 32
|
|
32 #error "Here's a nickel, kid. Go buy yourself a real computer."
|
|
33 #endif
|
|
34
|
|
35 #if _FP_W_TYPE_SIZE < 64
|
|
36 #define _FP_FRACTBITS_E (4*_FP_W_TYPE_SIZE)
|
|
37 #else
|
|
38 #define _FP_FRACTBITS_E (2*_FP_W_TYPE_SIZE)
|
|
39 #endif
|
|
40
|
|
41 #define _FP_FRACBITS_E 64
|
|
42 #define _FP_FRACXBITS_E (_FP_FRACTBITS_E - _FP_FRACBITS_E)
|
|
43 #define _FP_WFRACBITS_E (_FP_WORKBITS + _FP_FRACBITS_E)
|
|
44 #define _FP_WFRACXBITS_E (_FP_FRACTBITS_E - _FP_WFRACBITS_E)
|
|
45 #define _FP_EXPBITS_E 15
|
|
46 #define _FP_EXPBIAS_E 16383
|
|
47 #define _FP_EXPMAX_E 32767
|
|
48
|
|
49 #define _FP_QNANBIT_E \
|
|
50 ((_FP_W_TYPE)1 << (_FP_FRACBITS_E-2) % _FP_W_TYPE_SIZE)
|
|
51 #define _FP_QNANBIT_SH_E \
|
|
52 ((_FP_W_TYPE)1 << (_FP_FRACBITS_E-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
|
|
53 #define _FP_IMPLBIT_E \
|
|
54 ((_FP_W_TYPE)1 << (_FP_FRACBITS_E-1) % _FP_W_TYPE_SIZE)
|
|
55 #define _FP_IMPLBIT_SH_E \
|
|
56 ((_FP_W_TYPE)1 << (_FP_FRACBITS_E-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
|
|
57 #define _FP_OVERFLOW_E \
|
|
58 ((_FP_W_TYPE)1 << (_FP_WFRACBITS_E % _FP_W_TYPE_SIZE))
|
|
59
|
|
60 typedef float XFtype __attribute__((mode(XF)));
|
|
61
|
|
62 #if _FP_W_TYPE_SIZE < 64
|
|
63
|
|
64 union _FP_UNION_E
|
|
65 {
|
|
66 XFtype flt;
|
|
67 struct
|
|
68 {
|
|
69 #if __BYTE_ORDER == __BIG_ENDIAN
|
|
70 unsigned long pad1 : _FP_W_TYPE_SIZE;
|
|
71 unsigned long pad2 : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
|
|
72 unsigned long sign : 1;
|
|
73 unsigned long exp : _FP_EXPBITS_E;
|
|
74 unsigned long frac1 : _FP_W_TYPE_SIZE;
|
|
75 unsigned long frac0 : _FP_W_TYPE_SIZE;
|
|
76 #else
|
|
77 unsigned long frac0 : _FP_W_TYPE_SIZE;
|
|
78 unsigned long frac1 : _FP_W_TYPE_SIZE;
|
|
79 unsigned exp : _FP_EXPBITS_E;
|
|
80 unsigned sign : 1;
|
|
81 #endif /* not bigendian */
|
|
82 } bits __attribute__((packed));
|
|
83 };
|
|
84
|
|
85
|
|
86 #define FP_DECL_E(X) _FP_DECL(4,X)
|
|
87
|
|
88 #define FP_UNPACK_RAW_E(X, val) \
|
|
89 do { \
|
|
90 union _FP_UNION_E _flo; _flo.flt = (val); \
|
|
91 \
|
|
92 X##_f[2] = 0; X##_f[3] = 0; \
|
|
93 X##_f[0] = _flo.bits.frac0; \
|
|
94 X##_f[1] = _flo.bits.frac1; \
|
|
95 X##_e = _flo.bits.exp; \
|
|
96 X##_s = _flo.bits.sign; \
|
|
97 } while (0)
|
|
98
|
|
99 #define FP_UNPACK_RAW_EP(X, val) \
|
|
100 do { \
|
|
101 union _FP_UNION_E *_flo = \
|
|
102 (union _FP_UNION_E *)(val); \
|
|
103 \
|
|
104 X##_f[2] = 0; X##_f[3] = 0; \
|
|
105 X##_f[0] = _flo->bits.frac0; \
|
|
106 X##_f[1] = _flo->bits.frac1; \
|
|
107 X##_e = _flo->bits.exp; \
|
|
108 X##_s = _flo->bits.sign; \
|
|
109 } while (0)
|
|
110
|
|
111 #define FP_PACK_RAW_E(val, X) \
|
|
112 do { \
|
|
113 union _FP_UNION_E _flo; \
|
|
114 \
|
|
115 if (X##_e) X##_f[1] |= _FP_IMPLBIT_E; \
|
|
116 else X##_f[1] &= ~(_FP_IMPLBIT_E); \
|
|
117 _flo.bits.frac0 = X##_f[0]; \
|
|
118 _flo.bits.frac1 = X##_f[1]; \
|
|
119 _flo.bits.exp = X##_e; \
|
|
120 _flo.bits.sign = X##_s; \
|
|
121 \
|
|
122 (val) = _flo.flt; \
|
|
123 } while (0)
|
|
124
|
|
125 #define FP_PACK_RAW_EP(val, X) \
|
|
126 do { \
|
|
127 if (!FP_INHIBIT_RESULTS) \
|
|
128 { \
|
|
129 union _FP_UNION_E *_flo = \
|
|
130 (union _FP_UNION_E *)(val); \
|
|
131 \
|
|
132 if (X##_e) X##_f[1] |= _FP_IMPLBIT_E; \
|
|
133 else X##_f[1] &= ~(_FP_IMPLBIT_E); \
|
|
134 _flo->bits.frac0 = X##_f[0]; \
|
|
135 _flo->bits.frac1 = X##_f[1]; \
|
|
136 _flo->bits.exp = X##_e; \
|
|
137 _flo->bits.sign = X##_s; \
|
|
138 } \
|
|
139 } while (0)
|
|
140
|
|
141 #define FP_UNPACK_E(X,val) \
|
|
142 do { \
|
|
143 FP_UNPACK_RAW_E(X,val); \
|
|
144 _FP_UNPACK_CANONICAL(E,4,X); \
|
|
145 } while (0)
|
|
146
|
|
147 #define FP_UNPACK_EP(X,val) \
|
|
148 do { \
|
|
149 FP_UNPACK_RAW_EP(X,val); \
|
|
150 _FP_UNPACK_CANONICAL(E,4,X); \
|
|
151 } while (0)
|
|
152
|
|
153 #define FP_UNPACK_SEMIRAW_E(X,val) \
|
|
154 do { \
|
|
155 FP_UNPACK_RAW_E(X,val); \
|
|
156 _FP_UNPACK_SEMIRAW(E,4,X); \
|
|
157 } while (0)
|
|
158
|
|
159 #define FP_UNPACK_SEMIRAW_EP(X,val) \
|
|
160 do { \
|
|
161 FP_UNPACK_RAW_EP(X,val); \
|
|
162 _FP_UNPACK_SEMIRAW(E,4,X); \
|
|
163 } while (0)
|
|
164
|
|
165 #define FP_PACK_E(val,X) \
|
|
166 do { \
|
|
167 _FP_PACK_CANONICAL(E,4,X); \
|
|
168 FP_PACK_RAW_E(val,X); \
|
|
169 } while (0)
|
|
170
|
|
171 #define FP_PACK_EP(val,X) \
|
|
172 do { \
|
|
173 _FP_PACK_CANONICAL(E,4,X); \
|
|
174 FP_PACK_RAW_EP(val,X); \
|
|
175 } while (0)
|
|
176
|
|
177 #define FP_PACK_SEMIRAW_E(val,X) \
|
|
178 do { \
|
|
179 _FP_PACK_SEMIRAW(E,4,X); \
|
|
180 FP_PACK_RAW_E(val,X); \
|
|
181 } while (0)
|
|
182
|
|
183 #define FP_PACK_SEMIRAW_EP(val,X) \
|
|
184 do { \
|
|
185 _FP_PACK_SEMIRAW(E,4,X); \
|
|
186 FP_PACK_RAW_EP(val,X); \
|
|
187 } while (0)
|
|
188
|
|
189 #define FP_ISSIGNAN_E(X) _FP_ISSIGNAN(E,4,X)
|
|
190 #define FP_NEG_E(R,X) _FP_NEG(E,4,R,X)
|
|
191 #define FP_ADD_E(R,X,Y) _FP_ADD(E,4,R,X,Y)
|
|
192 #define FP_SUB_E(R,X,Y) _FP_SUB(E,4,R,X,Y)
|
|
193 #define FP_MUL_E(R,X,Y) _FP_MUL(E,4,R,X,Y)
|
|
194 #define FP_DIV_E(R,X,Y) _FP_DIV(E,4,R,X,Y)
|
|
195 #define FP_SQRT_E(R,X) _FP_SQRT(E,4,R,X)
|
|
196
|
|
197 /*
|
|
198 * Square root algorithms:
|
|
199 * We have just one right now, maybe Newton approximation
|
|
200 * should be added for those machines where division is fast.
|
|
201 * This has special _E version because standard _4 square
|
|
202 * root would not work (it has to start normally with the
|
|
203 * second word and not the first), but as we have to do it
|
|
204 * anyway, we optimize it by doing most of the calculations
|
|
205 * in two UWtype registers instead of four.
|
|
206 */
|
|
207
|
|
208 #define _FP_SQRT_MEAT_E(R, S, T, X, q) \
|
|
209 do { \
|
|
210 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
|
|
211 _FP_FRAC_SRL_4(X, (_FP_WORKBITS)); \
|
|
212 while (q) \
|
|
213 { \
|
|
214 T##_f[1] = S##_f[1] + q; \
|
|
215 if (T##_f[1] <= X##_f[1]) \
|
|
216 { \
|
|
217 S##_f[1] = T##_f[1] + q; \
|
|
218 X##_f[1] -= T##_f[1]; \
|
|
219 R##_f[1] += q; \
|
|
220 } \
|
|
221 _FP_FRAC_SLL_2(X, 1); \
|
|
222 q >>= 1; \
|
|
223 } \
|
|
224 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
|
|
225 while (q) \
|
|
226 { \
|
|
227 T##_f[0] = S##_f[0] + q; \
|
|
228 T##_f[1] = S##_f[1]; \
|
|
229 if (T##_f[1] < X##_f[1] || \
|
|
230 (T##_f[1] == X##_f[1] && \
|
|
231 T##_f[0] <= X##_f[0])) \
|
|
232 { \
|
|
233 S##_f[0] = T##_f[0] + q; \
|
|
234 S##_f[1] += (T##_f[0] > S##_f[0]); \
|
|
235 _FP_FRAC_DEC_2(X, T); \
|
|
236 R##_f[0] += q; \
|
|
237 } \
|
|
238 _FP_FRAC_SLL_2(X, 1); \
|
|
239 q >>= 1; \
|
|
240 } \
|
|
241 _FP_FRAC_SLL_4(R, (_FP_WORKBITS)); \
|
|
242 if (X##_f[0] | X##_f[1]) \
|
|
243 { \
|
|
244 if (S##_f[1] < X##_f[1] || \
|
|
245 (S##_f[1] == X##_f[1] && \
|
|
246 S##_f[0] < X##_f[0])) \
|
|
247 R##_f[0] |= _FP_WORK_ROUND; \
|
|
248 R##_f[0] |= _FP_WORK_STICKY; \
|
|
249 } \
|
|
250 } while (0)
|
|
251
|
|
252 #define FP_CMP_E(r,X,Y,un) _FP_CMP(E,4,r,X,Y,un)
|
|
253 #define FP_CMP_EQ_E(r,X,Y) _FP_CMP_EQ(E,4,r,X,Y)
|
|
254 #define FP_CMP_UNORD_E(r,X,Y) _FP_CMP_UNORD(E,4,r,X,Y)
|
|
255
|
|
256 #define FP_TO_INT_E(r,X,rsz,rsg) _FP_TO_INT(E,4,r,X,rsz,rsg)
|
|
257 #define FP_FROM_INT_E(X,r,rs,rt) _FP_FROM_INT(E,4,X,r,rs,rt)
|
|
258
|
|
259 #define _FP_FRAC_HIGH_E(X) (X##_f[2])
|
|
260 #define _FP_FRAC_HIGH_RAW_E(X) (X##_f[1])
|
|
261
|
|
262 #else /* not _FP_W_TYPE_SIZE < 64 */
|
|
263 union _FP_UNION_E
|
|
264 {
|
|
265 XFtype flt;
|
|
266 struct {
|
|
267 #if __BYTE_ORDER == __BIG_ENDIAN
|
|
268 _FP_W_TYPE pad : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
|
|
269 unsigned sign : 1;
|
|
270 unsigned exp : _FP_EXPBITS_E;
|
|
271 _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
|
|
272 #else
|
|
273 _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
|
|
274 unsigned exp : _FP_EXPBITS_E;
|
|
275 unsigned sign : 1;
|
|
276 #endif
|
|
277 } bits;
|
|
278 };
|
|
279
|
|
280 #define FP_DECL_E(X) _FP_DECL(2,X)
|
|
281
|
|
282 #define FP_UNPACK_RAW_E(X, val) \
|
|
283 do { \
|
|
284 union _FP_UNION_E _flo; _flo.flt = (val); \
|
|
285 \
|
|
286 X##_f0 = _flo.bits.frac; \
|
|
287 X##_f1 = 0; \
|
|
288 X##_e = _flo.bits.exp; \
|
|
289 X##_s = _flo.bits.sign; \
|
|
290 } while (0)
|
|
291
|
|
292 #define FP_UNPACK_RAW_EP(X, val) \
|
|
293 do { \
|
|
294 union _FP_UNION_E *_flo = \
|
|
295 (union _FP_UNION_E *)(val); \
|
|
296 \
|
|
297 X##_f0 = _flo->bits.frac; \
|
|
298 X##_f1 = 0; \
|
|
299 X##_e = _flo->bits.exp; \
|
|
300 X##_s = _flo->bits.sign; \
|
|
301 } while (0)
|
|
302
|
|
303 #define FP_PACK_RAW_E(val, X) \
|
|
304 do { \
|
|
305 union _FP_UNION_E _flo; \
|
|
306 \
|
|
307 if (X##_e) X##_f0 |= _FP_IMPLBIT_E; \
|
|
308 else X##_f0 &= ~(_FP_IMPLBIT_E); \
|
|
309 _flo.bits.frac = X##_f0; \
|
|
310 _flo.bits.exp = X##_e; \
|
|
311 _flo.bits.sign = X##_s; \
|
|
312 \
|
|
313 (val) = _flo.flt; \
|
|
314 } while (0)
|
|
315
|
|
316 #define FP_PACK_RAW_EP(fs, val, X) \
|
|
317 do { \
|
|
318 if (!FP_INHIBIT_RESULTS) \
|
|
319 { \
|
|
320 union _FP_UNION_E *_flo = \
|
|
321 (union _FP_UNION_E *)(val); \
|
|
322 \
|
|
323 if (X##_e) X##_f0 |= _FP_IMPLBIT_E; \
|
|
324 else X##_f0 &= ~(_FP_IMPLBIT_E); \
|
|
325 _flo->bits.frac = X##_f0; \
|
|
326 _flo->bits.exp = X##_e; \
|
|
327 _flo->bits.sign = X##_s; \
|
|
328 } \
|
|
329 } while (0)
|
|
330
|
|
331
|
|
332 #define FP_UNPACK_E(X,val) \
|
|
333 do { \
|
|
334 FP_UNPACK_RAW_E(X,val); \
|
|
335 _FP_UNPACK_CANONICAL(E,2,X); \
|
|
336 } while (0)
|
|
337
|
|
338 #define FP_UNPACK_EP(X,val) \
|
|
339 do { \
|
|
340 FP_UNPACK_RAW_EP(X,val); \
|
|
341 _FP_UNPACK_CANONICAL(E,2,X); \
|
|
342 } while (0)
|
|
343
|
|
344 #define FP_UNPACK_SEMIRAW_E(X,val) \
|
|
345 do { \
|
|
346 FP_UNPACK_RAW_E(X,val); \
|
|
347 _FP_UNPACK_SEMIRAW(E,2,X); \
|
|
348 } while (0)
|
|
349
|
|
350 #define FP_UNPACK_SEMIRAW_EP(X,val) \
|
|
351 do { \
|
|
352 FP_UNPACK_RAW_EP(X,val); \
|
|
353 _FP_UNPACK_SEMIRAW(E,2,X); \
|
|
354 } while (0)
|
|
355
|
|
356 #define FP_PACK_E(val,X) \
|
|
357 do { \
|
|
358 _FP_PACK_CANONICAL(E,2,X); \
|
|
359 FP_PACK_RAW_E(val,X); \
|
|
360 } while (0)
|
|
361
|
|
362 #define FP_PACK_EP(val,X) \
|
|
363 do { \
|
|
364 _FP_PACK_CANONICAL(E,2,X); \
|
|
365 FP_PACK_RAW_EP(val,X); \
|
|
366 } while (0)
|
|
367
|
|
368 #define FP_PACK_SEMIRAW_E(val,X) \
|
|
369 do { \
|
|
370 _FP_PACK_SEMIRAW(E,2,X); \
|
|
371 FP_PACK_RAW_E(val,X); \
|
|
372 } while (0)
|
|
373
|
|
374 #define FP_PACK_SEMIRAW_EP(val,X) \
|
|
375 do { \
|
|
376 _FP_PACK_SEMIRAW(E,2,X); \
|
|
377 FP_PACK_RAW_EP(val,X); \
|
|
378 } while (0)
|
|
379
|
|
380 #define FP_ISSIGNAN_E(X) _FP_ISSIGNAN(E,2,X)
|
|
381 #define FP_NEG_E(R,X) _FP_NEG(E,2,R,X)
|
|
382 #define FP_ADD_E(R,X,Y) _FP_ADD(E,2,R,X,Y)
|
|
383 #define FP_SUB_E(R,X,Y) _FP_SUB(E,2,R,X,Y)
|
|
384 #define FP_MUL_E(R,X,Y) _FP_MUL(E,2,R,X,Y)
|
|
385 #define FP_DIV_E(R,X,Y) _FP_DIV(E,2,R,X,Y)
|
|
386 #define FP_SQRT_E(R,X) _FP_SQRT(E,2,R,X)
|
|
387
|
|
388 /*
|
|
389 * Square root algorithms:
|
|
390 * We have just one right now, maybe Newton approximation
|
|
391 * should be added for those machines where division is fast.
|
|
392 * We optimize it by doing most of the calculations
|
|
393 * in one UWtype registers instead of two, although we don't
|
|
394 * have to.
|
|
395 */
|
|
396 #define _FP_SQRT_MEAT_E(R, S, T, X, q) \
|
|
397 do { \
|
|
398 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
|
|
399 _FP_FRAC_SRL_2(X, (_FP_WORKBITS)); \
|
|
400 while (q) \
|
|
401 { \
|
|
402 T##_f0 = S##_f0 + q; \
|
|
403 if (T##_f0 <= X##_f0) \
|
|
404 { \
|
|
405 S##_f0 = T##_f0 + q; \
|
|
406 X##_f0 -= T##_f0; \
|
|
407 R##_f0 += q; \
|
|
408 } \
|
|
409 _FP_FRAC_SLL_1(X, 1); \
|
|
410 q >>= 1; \
|
|
411 } \
|
|
412 _FP_FRAC_SLL_2(R, (_FP_WORKBITS)); \
|
|
413 if (X##_f0) \
|
|
414 { \
|
|
415 if (S##_f0 < X##_f0) \
|
|
416 R##_f0 |= _FP_WORK_ROUND; \
|
|
417 R##_f0 |= _FP_WORK_STICKY; \
|
|
418 } \
|
|
419 } while (0)
|
|
420
|
|
421 #define FP_CMP_E(r,X,Y,un) _FP_CMP(E,2,r,X,Y,un)
|
|
422 #define FP_CMP_EQ_E(r,X,Y) _FP_CMP_EQ(E,2,r,X,Y)
|
|
423 #define FP_CMP_UNORD_E(r,X,Y) _FP_CMP_UNORD(E,2,r,X,Y)
|
|
424
|
|
425 #define FP_TO_INT_E(r,X,rsz,rsg) _FP_TO_INT(E,2,r,X,rsz,rsg)
|
|
426 #define FP_FROM_INT_E(X,r,rs,rt) _FP_FROM_INT(E,2,X,r,rs,rt)
|
|
427
|
|
428 #define _FP_FRAC_HIGH_E(X) (X##_f1)
|
|
429 #define _FP_FRAC_HIGH_RAW_E(X) (X##_f0)
|
|
430
|
|
431 #endif /* not _FP_W_TYPE_SIZE < 64 */
|