111
|
1 /* Copyright (C) 2007-2017 Free Software Foundation, Inc.
|
0
|
2
|
|
3 This file is part of GCC.
|
|
4
|
|
5 GCC is free software; you can redistribute it and/or modify it under
|
|
6 the terms of the GNU General Public License as published by the Free
|
|
7 Software Foundation; either version 3, or (at your option) any later
|
|
8 version.
|
|
9
|
|
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
13 for more details.
|
|
14
|
|
15 Under Section 7 of GPL version 3, you are granted additional
|
|
16 permissions described in the GCC Runtime Library Exception, version
|
|
17 3.1, as published by the Free Software Foundation.
|
|
18
|
|
19 You should have received a copy of the GNU General Public License and
|
|
20 a copy of the GCC Runtime Library Exception along with this program;
|
|
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
22 <http://www.gnu.org/licenses/>. */
|
|
23
|
|
// Internal helper macros and inline functions shared by the BID
// (binary-integer-significand decimal floating-point) library sources.
#ifndef __BIDECIMAL_H
#define __BIDECIMAL_H

#include "bid_conf.h"       // build configuration (rounding modes, flags)
#include "bid_functions.h"  // public API declarations and basic types

// All helpers in this header are internal: static inline so each
// translation unit gets its own copy and nothing is exported.
#define __BID_INLINE__ static __inline
|
|
31
|
|
/*********************************************************************
 *
 *      Logical Shift Macros
 *
 *********************************************************************/
// Logical right shift of the 128-bit value A by k bits into Q.
// Valid only for 0 < k < 64: k == 0 would produce a shift by 64 below
// (undefined behavior) and k >= 64 must use __shr_128_long instead.
// Fix: every use of the parameter k is now parenthesized.  The original
// expanded bare "k" inside "64-k" and ">> k", so an additive or
// conditional argument (e.g. "a+b") mis-parsed as "64-a+b".
#define __shr_128(Q, A, k)                        \
{                                                 \
     (Q).w[0] = (A).w[0] >> (k);                  \
     (Q).w[0] |= (A).w[1] << (64 - (k));          \
     (Q).w[1] = (A).w[1] >> (k);                  \
}

// Logical right shift of A by k bits into Q, for any 0 < k < 128.
#define __shr_128_long(Q, A, k)                   \
{                                                 \
	if((k)<64) {                                  \
          (Q).w[0] = (A).w[0] >> (k);             \
          (Q).w[0] |= (A).w[1] << (64 - (k));     \
          (Q).w[1] = (A).w[1] >> (k);             \
	}                                             \
	else {                                        \
	      (Q).w[0] = (A).w[1]>>((k)-64);          \
	      (Q).w[1] = 0;                           \
	}                                             \
}

// Logical left shift of A by k bits into Q, for any 0 < k < 128.
#define __shl_128_long(Q, A, k)                   \
{                                                 \
	if((k)<64) {                                  \
          (Q).w[1] = (A).w[1] << (k);             \
          (Q).w[1] |= (A).w[0] >> (64 - (k));     \
          (Q).w[0] = (A).w[0] << (k);             \
	}                                             \
	else {                                        \
	      (Q).w[1] = (A).w[0]<<((k)-64);          \
	      (Q).w[0] = 0;                           \
	}                                             \
}

// Low 64 bits of a 128-bit value.
#define __low_64(Q)  (Q).w[0]
|
|
/*********************************************************************
 *
 *      String Macros
 *
 *********************************************************************/
// ASCII-only lower-casing: maps 'A'..'Z' to 'a'..'z', leaves every
// other value unchanged.  The (unsigned char) cast makes the range
// test a single unsigned comparison.
// NOTE: evaluates x up to three times — do not pass an expression
// with side effects.
#define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
|
|
/*********************************************************************
 *
 *      Compare Macros
 *
 *********************************************************************/
// greater than
//  return 0 if A<=B
//  non-zero if A>B
// Unsigned 128-bit compare: high words first, low words break ties.
#define __unsigned_compare_gt_128(A, B)  \
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>B.w[0])))
// greater-or-equal
#define __unsigned_compare_ge_128(A, B)  \
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>=B.w[0])))
// 128-bit equality test.
#define __test_equal_128(A, B)  (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
// tighten exponent range
// Refine an estimated binary exponent: set bp = bin_expon, then
// increment bp once if the 128-bit value P is >= 2^(bp+1), i.e. the
// estimate was one too small.  Three cases avoid shifts >= 64:
// bp < 63 tests the low word, bp > 64 tests against the high word,
// and bp == 63 or 64 only needs to know whether the high word is set.
#define __tight_bin_range_128(bp, P, bin_expon)  \
{                                                \
UINT64 M;                                        \
	M = 1;                                       \
	(bp) = (bin_expon);                          \
	if((bp)<63) {                                \
	  M <<= ((bp)+1);                            \
	  if((P).w[0] >= M) (bp)++; }                \
	else if((bp)>64) {                           \
	  M <<= ((bp)+1-64);                         \
	  if(((P).w[1]>M) ||((P).w[1]==M && (P).w[0]))\
	     (bp)++; }                               \
	else if((P).w[1]) (bp)++;                    \
}
|
|
107 /*********************************************************************
|
|
108 *
|
|
109 * Add/Subtract Macros
|
|
110 *
|
|
111 *********************************************************************/
|
|
112 // add 64-bit value to 128-bit
|
|
113 #define __add_128_64(R128, A128, B64) \
|
|
114 { \
|
|
115 UINT64 R64H; \
|
|
116 R64H = (A128).w[1]; \
|
|
117 (R128).w[0] = (B64) + (A128).w[0]; \
|
|
118 if((R128).w[0] < (B64)) \
|
|
119 R64H ++; \
|
|
120 (R128).w[1] = R64H; \
|
|
121 }
|
|
122 // subtract 64-bit value from 128-bit
|
|
123 #define __sub_128_64(R128, A128, B64) \
|
|
124 { \
|
|
125 UINT64 R64H; \
|
|
126 R64H = (A128).w[1]; \
|
|
127 if((A128).w[0] < (B64)) \
|
|
128 R64H --; \
|
|
129 (R128).w[1] = R64H; \
|
|
130 (R128).w[0] = (A128).w[0] - (B64); \
|
|
131 }
|
|
132 // add 128-bit value to 128-bit
|
|
133 // assume no carry-out
|
|
134 #define __add_128_128(R128, A128, B128) \
|
|
135 { \
|
|
136 UINT128 Q128; \
|
|
137 Q128.w[1] = (A128).w[1]+(B128).w[1]; \
|
|
138 Q128.w[0] = (B128).w[0] + (A128).w[0]; \
|
|
139 if(Q128.w[0] < (B128).w[0]) \
|
|
140 Q128.w[1] ++; \
|
|
141 (R128).w[1] = Q128.w[1]; \
|
|
142 (R128).w[0] = Q128.w[0]; \
|
|
143 }
|
|
144 #define __sub_128_128(R128, A128, B128) \
|
|
145 { \
|
|
146 UINT128 Q128; \
|
|
147 Q128.w[1] = (A128).w[1]-(B128).w[1]; \
|
|
148 Q128.w[0] = (A128).w[0] - (B128).w[0]; \
|
|
149 if((A128).w[0] < (B128).w[0]) \
|
|
150 Q128.w[1] --; \
|
|
151 (R128).w[1] = Q128.w[1]; \
|
|
152 (R128).w[0] = Q128.w[0]; \
|
|
153 }
|
|
154 #define __add_carry_out(S, CY, X, Y) \
|
|
155 { \
|
|
156 UINT64 X1=X; \
|
|
157 S = X + Y; \
|
|
158 CY = (S<X1) ? 1 : 0; \
|
|
159 }
|
|
160 #define __add_carry_in_out(S, CY, X, Y, CI) \
|
|
161 { \
|
|
162 UINT64 X1; \
|
|
163 X1 = X + CI; \
|
|
164 S = X1 + Y; \
|
|
165 CY = ((S<X1) || (X1<CI)) ? 1 : 0; \
|
|
166 }
|
|
167 #define __sub_borrow_out(S, CY, X, Y) \
|
|
168 { \
|
|
169 UINT64 X1=X; \
|
|
170 S = X - Y; \
|
|
171 CY = (S>X1) ? 1 : 0; \
|
|
172 }
|
|
173 #define __sub_borrow_in_out(S, CY, X, Y, CI) \
|
|
174 { \
|
|
175 UINT64 X1, X0=X; \
|
|
176 X1 = X - CI; \
|
|
177 S = X1 - Y; \
|
|
178 CY = ((S>X1) || (X1>X0)) ? 1 : 0; \
|
|
179 }
|
|
180 // increment C128 and check for rounding overflow:
|
|
181 // if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
|
|
182 #define INCREMENT(C_128, exp) \
|
|
183 { \
|
|
184 C_128.w[0]++; \
|
|
185 if (C_128.w[0] == 0) C_128.w[1]++; \
|
|
186 if (C_128.w[1] == 0x0001ed09bead87c0ull && \
|
|
187 C_128.w[0] == 0x378d8e6400000000ull) { \
|
|
188 exp++; \
|
|
189 C_128.w[1] = 0x0000314dc6448d93ull; \
|
|
190 C_128.w[0] = 0x38c15b0a00000000ull; \
|
|
191 } \
|
|
192 }
|
|
193 // decrement C128 and check for rounding underflow, but only at the
|
|
194 // boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
|
|
195 // and decrement the exponent
|
|
196 #define DECREMENT(C_128, exp) \
|
|
197 { \
|
|
198 C_128.w[0]--; \
|
|
199 if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--; \
|
|
200 if (C_128.w[1] == 0x0000314dc6448d93ull && \
|
|
201 C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) { \
|
|
202 exp--; \
|
|
203 C_128.w[1] = 0x0001ed09bead87c0ull; \
|
|
204 C_128.w[0] = 0x378d8e63ffffffffull; \
|
|
205 } \
|
|
206 }
|
|
207
|
|
/*********************************************************************
 *
 *      Multiply Macros
 *
 *********************************************************************/
// Low 64 bits of the product only.
#define __mul_64x64_to_64(P64, CX, CY)   (P64) = (CX) * (CY)
/***************************************
 *  Signed, Full 64x64-bit Multiply
 ***************************************/
// Full 128-bit signed product: compute the unsigned product, then
// correct the high word using the operands' sign masks
// (standard unsigned-to-signed full-multiply correction).
#define __imul_64x64_to_128(P, CX, CY)   \
{                                        \
UINT64 SX, SY;                           \
	__mul_64x64_to_128(P, CX, CY);       \
                                         \
	SX = ((SINT64)(CX))>>63;             \
	SY = ((SINT64)(CY))>>63;             \
	SX &= CY;  SY &= CX;                 \
                                         \
	(P).w[1] = (P).w[1] - SX - SY;       \
}
/***************************************
 *  Signed, Full 64x128-bit Multiply
 ***************************************/
// Ph:Ql = (signed 64-bit A) * (signed 128-bit B); the middle partial
// product is sign-extended before accumulation.
#define __imul_64x128_full(Ph, Ql, A, B)        \
{                                               \
UINT128 ALBL, ALBH, QM2, QM;                    \
                                                \
	__imul_64x64_to_128(ALBH, (A), (B).w[1]);   \
	__imul_64x64_to_128(ALBL, (A), (B).w[0]);   \
                                                \
	(Ql).w[0] = ALBL.w[0];                      \
	QM.w[0] = ALBL.w[1];                        \
	QM.w[1] = ((SINT64)ALBL.w[1])>>63;          \
	__add_128_128(QM2, ALBH, QM);               \
	(Ql).w[1] = QM2.w[0];                       \
	Ph = QM2.w[1];                              \
}
/*****************************************************
 *      Unsigned Multiply Macros
 *****************************************************/
// get full 64x64bit product
// Portable schoolbook 32x32 partial products; exact for all inputs.
#define __mul_64x64_to_128(P, CX, CY)       \
{                                           \
UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;      \
	CXH = (CX) >> 32;                       \
	CXL = (UINT32)(CX);                     \
	CYH = (CY) >> 32;                       \
	CYL = (UINT32)(CY);                     \
                                            \
	PM = CXH*CYL;                           \
	PH = CXH*CYH;                           \
	PL = CXL*CYL;                           \
	PM2 = CXL*CYH;                          \
	PH += (PM>>32);                         \
	PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
                                            \
	(P).w[1] = PH + (PM>>32);               \
	(P).w[0] = (PM<<32)+(UINT32)PL;         \
}
// get full 64x64bit product
// Note:
// This macro is used for CX < 2^61, CY < 2^61
// (the combined middle term cannot overflow under that bound,
//  so one 64-bit accumulator replaces PM/PM2 bookkeeping)
#define __mul_64x64_to_128_fast(P, CX, CY)  \
{                                           \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;      \
	CXH = (CX) >> 32;                       \
	CXL = (UINT32)(CX);                     \
	CYH = (CY) >> 32;                       \
	CYL = (UINT32)(CY);                     \
                                            \
	PM = CXH*CYL;                           \
	PL = CXL*CYL;                           \
	PH = CXH*CYH;                           \
	PM += CXL*CYH;                          \
	PM += (PL>>32);                         \
                                            \
	(P).w[1] = PH + (PM>>32);               \
	(P).w[0] = (PM<<32)+(UINT32)PL;         \
}
// used for CX< 2^60
// Full 128-bit square of CX; the cross term is computed once and
// doubled (PM += PM).
#define __sqr64_fast(P, CX)                 \
{                                           \
UINT64 CXH, CXL, PL, PH, PM;                \
	CXH = (CX) >> 32;                       \
	CXL = (UINT32)(CX);                     \
                                            \
	PM = CXH*CXL;                           \
	PL = CXL*CXL;                           \
	PH = CXH*CXH;                           \
	PM += PM;                               \
	PM += (PL>>32);                         \
                                            \
	(P).w[1] = PH + (PM>>32);               \
	(P).w[0] = (PM<<32)+(UINT32)PL;         \
}
|
|
// get full 64x64bit product
// Note:
// This implementation is used for CX < 2^61, CY < 2^61
//
// Same scheme as __mul_64x64_to_128_fast but only the high 64 bits
// of the product are produced.
#define __mul_64x64_to_64_high_fast(P, CX, CY)  \
{                                               \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;          \
	CXH = (CX) >> 32;                           \
	CXL = (UINT32)(CX);                         \
	CYH = (CY) >> 32;                           \
	CYL = (UINT32)(CY);                         \
                                                \
	PM = CXH*CYL;                               \
	PL = CXL*CYL;                               \
	PH = CXH*CYH;                               \
	PM += CXL*CYH;                              \
	PM += (PL>>32);                             \
                                                \
	(P) = PH + (PM>>32);                        \
}
// get full 64x64bit product
// (identical algorithm to __mul_64x64_to_128; kept as a separate
// name so call sites can select it independently)
#define __mul_64x64_to_128_full(P, CX, CY)  \
{                                           \
UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;      \
	CXH = (CX) >> 32;                       \
	CXL = (UINT32)(CX);                     \
	CYH = (CY) >> 32;                       \
	CYL = (UINT32)(CY);                     \
                                            \
	PM = CXH*CYL;                           \
	PH = CXH*CYH;                           \
	PL = CXL*CYL;                           \
	PM2 = CXL*CYH;                          \
	PH += (PM>>32);                         \
	PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
                                            \
	(P).w[1] = PH + (PM>>32);               \
	(P).w[0] = (PM<<32)+(UINT32)PL;         \
}
// Q = high 128 bits of the 256-bit product A*B; only the carry of
// the low partial product (ALBL.w[1]) reaches the result.
#define __mul_128x128_high(Q, A, B)                 \
{                                                   \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;            \
                                                    \
	__mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);   \
	__mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);   \
	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
	__mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);    \
                                                    \
	__add_128_128(QM, ALBH, AHBL);                  \
	__add_128_64(QM2, QM, ALBL.w[1]);               \
	__add_128_64((Q), AHBH, QM2.w[1]);              \
}
// Qh:Ql = full 256-bit product A*B from four 64x64 partial products.
#define __mul_128x128_full(Qh, Ql, A, B)            \
{                                                   \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;            \
                                                    \
	__mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);   \
	__mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);   \
	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
	__mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);    \
                                                    \
	__add_128_128(QM, ALBH, AHBL);                  \
	(Ql).w[0] = ALBL.w[0];                          \
	__add_128_64(QM2, QM, ALBL.w[1]);               \
	__add_128_64((Qh), AHBH, QM2.w[1]);             \
	(Ql).w[1] = QM2.w[0];                           \
}
// Ql = low 128 bits of A*B (product modulo 2^128).
#define __mul_128x128_low(Ql, A, B)                 \
{                                                   \
UINT128 ALBL;                                       \
UINT64 QM64;                                        \
                                                    \
	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
	QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1];   \
                                                    \
	(Ql).w[0] = ALBL.w[0];                          \
	(Ql).w[1] = QM64 + ALBL.w[1];                   \
}
// Ql = low 128 bits of the 64x128-bit product A*B.
#define __mul_64x128_low(Ql, A, B)                  \
{                                                   \
  UINT128 ALBL, ALBH, QM2;                          \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);          \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);          \
  (Ql).w[0] = ALBL.w[0];                            \
  __add_128_64(QM2, ALBH, ALBL.w[1]);               \
  (Ql).w[1] = QM2.w[0];                             \
}
// Ph:Ql = full 192-bit product of 64-bit A and 128-bit B
// (high 64 bits in Ph, low 128 bits in Ql).
#define __mul_64x128_full(Ph, Ql, A, B)             \
{                                                   \
UINT128 ALBL, ALBH, QM2;                            \
                                                    \
	__mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
	__mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
                                                    \
	(Ql).w[0] = ALBL.w[0];                          \
	__add_128_64(QM2, ALBH, ALBL.w[1]);             \
	(Ql).w[1] = QM2.w[0];                           \
	Ph = QM2.w[1];                                  \
}
// Q (192-bit, three words) = 64-bit A times 128-bit B.
#define __mul_64x128_to_192(Q, A, B)                \
{                                                   \
UINT128 ALBL, ALBH, QM2;                            \
                                                    \
	__mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
	__mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
                                                    \
	(Q).w[0] = ALBL.w[0];                           \
	__add_128_64(QM2, ALBH, ALBL.w[1]);             \
	(Q).w[1] = QM2.w[0];                            \
	(Q).w[2] = QM2.w[1];                            \
}
// NOTE(review): byte-for-byte duplicate of __mul_64x128_to_192 above
// (different spelling, no underscore before "192"); both names are
// referenced by library sources, so both are kept.
#define __mul_64x128_to192(Q, A, B)                 \
{                                                   \
UINT128 ALBL, ALBH, QM2;                            \
                                                    \
	__mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
	__mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
                                                    \
	(Q).w[0] = ALBL.w[0];                           \
	__add_128_64(QM2, ALBH, ALBL.w[1]);             \
	(Q).w[1] = QM2.w[0];                            \
	(Q).w[2] = QM2.w[1];                            \
}
|
|
// P256 (four words) = full 256-bit product of two 128-bit values,
// built from two 64x128 products joined with a carry chain.
#define __mul_128x128_to_256(P256, A, B)                     \
{                                                            \
UINT128 Qll, Qlh;                                            \
UINT64 Phl, Phh, CY1, CY2;                                   \
                                                             \
	__mul_64x128_full(Phl, Qll, A.w[0], B);                  \
	__mul_64x128_full(Phh, Qlh, A.w[1], B);                  \
	(P256).w[0] = Qll.w[0];                                  \
	__add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);    \
	__add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1); \
	(P256).w[3] = Phh + CY2;                                 \
}
//
// For better performance, will check A.w[1] against 0,
// but not B.w[1]
// Use this macro accordingly
#define __mul_128x128_to_256_check_A(P256, A, B)                 \
{                                                                \
UINT128 Qll, Qlh;                                                \
UINT64 Phl, Phh, CY1, CY2;                                       \
                                                                 \
	__mul_64x128_full(Phl, Qll, A.w[0], B);                      \
	(P256).w[0] = Qll.w[0];                                      \
	if(A.w[1]) {                                                 \
		__mul_64x128_full(Phh, Qlh, A.w[1], B);                  \
		__add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);    \
		__add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1); \
		(P256).w[3] = Phh + CY2; }                               \
	else {                                                       \
		(P256).w[1] = Qll.w[1];                                  \
		(P256).w[2] = Phl;                                       \
		(P256).w[3] = 0; }                                       \
}
// lP (four words) = 64-bit lA times 192-bit lB.
#define __mul_64x192_to_256(lP, lA, lB)                    \
{                                                          \
UINT128 lP0,lP1,lP2;                                       \
UINT64 lC;                                                 \
	__mul_64x64_to_128(lP0, lA, (lB).w[0]);                \
	__mul_64x64_to_128(lP1, lA, (lB).w[1]);                \
	__mul_64x64_to_128(lP2, lA, (lB).w[2]);                \
	(lP).w[0] = lP0.w[0];                                  \
	__add_carry_out((lP).w[1],lC,lP1.w[0],lP0.w[1]);       \
	__add_carry_in_out((lP).w[2],lC,lP2.w[0],lP1.w[1],lC); \
	(lP).w[3] = lP2.w[1] + lC;                             \
}
// P (five words) = 64-bit A times 256-bit B.
#define __mul_64x256_to_320(P, A, B)                       \
{                                                          \
UINT128 lP0,lP1,lP2,lP3;                                   \
UINT64 lC;                                                 \
	__mul_64x64_to_128(lP0, A, (B).w[0]);                  \
	__mul_64x64_to_128(lP1, A, (B).w[1]);                  \
	__mul_64x64_to_128(lP2, A, (B).w[2]);                  \
	__mul_64x64_to_128(lP3, A, (B).w[3]);                  \
	(P).w[0] = lP0.w[0];                                   \
	__add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);        \
	__add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC);  \
	__add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC);  \
	(P).w[4] = lP3.w[1] + lC;                              \
}
|
|
// A*A
// Full 128x128-bit product
// Squaring needs only three 64x64 products: the cross product Qlh
// is doubled in place (shift-with-carry into Qhh) before assembly.
#define __sqr128_to_256(P256, A)                                      \
{                                                                     \
UINT128 Qll, Qlh, Qhh;                                                \
UINT64 TMP_C1, TMP_C2;                                                \
                                                                      \
	__mul_64x64_to_128(Qhh, A.w[1], A.w[1]);                          \
	__mul_64x64_to_128(Qlh, A.w[0], A.w[1]);                          \
	Qhh.w[1] += (Qlh.w[1]>>63);                                       \
	Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63);                    \
	Qlh.w[0] += Qlh.w[0];                                             \
	__mul_64x64_to_128(Qll, A.w[0], A.w[0]);                          \
                                                                      \
	__add_carry_out((P256).w[1],TMP_C1, Qlh.w[0], Qll.w[1]);          \
	(P256).w[0] = Qll.w[0];                                           \
	__add_carry_in_out((P256).w[2],TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1); \
	(P256).w[3] = Qhh.w[1]+TMP_C2;                                    \
}
// Same as __mul_128x128_to_256 but the 256-bit result is delivered
// as two separate 128-bit halves PQh (high) and PQl (low).
#define __mul_128x128_to_256_low_high(PQh, PQl, A, B)       \
{                                                           \
UINT128 Qll, Qlh;                                           \
UINT64 Phl, Phh, C1, C2;                                    \
                                                            \
	__mul_64x128_full(Phl, Qll, A.w[0], B);                 \
	__mul_64x128_full(Phh, Qlh, A.w[1], B);                 \
	(PQl).w[0] = Qll.w[0];                                  \
	__add_carry_out((PQl).w[1],C1, Qlh.w[0], Qll.w[1]);     \
	__add_carry_in_out((PQh).w[0],C2, Qlh.w[1], Phl, C1);   \
	(PQh).w[1] = Phh + C2;                                  \
}
// P (eight words) = 256x256-bit product: four 64x256 rows,
// each accumulated into P at its word offset with a carry chain.
#define __mul_256x256_to_512(P, A, B)                     \
{                                                         \
UINT512 P0,P1,P2,P3;                                      \
UINT64 CY;                                                \
	__mul_64x256_to_320(P0, (A).w[0], B);                 \
	__mul_64x256_to_320(P1, (A).w[1], B);                 \
	__mul_64x256_to_320(P2, (A).w[2], B);                 \
	__mul_64x256_to_320(P3, (A).w[3], B);                 \
	(P).w[0] = P0.w[0];                                   \
	__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);         \
	__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY);   \
	__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY);   \
	__add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY);   \
	(P).w[5] = P1.w[4] + CY;                              \
	__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);        \
	__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);  \
	__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);  \
	__add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);  \
	(P).w[6] = P2.w[4] + CY;                              \
	__add_carry_out((P).w[3],CY,P3.w[0],(P).w[3]);        \
	__add_carry_in_out((P).w[4],CY,P3.w[1],(P).w[4],CY);  \
	__add_carry_in_out((P).w[5],CY,P3.w[2],(P).w[5],CY);  \
	__add_carry_in_out((P).w[6],CY,P3.w[3],(P).w[6],CY);  \
	(P).w[7] = P3.w[4] + CY;                              \
}
// P (seven words) = 192x256-bit product: three 64x256 rows.
#define __mul_192x256_to_448(P, A, B)                     \
{                                                         \
UINT512 P0,P1,P2;                                         \
UINT64 CY;                                                \
	__mul_64x256_to_320(P0, (A).w[0], B);                 \
	__mul_64x256_to_320(P1, (A).w[1], B);                 \
	__mul_64x256_to_320(P2, (A).w[2], B);                 \
	(P).w[0] = P0.w[0];                                   \
	__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);         \
	__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY);   \
	__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY);   \
	__add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY);   \
	(P).w[5] = P1.w[4] + CY;                              \
	__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);        \
	__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);  \
	__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);  \
	__add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);  \
	(P).w[6] = P2.w[4] + CY;                              \
}
|
|
// P (ten words) = 320x320-bit product.  Decomposed as
// (A256 + a4*2^256) * (B256 + b4*2^256): the 256x256 core is computed
// first, then the two 64x256 cross rows (P1, P2), their sum (in P0)
// and the top 64x64 product (P3) are folded in at word offset 4.
#define __mul_320x320_to_640(P, A, B)                     \
{                                                         \
UINT512 P0,P1,P2,P3;                                      \
UINT64 CY;                                                \
	__mul_256x256_to_512((P), (A), B);                    \
	__mul_64x256_to_320(P1, (A).w[4], B);                 \
	__mul_64x256_to_320(P2, (B).w[4], A);                 \
	__mul_64x64_to_128(P3, (A).w[4], (B).w[4]);           \
	__add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);        \
	__add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY);  \
	__add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY);  \
	__add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY);  \
	__add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY);  \
	P3.w[1] += CY;                                        \
	__add_carry_out((P).w[4],CY,(P).w[4],P0.w[0]);        \
	__add_carry_in_out((P).w[5],CY,(P).w[5],P0.w[1],CY);  \
	__add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[2],CY);  \
	__add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[3],CY);  \
	__add_carry_in_out((P).w[8],CY,P3.w[0],P0.w[4],CY);   \
	(P).w[9] = P3.w[1] + CY;                              \
}
// P (twelve words) = 384x384-bit product; same decomposition as
// __mul_320x320_to_640 one word wider (cross rows at offset 5).
#define __mul_384x384_to_768(P, A, B)                     \
{                                                         \
UINT512 P0,P1,P2,P3;                                      \
UINT64 CY;                                                \
	__mul_320x320_to_640((P), (A), B);                    \
	__mul_64x320_to_384(P1, (A).w[5], B);                 \
	__mul_64x320_to_384(P2, (B).w[5], A);                 \
	__mul_64x64_to_128(P3, (A).w[5], (B).w[5]);           \
	__add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);        \
	__add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY);  \
	__add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY);  \
	__add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY);  \
	__add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY);  \
	__add_carry_in_out((P0).w[5],CY,P1.w[5],P2.w[5],CY);  \
	P3.w[1] += CY;                                        \
	__add_carry_out((P).w[5],CY,(P).w[5],P0.w[0]);        \
	__add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[1],CY);  \
	__add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[2],CY);  \
	__add_carry_in_out((P).w[8],CY,(P).w[8],P0.w[3],CY);  \
	__add_carry_in_out((P).w[9],CY,(P).w[9],P0.w[4],CY);  \
	__add_carry_in_out((P).w[10],CY,P3.w[0],P0.w[5],CY);  \
	(P).w[11] = P3.w[1] + CY;                             \
}
|
|
// Ql = low 128 bits of 64-bit A times 128-bit B; the high partial
// product contributes only its low word (truncating form of
// __mul_64x128_low).
#define __mul_64x128_short(Ql, A, B)              \
{                                                 \
UINT64 ALBH_L;                                    \
                                                  \
	__mul_64x64_to_64(ALBH_L, (A),(B).w[1]);      \
	__mul_64x64_to_128((Ql), (A), (B).w[0]);      \
                                                  \
	(Ql).w[1] += ALBH_L;                          \
}
// D = _TMP * 10, computed as (_TMP<<1) + (_TMP<<3) to avoid a
// 128-bit multiply.  _TMP must not alias D.
#define __scale128_10(D,_TMP)                       \
{                                                   \
UINT128 _TMP2,_TMP8;                                \
	_TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63);    \
	_TMP2.w[0] = _TMP.w[0]<<1;                      \
	_TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61);    \
	_TMP8.w[0] = _TMP.w[0]<<3;                      \
	__add_128_128(D, _TMP2, _TMP8);                 \
}
// 64x64-bit product
// Machine-independent full product (same algorithm as
// __mul_64x64_to_128, separate name for selective use).
#define __mul_64x64_to_128MACH(P128, CX64, CY64)  \
{                                                 \
UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;              \
	CXH = (CX64) >> 32;                           \
	CXL = (UINT32)(CX64);                         \
	CYH = (CY64) >> 32;                           \
	CYL = (UINT32)(CY64);                         \
	PM = CXH*CYL;                                 \
	PH = CXH*CYH;                                 \
	PL = CXL*CYL;                                 \
	PM2 = CXL*CYH;                                \
	PH += (PM>>32);                               \
	PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);       \
	(P128).w[1] = PH + (PM>>32);                  \
	(P128).w[0] = (PM<<32)+(UINT32)PL;            \
}
// 64x64-bit product
// High 64 bits only of the full product.
#define __mul_64x64_to_128HIGH(P64, CX64, CY64)   \
{                                                 \
UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;              \
	CXH = (CX64) >> 32;                           \
	CXL = (UINT32)(CX64);                         \
	CYH = (CY64) >> 32;                           \
	CYL = (UINT32)(CY64);                         \
	PM = CXH*CYL;                                 \
	PH = CXH*CYH;                                 \
	PL = CXL*CYL;                                 \
	PM2 = CXL*CYH;                                \
	PH += (PM>>32);                               \
	PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);       \
	P64 = PH + (PM>>32);                          \
}
// Q128 = low 128 bits of 64-bit A64 times 128-bit B128.
#define __mul_128x64_to_128(Q128, A64, B128)          \
{                                                     \
UINT64 ALBH_L;                                        \
	ALBH_L = (A64) * (B128).w[1];                     \
	__mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]); \
	(Q128).w[1] += ALBH_L;                            \
}
// might simplify by calculating just QM2.w[0]
// Ql = low 128 bits of 64-bit A times 128-bit B.
#define __mul_64x128_to_128(Ql, A, B)         \
{                                             \
UINT128 ALBL, ALBH, QM2;                      \
	__mul_64x64_to_128(ALBH, (A), (B).w[1]);  \
	__mul_64x64_to_128(ALBL, (A), (B).w[0]);  \
	(Ql).w[0] = ALBL.w[0];                    \
	__add_128_64(QM2, ALBH, ALBL.w[1]);       \
	(Ql).w[1] = QM2.w[0];                     \
}
|
|
/*********************************************************************
 *
 *      BID Pack/Unpack Macros
 *
 *********************************************************************/
/////////////////////////////////////////
// BID64 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_64  767
#define DECIMAL_EXPONENT_BIAS 398
#define MAX_FORMAT_DIGITS     16
/////////////////////////////////////////
// BID128 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_128  12287
#define DECIMAL_EXPONENT_BIAS_128  6176
#define MAX_FORMAT_DIGITS_128  34
/////////////////////////////////////////
// BID32 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_32  191
#define DECIMAL_EXPONENT_BIAS_32  101
#define MAX_FORMAT_DIGITS_32  7
////////////////////////////////////////
//  Constant Definitions
///////////////////////////////////////
// Encoding masks: SPECIAL_ENCODING selects the two steering bits
// (62:61); INFINITY/NAN/SNAN masks match the combination-field
// patterns of the BID interchange encoding.  "S"-prefixed variants
// include the sign bit.
#define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
#define INFINITY_MASK64         0x7800000000000000ull
#define SINFINITY_MASK64        0xf800000000000000ull
#define SSNAN_MASK64            0xfc00000000000000ull
#define NAN_MASK64              0x7c00000000000000ull
#define SNAN_MASK64             0x7e00000000000000ull
#define QUIET_MASK64            0xfdffffffffffffffull
// Coefficient/exponent extraction for the two BID64 layouts:
// "small" form holds the coefficient in 53 bits, "large" form in
// 51 bits with an implied high bit (LARGE_COEFF_HIGH_BIT64).
#define LARGE_COEFF_MASK64      0x0007ffffffffffffull
#define LARGE_COEFF_HIGH_BIT64  0x0020000000000000ull
#define SMALL_COEFF_MASK64      0x001fffffffffffffull
#define EXPONENT_MASK64         0x3ff
#define EXPONENT_SHIFT_LARGE64  51
#define EXPONENT_SHIFT_SMALL64  53
#define LARGEST_BID64           0x77fb86f26fc0ffffull
#define SMALLEST_BID64          0xf7fb86f26fc0ffffull
// BID128 layout masks.
#define SMALL_COEFF_MASK128     0x0001ffffffffffffull
#define LARGE_COEFF_MASK128     0x00007fffffffffffull
#define EXPONENT_MASK128        0x3fff
#define LARGEST_BID128_HIGH     0x5fffed09bead87c0ull
#define LARGEST_BID128_LOW      0x378d8e63ffffffffull
// BID32 layout masks.
#define SPECIAL_ENCODING_MASK32 0x60000000ul
#define INFINITY_MASK32         0x78000000ul
#define LARGE_COEFF_MASK32      0x007ffffful
#define LARGE_COEFF_HIGH_BIT32  0x00800000ul
#define SMALL_COEFF_MASK32      0x001ffffful
#define EXPONENT_MASK32         0xff
#define LARGEST_BID32           0x77f8967f
#define NAN_MASK32              0x7c000000
#define SNAN_MASK32             0x7e000000
// IEEE 754 binary64 exponent field, used by binary<->decimal paths.
#define MASK_BINARY_EXPONENT  0x7ff0000000000000ull
#define BINARY_EXPONENT_BIAS  0x3ff
#define UPPER_EXPON_LIMIT     51
// data needed for BID pack/unpack macros
// (lookup tables defined in the library's table sources — rounding
// constants, 10^k powers, scaled reciprocals of powers of 10, and
// digit-count estimators)
extern UINT64 round_const_table[][19];
extern UINT128 reciprocals10_128[];
extern int recip_scale[];
extern UINT128 power10_table_128[];
extern int estimate_decimal_digits[];
extern int estimate_bin_expon[];
extern UINT64 power10_index_binexp[];
extern int short_recip_scale[];
extern UINT64 reciprocals10_64[];
extern UINT128 power10_index_binexp_128[];
extern UINT128 round_const_table_128[][36];
|
778
|
|
779
|
|
//////////////////////////////////////////////
//  Status Flag Handling
/////////////////////////////////////////////
// OR the given exception bits into the status word *fpsc.
#define __set_status_flags(fpsc, status)  *(fpsc) |= status
// Nonzero iff the inexact flag is already set in *fpsc.
#define is_inexact(fpsc)  ((*(fpsc))&INEXACT_EXCEPTION)
|
|
785
|
|
// Decompose a BID64 value x into sign, biased exponent and coefficient.
//   *psign_x        <- raw sign bit of x (0 or 0x8000000000000000).
//   *pexponent_x    <- biased decimal exponent (0 for NaN/Infinity).
//   *pcoefficient_x <- decimal coefficient; for NaN the payload bits
//                      (canonicalized), for Infinity just sign|Inf bits.
// Returns the coefficient, or 0 when x is NaN/Infinity or a
// non-canonical large-form value (coefficient >= 10^16, which the
// standard treats as 0).
__BID_INLINE__ UINT64
unpack_BID64 (UINT64 * psign_x, int *pexponent_x,
	      UINT64 * pcoefficient_x, UINT64 x) {
  UINT64 tmp, coeff;

  *psign_x = x & 0x8000000000000000ull;

  if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
    // special encodings
    // coefficient: 51 explicit bits plus the implied high bit
    coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;

    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
      *pexponent_x = 0;
      // NaN: keep sign/combination bits plus the payload
      *pcoefficient_x = x & 0xfe03ffffffffffffull;
      // non-canonical NaN payload (>= 10^15) is cleared
      if ((x & 0x0003ffffffffffffull) >= 1000000000000000ull)
	*pcoefficient_x = x & 0xfe00000000000000ull;
      // Infinity proper: only the sign and Inf pattern survive
      if ((x & NAN_MASK64) == INFINITY_MASK64)
	*pcoefficient_x = x & SINFINITY_MASK64;
      return 0;	// NaN or Infinity
    }
    // check for non-canonical values
    if (coeff >= 10000000000000000ull)
      coeff = 0;
    *pcoefficient_x = coeff;
    // get exponent
    tmp = x >> EXPONENT_SHIFT_LARGE64;
    *pexponent_x = (int) (tmp & EXPONENT_MASK64);
    return coeff;
  }
  // small-form (steering bits not 11): 53-bit coefficient
  // exponent
  tmp = x >> EXPONENT_SHIFT_SMALL64;
  *pexponent_x = (int) (tmp & EXPONENT_MASK64);
  // coefficient
  *pcoefficient_x = (x & SMALL_COEFF_MASK64);

  return *pcoefficient_x;
}
|
|
824
|
|
825 //
|
|
826 // BID64 pack macro (general form)
|
|
827 //
|
|
828 __BID_INLINE__ UINT64
|
|
829 get_BID64 (UINT64 sgn, int expon, UINT64 coeff, int rmode,
|
|
830 unsigned *fpsc) {
|
|
831 UINT128 Stemp, Q_low;
|
|
832 UINT64 QH, r, mask, C64, remainder_h, CY, carry;
|
|
833 int extra_digits, amount, amount2;
|
|
834 unsigned status;
|
|
835
|
|
836 if (coeff > 9999999999999999ull) {
|
|
837 expon++;
|
|
838 coeff = 1000000000000000ull;
|
|
839 }
|
|
840 // check for possible underflow/overflow
|
|
841 if (((unsigned) expon) >= 3 * 256) {
|
|
842 if (expon < 0) {
|
|
843 // underflow
|
|
844 if (expon + MAX_FORMAT_DIGITS < 0) {
|
|
845 #ifdef SET_STATUS_FLAGS
|
|
846 __set_status_flags (fpsc,
|
|
847 UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
|
|
848 #endif
|
|
849 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
|
|
850 #ifndef IEEE_ROUND_NEAREST
|
|
851 if (rmode == ROUNDING_DOWN && sgn)
|
|
852 return 0x8000000000000001ull;
|
|
853 if (rmode == ROUNDING_UP && !sgn)
|
|
854 return 1ull;
|
|
855 #endif
|
|
856 #endif
|
|
857 // result is 0
|
|
858 return sgn;
|
|
859 }
|
|
860 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
|
|
861 #ifndef IEEE_ROUND_NEAREST
|
|
862 if (sgn && (unsigned) (rmode - 1) < 2)
|
|
863 rmode = 3 - rmode;
|
|
864 #endif
|
|
865 #endif
|
|
866 // get digits to be shifted out
|
|
867 extra_digits = -expon;
|
|
868 coeff += round_const_table[rmode][extra_digits];
|
|
869
|
|
870 // get coeff*(2^M[extra_digits])/10^extra_digits
|
|
871 __mul_64x128_full (QH, Q_low, coeff,
|
|
872 reciprocals10_128[extra_digits]);
|
|
873
|
|
874 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
|
|
875 amount = recip_scale[extra_digits];
|
|
876
|
|
877 C64 = QH >> amount;
|
|
878
|
|
879 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
|
|
880 #ifndef IEEE_ROUND_NEAREST
|
|
881 if (rmode == 0) //ROUNDING_TO_NEAREST
|
|
882 #endif
|
|
883 if (C64 & 1) {
|
|
884 // check whether fractional part of initial_P/10^extra_digits is exactly .5
|
|
885
|
|
886 // get remainder
|
|
887 amount2 = 64 - amount;
|
|
888 remainder_h = 0;
|
|
889 remainder_h--;
|
|
890 remainder_h >>= amount2;
|
|
891 remainder_h = remainder_h & QH;
|
|
892
|
|
893 if (!remainder_h
|
|
894 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
|
|
895 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
|
|
896 && Q_low.w[0] <
|
|
897 reciprocals10_128[extra_digits].w[0]))) {
|
|
898 C64--;
|
|
899 }
|
|
900 }
|
|
901 #endif
|
|
902
|
|
903 #ifdef SET_STATUS_FLAGS
|
|
904
|
|
905 if (is_inexact (fpsc))
|
|
906 __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
|
|
907 else {
|
|
908 status = INEXACT_EXCEPTION;
|
|
909 // get remainder
|
|
910 remainder_h = QH << (64 - amount);
|
|
911
|
|
912 switch (rmode) {
|
|
913 case ROUNDING_TO_NEAREST:
|
|
914 case ROUNDING_TIES_AWAY:
|
|
915 // test whether fractional part is 0
|
|
916 if (remainder_h == 0x8000000000000000ull
|
|
917 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
|
|
918 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
|
|
919 && Q_low.w[0] <
|
|
920 reciprocals10_128[extra_digits].w[0])))
|
|
921 status = EXACT_STATUS;
|
|
922 break;
|
|
923 case ROUNDING_DOWN:
|
|
924 case ROUNDING_TO_ZERO:
|
|
925 if (!remainder_h
|
|
926 && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
|
|
927 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
|
|
928 && Q_low.w[0] <
|
|
929 reciprocals10_128[extra_digits].w[0])))
|
|
930 status = EXACT_STATUS;
|
|
931 break;
|
|
932 default:
|
|
933 // round up
|
|
934 __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
|
|
935 reciprocals10_128[extra_digits].w[0]);
|
|
936 __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
|
|
937 reciprocals10_128[extra_digits].w[1], CY);
|
|
938 if ((remainder_h >> (64 - amount)) + carry >=
|
|
939 (((UINT64) 1) << amount))
|
|
940 status = EXACT_STATUS;
|
|
941 }
|
|
942
|
|
943 if (status != EXACT_STATUS)
|
|
944 __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
|
|
945 }
|
|
946
|
|
947 #endif
|
|
948
|
|
949 return sgn | C64;
|
|
950 }
|
|
951 while (coeff < 1000000000000000ull && expon >= 3 * 256) {
|
|
952 expon--;
|
|
953 coeff = (coeff << 3) + (coeff << 1);
|
|
954 }
|
|
955 if (expon > DECIMAL_MAX_EXPON_64) {
|
|
956 #ifdef SET_STATUS_FLAGS
|
|
957 __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
|
|
958 #endif
|
|
959 // overflow
|
|
960 r = sgn | INFINITY_MASK64;
|
|
961 switch (rmode) {
|
|
962 case ROUNDING_DOWN:
|
|
963 if (!sgn)
|
|
964 r = LARGEST_BID64;
|
|
965 break;
|
|
966 case ROUNDING_TO_ZERO:
|
|
967 r = sgn | LARGEST_BID64;
|
|
968 break;
|
|
969 case ROUNDING_UP:
|
|
970 // round up
|
|
971 if (sgn)
|
|
972 r = SMALLEST_BID64;
|
|
973 }
|
|
974 return r;
|
|
975 }
|
|
976 }
|
|
977
|
|
978 mask = 1;
|
|
979 mask <<= EXPONENT_SHIFT_SMALL64;
|
|
980
|
|
981 // check whether coefficient fits in 10*5+3 bits
|
|
982 if (coeff < mask) {
|
|
983 r = expon;
|
|
984 r <<= EXPONENT_SHIFT_SMALL64;
|
|
985 r |= (coeff | sgn);
|
|
986 return r;
|
|
987 }
|
|
988 // special format
|
|
989
|
|
990 // eliminate the case coeff==10^16 after rounding
|
|
991 if (coeff == 10000000000000000ull) {
|
|
992 r = expon + 1;
|
|
993 r <<= EXPONENT_SHIFT_SMALL64;
|
|
994 r |= (1000000000000000ull | sgn);
|
|
995 return r;
|
|
996 }
|
|
997
|
|
998 r = expon;
|
|
999 r <<= EXPONENT_SHIFT_LARGE64;
|
|
1000 r |= (sgn | SPECIAL_ENCODING_MASK64);
|
|
1001 // add coeff, without leading bits
|
|
1002 mask = (mask >> 2) - 1;
|
|
1003 coeff &= mask;
|
|
1004 r |= coeff;
|
|
1005
|
|
1006 return r;
|
|
1007 }
|
|
1008
|
|
1009
|
|
1010
|
|
1011
|
|
1012 //
|
|
1013 // No overflow/underflow checking
|
|
1014 //
|
|
1015 __BID_INLINE__ UINT64
|
|
1016 fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
|
|
1017 UINT64 r, mask;
|
|
1018
|
|
1019 mask = 1;
|
|
1020 mask <<= EXPONENT_SHIFT_SMALL64;
|
|
1021
|
|
1022 // check whether coefficient fits in 10*5+3 bits
|
|
1023 if (coeff < mask) {
|
|
1024 r = expon;
|
|
1025 r <<= EXPONENT_SHIFT_SMALL64;
|
|
1026 r |= (coeff | sgn);
|
|
1027 return r;
|
|
1028 }
|
|
1029 // special format
|
|
1030
|
|
1031 // eliminate the case coeff==10^16 after rounding
|
|
1032 if (coeff == 10000000000000000ull) {
|
|
1033 r = expon + 1;
|
|
1034 r <<= EXPONENT_SHIFT_SMALL64;
|
|
1035 r |= (1000000000000000ull | sgn);
|
|
1036 return r;
|
|
1037 }
|
|
1038
|
|
1039 r = expon;
|
|
1040 r <<= EXPONENT_SHIFT_LARGE64;
|
|
1041 r |= (sgn | SPECIAL_ENCODING_MASK64);
|
|
1042 // add coeff, without leading bits
|
|
1043 mask = (mask >> 2) - 1;
|
|
1044 coeff &= mask;
|
|
1045 r |= coeff;
|
|
1046
|
|
1047 return r;
|
|
1048 }
|
|
1049
|
|
1050
|
|
//
//   no underflow checking
//
// Pack sgn/expon/coeff into a BID64, handling overflow (but not
// underflow).  rmode selects the replacement value on overflow
// (+/-inf or the largest finite number); *fpsc receives status flags
// when SET_STATUS_FLAGS is defined.
__BID_INLINE__ UINT64
fast_get_BID64_check_OF (UINT64 sgn, int expon, UINT64 coeff, int rmode,
			 unsigned *fpsc) {
  UINT64 r, mask;

  // 3*256 = 768 exponent codes; a negative expon also lands here via
  // the unsigned cast
  if (((unsigned) expon) >= 3 * 256 - 1) {
    // a rounded-up coefficient of 10^16 at the largest in-range
    // exponent is renormalized to 10^15 with expon+1
    if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
      expon = 3 * 256;
      coeff = 1000000000000000ull;
    }

    if (((unsigned) expon) >= 3 * 256) {
      // try to pull the exponent back in range by multiplying the
      // coefficient by 10 while it still has fewer than 16 digits
      while (coeff < 1000000000000000ull && expon >= 3 * 256) {
	expon--;
	coeff = (coeff << 3) + (coeff << 1);	// coeff * 10
      }
      if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
	// overflow: infinity by default, largest-magnitude finite for
	// the rounding directions that cannot reach infinity
	r = sgn | INFINITY_MASK64;
	switch (rmode) {
	case ROUNDING_DOWN:
	  if (!sgn)
	    r = LARGEST_BID64;
	  break;
	case ROUNDING_TO_ZERO:
	  r = sgn | LARGEST_BID64;
	  break;
	case ROUNDING_UP:
	  // round up
	  if (sgn)
	    r = SMALLEST_BID64;
	}
	return r;
      }
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 10*5+3 bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
|
|
1125
|
|
1126
|
|
1127 //
|
|
1128 // No overflow/underflow checking
|
|
1129 // or checking for coefficients equal to 10^16 (after rounding)
|
|
1130 //
|
|
1131 __BID_INLINE__ UINT64
|
|
1132 very_fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
|
|
1133 UINT64 r, mask;
|
|
1134
|
|
1135 mask = 1;
|
|
1136 mask <<= EXPONENT_SHIFT_SMALL64;
|
|
1137
|
|
1138 // check whether coefficient fits in 10*5+3 bits
|
|
1139 if (coeff < mask) {
|
|
1140 r = expon;
|
|
1141 r <<= EXPONENT_SHIFT_SMALL64;
|
|
1142 r |= (coeff | sgn);
|
|
1143 return r;
|
|
1144 }
|
|
1145 // special format
|
|
1146 r = expon;
|
|
1147 r <<= EXPONENT_SHIFT_LARGE64;
|
|
1148 r |= (sgn | SPECIAL_ENCODING_MASK64);
|
|
1149 // add coeff, without leading bits
|
|
1150 mask = (mask >> 2) - 1;
|
|
1151 coeff &= mask;
|
|
1152 r |= coeff;
|
|
1153
|
|
1154 return r;
|
|
1155 }
|
|
1156
|
|
1157 //
|
|
1158 // No overflow/underflow checking or checking for coefficients above 2^53
|
|
1159 //
|
|
1160 __BID_INLINE__ UINT64
|
|
1161 very_fast_get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff) {
|
|
1162 // no UF/OF
|
|
1163 UINT64 r;
|
|
1164
|
|
1165 r = expon;
|
|
1166 r <<= EXPONENT_SHIFT_SMALL64;
|
|
1167 r |= (coeff | sgn);
|
|
1168 return r;
|
|
1169 }
|
|
1170
|
|
1171
|
|
//
//   This pack macro is used when underflow is known to occur
//
// Round coeff*10^expon (with sticky bit R) into the smallest BID64
// exponent range.  The division by 10^extra_digits is replaced by a
// multiplication with a precomputed scaled reciprocal
// (reciprocals10_128 / recip_scale tables).  Underflow/inexact flags
// are set in *fpsc when SET_STATUS_FLAGS is defined.
__BID_INLINE__ UINT64
get_BID64_UF (UINT64 sgn, int expon, UINT64 coeff, UINT64 R, int rmode,
	      unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // underflow
  if (expon + MAX_FORMAT_DIGITS < 0) {
    // complete underflow: the result is +/-0, except for the directed
    // rounding modes pointing away from zero, which return the
    // smallest-magnitude nonzero encoding
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if (rmode == ROUNDING_DOWN && sgn)
      return 0x8000000000000001ull;	// negative, coefficient 1
    if (rmode == ROUNDING_UP && !sgn)
      return 1ull;			// positive, coefficient 1
#endif
#endif
    // result is 0
    return sgn;
  }
  // 10*coeff
  coeff = (coeff << 3) + (coeff << 1);
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  // swap the two directed modes (values 1 and 2) for negative
  // operands, so the code below only rounds the magnitude
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#endif
#endif
  // fold the sticky bit into the now-even scaled coefficient
  if (R)
    coeff |= 1;
  // get digits to be shifted out
  extra_digits = 1 - expon;
  C128.w[0] = coeff + round_const_table[rmode][extra_digits];

  // get coeff*(2^M[extra_digits])/10^extra_digits
  __mul_64x128_full (QH, Q_low, C128.w[0],
		     reciprocals10_128[extra_digits]);

  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  amount = recip_scale[extra_digits];

  C64 = QH >> amount;
  //__shr_128(C128, Q_high, amount);

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
    if (C64 & 1) {
      // check whether fractional part of initial_P/10^extra_digits is exactly .5
      // (if so, round-half-even must step back down to the even value)

      // get remainder
      amount2 = 64 - amount;
      remainder_h = 0;
      remainder_h--;		// all-ones
      remainder_h >>= amount2;	// mask of the low `amount` bits
      remainder_h = remainder_h & QH;

      // exact tie: discarded bits of QH are zero and the low product
      // word is strictly below the reciprocal
      if (!remainder_h
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0]))) {
	C64--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    // the result was not already inexact; decide per rounding mode
    // whether the scaling was exact, and raise underflow+inexact if not
    status = INEXACT_EXCEPTION;
    // get remainder
    remainder_h = QH << (64 - amount);

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      // (the rounding constant added .5, so an exact input shows up as
      // a remainder of exactly .5 here)
      if (remainder_h == 0x8000000000000000ull
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if (!remainder_h
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  reciprocals10_128[extra_digits].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
		       reciprocals10_128[extra_digits].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			  reciprocals10_128[extra_digits].w[1], CY);
      if ((remainder_h >> (64 - amount)) + carry >=
	  (((UINT64) 1) << amount))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  // expon is 0 here, so sign | coefficient is the full encoding
  return sgn | C64;

}
|
|
1296
|
|
1297
|
|
1298
|
|
//
//   This pack macro doesnot check for coefficients above 2^53
//
// General pack for coefficients known to fit the small field:
// handles underflow (rounding via reciprocal multiplication) and
// overflow (scaling the coefficient up by 10, then saturating per the
// rounding mode), then packs in the small-coefficient layout, or the
// special layout if the up-scaling made the coefficient large.
__BID_INLINE__ UINT64
get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff,
			  int rmode, unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 r, mask, C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // check for possible underflow/overflow
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
	// complete underflow: +/-0, or smallest-magnitude nonzero for
	// directed rounding away from zero
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	if (rmode == ROUNDING_DOWN && sgn)
	  return 0x8000000000000001ull;
	if (rmode == ROUNDING_UP && !sgn)
	  return 1ull;
#endif
#endif
	// result is 0
	return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // swap the two directed modes for negative operands; only the
      // magnitude is rounded below
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      C128.w[0] = coeff + round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, C128.w[0],
			 reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = recip_scale[extra_digits];

      C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
	if (C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
	  // (round-half-even: an exact tie must round back to even)

	  // get remainder
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;		// all-ones
	  remainder_h >>= amount2;	// low `amount` bits
	  remainder_h = remainder_h & QH;

	  if (!remainder_h
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0]))) {
	    C64--;
	  }
	}
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
	// decide per rounding mode whether the scaling was exact
	status = INEXACT_EXCEPTION;
	// get remainder
	remainder_h = QH << (64 - amount);

	switch (rmode) {
	case ROUNDING_TO_NEAREST:
	case ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	case ROUNDING_DOWN:
	case ROUNDING_TO_ZERO:
	  if (!remainder_h
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
			   reciprocals10_128[extra_digits].w[0]);
	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			      reciprocals10_128[extra_digits].w[1], CY);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((UINT64) 1) << amount))
	    status = EXACT_STATUS;
	}

	if (status != EXACT_STATUS)
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      // expon is 0 on this path
      return sgn | C64;
    }

    // exponent too large: scale the coefficient up by 10 while it
    // still has fewer than 16 digits
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);	// coeff * 10
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow: infinity, or largest finite for rounding directions
      // that cannot reach infinity
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID64;
	break;
      case ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID64;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn)
	  r = SMALLEST_BID64;
      }
      return r;
    } else {
      // the up-scaling may have pushed the coefficient out of the
      // small field, in which case the special layout is needed
      mask = 1;
      mask <<= EXPONENT_SHIFT_SMALL64;
      if (coeff >= mask) {
	r = expon;
	r <<= EXPONENT_SHIFT_LARGE64;
	r |= (sgn | SPECIAL_ENCODING_MASK64);
	// add coeff, without leading bits
	mask = (mask >> 2) - 1;
	coeff &= mask;
	r |= coeff;
	return r;
      }
    }
  }

  // normal case: small-coefficient layout
  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);

  return r;
}
|
|
1468
|
|
1469
|
|
1470 /*****************************************************************************
|
|
1471 *
|
|
1472 * BID128 pack/unpack macros
|
|
1473 *
|
|
1474 *****************************************************************************/
|
|
1475
|
|
//
// Macro for handling BID128 underflow
// sticky bit given as additional argument
//
// Round CQ*10^expon (expon < 0) with sticky bit R to the smallest
// BID128 exponent (0).  *pres receives sign | coefficient (zero
// exponent field); status flags go to *fpsc when SET_STATUS_FLAGS is
// defined.
__BID_INLINE__ UINT128 *
handle_UF_128_rem (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
		   UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
    // complete underflow: +/-0, or coefficient 1 for the directed
    // modes pointing away from zero
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
	|| (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }
  // CQ *= 10  (computed as CQ*2 + CQ*8 with 128-bit shifts)
  CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
  CQ2.w[0] = CQ.w[0] << 1;
  CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
  CQ8.w[0] = CQ.w[0] << 3;
  __add_128_128 (CQ, CQ2, CQ8);

  // add remainder (sticky bit) in the now-vacated low digit
  if (R)
    CQ.w[0] |= 1;

  // one extra digit to remove because of the *10 above
  ed2 = 1 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  // swap the two directed modes for negative operands; only the
  // magnitude is rounded below
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif
  T128 = round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  // divide by 10^ed2 via multiplication with the scaled reciprocal
  TP128 = reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

  expon = 0;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed1 is exactly .5
      // (round-half-even correction)

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
	CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    // decide per rounding mode whether the scaling was exact; raise
    // underflow+inexact otherwise
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
		       reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
			  reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
	Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}
|
|
1620
|
|
1621
|
|
//
// Macro for handling BID128 underflow
//
// Same as handle_UF_128_rem but without a sticky-bit argument: rounds
// CQ*10^expon (expon <= 0) to the smallest BID128 exponent (0).
__BID_INLINE__ UINT128 *
handle_UF_128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
	       unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
    // complete underflow: +/-0, or coefficient 1 for the directed
    // modes pointing away from zero
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
	|| (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }

  // number of digits to remove
  ed2 = 0 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  // swap the two directed modes for negative operands; only the
  // magnitude is rounded below
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif

  T128 = round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  // divide by 10^ed2 via multiplication with the scaled reciprocal
  TP128 = reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

  expon = 0;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed1 is exactly .5
      // (round-half-even correction)

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
	CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    // decide per rounding mode whether the scaling was exact; raise
    // underflow+inexact otherwise
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
		       reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
			  reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
	Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}
|
|
1756
|
|
1757
|
|
1758
|
|
//
// BID128 unpack, input passed by value
//
// Split x into sign bit, biased exponent, and 128-bit coefficient.
// Non-canonical coefficients are replaced by zero.  Returns nonzero
// iff x is finite with a nonzero (canonical) coefficient; returns 0
// for zero, NaN, infinity, and non-canonical special encodings.
__BID_INLINE__ UINT64
unpack_BID128_value (UINT64 * psign_x, int *pexponent_x,
		     UINT128 * pcoefficient_x, UINT128 x) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (x.w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input (large-coefficient steering bits, not
      // inf/NaN): the value is zero, but the exponent field (taken
      // from bit 47 here) is still extracted
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (x.w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // 10^33
    T33 = power10_table_128[33];

    // inf/NaN: keep the payload only if it is canonical (< 10^33)
    pcoefficient_x->w[0] = x.w[0];
    pcoefficient_x->w[1] = (x.w[1]) & 0x00003fffffffffffull;
    if (__unsigned_compare_ge_128 ((*pcoefficient_x), T33))	// non-canonical
    {
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe00000000000000ull;
      pcoefficient_x->w[0] = 0;
    } else
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe003fffffffffffull;
    if ((x.w[1] & NAN_MASK64) == INFINITY_MASK64) {
      // infinity: coefficient is sign+infinity bits only
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = x.w[1] & SINFINITY_MASK64;
    }
    *pexponent_x = 0;
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = x.w[0];
  coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = power10_table_128[34];
  // check for non-canonical values
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  // exponent field sits at bit 49 of the high word
  ex = (x.w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
|
|
1822
|
|
1823
|
|
//
// BID128 unpack, input pased by reference
//
// Same contract as unpack_BID128_value, but reads the operand through
// px.  Returns nonzero iff *px is finite with a nonzero (canonical)
// coefficient; returns 0 for zero, NaN, infinity, and non-canonical
// special encodings.
__BID_INLINE__ UINT64
unpack_BID128 (UINT64 * psign_x, int *pexponent_x,
	       UINT128 * pcoefficient_x, UINT128 * px) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (px->w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input (large-coefficient steering bits, not
      // inf/NaN): value is zero; exponent field taken from bit 47
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (px->w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // 10^33
    T33 = power10_table_128[33];
    coeff.w[0] = px->w[0];
    coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
    pcoefficient_x->w[0] = px->w[0];
    pcoefficient_x->w[1] = px->w[1];
    if (__unsigned_compare_ge_128 (coeff, T33)) { // non-canonical
      pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
      pcoefficient_x->w[0] = 0;
    }
    *pexponent_x = 0;
    return 0; // NaN or Infinity
  }

  coeff.w[0] = px->w[0];
  coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = power10_table_128[34];
  // check for non-canonical values
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  // exponent field sits at bit 49 of the high word
  ex = (px->w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
|
|
1876
|
|
//
// Pack macro checks for overflow, but not underflow
//
// Pack a BID128; when the exponent is above range, first try to scale
// the coefficient up by 10 to bring the exponent back in range, and
// otherwise saturate to infinity or the largest finite value per the
// rounding mode.
__BID_INLINE__ UINT128 *
get_BID128_very_fast_OF (UINT128 * pres, UINT64 sgn, int expon,
			 UINT128 coeff, unsigned *prounding_mode,
			 unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      // while the coefficient has fewer than the maximum number of
      // digits, multiply it by 10 (as c*8 + c*2 over 128 bits) and
      // decrement the exponent
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
	     && expon > DECIMAL_MAX_EXPON_128) {
	coeff.w[1] =
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
	  (coeff.w[0] >> 63);
	tmp2 = coeff.w[0] << 3;
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
	if (coeff.w[0] < tmp2)	// carry out of the low word
	  coeff.w[1]++;

	expon--;
      }
    }
    if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // rounding directions that cannot reach infinity saturate to the
      // largest-magnitude finite value instead
      if (*prounding_mode == ROUNDING_TO_ZERO
	  || (sgn && *prounding_mode == ROUNDING_UP)
	  || (!sgn && *prounding_mode == ROUNDING_DOWN))
	{
	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	} else
#endif
#endif
	{
	  pres->w[1] = sgn | INFINITY_MASK64;
	  pres->w[0] = 0;
	}
      return pres;
    }
  }

  // normal pack: exponent field at bit 49 of the high word
  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
|
|
1938
|
|
1939
|
|
1940 //
|
|
1941 // No overflow/underflow checks
|
|
1942 // No checking for coefficient == 10^34 (rounding artifact)
|
|
1943 //
|
|
1944 __BID_INLINE__ UINT128 *
|
|
1945 get_BID128_very_fast (UINT128 * pres, UINT64 sgn, int expon,
|
|
1946 UINT128 coeff) {
|
|
1947 UINT64 tmp;
|
|
1948
|
|
1949 pres->w[0] = coeff.w[0];
|
|
1950 tmp = expon;
|
|
1951 tmp <<= 49;
|
|
1952 pres->w[1] = sgn | tmp | coeff.w[1];
|
|
1953
|
|
1954 return pres;
|
|
1955 }
|
|
1956
|
|
1957 //
|
|
1958 // No overflow/underflow checks
|
|
1959 //
|
|
1960 __BID_INLINE__ UINT128 *
|
|
1961 get_BID128_fast (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
|
|
1962 UINT64 tmp;
|
|
1963
|
|
1964 // coeff==10^34?
|
|
1965 if (coeff.w[1] == 0x0001ed09bead87c0ull
|
|
1966 && coeff.w[0] == 0x378d8e6400000000ull) {
|
|
1967 expon++;
|
|
1968 // set coefficient to 10^33
|
|
1969 coeff.w[1] = 0x0000314dc6448d93ull;
|
|
1970 coeff.w[0] = 0x38c15b0a00000000ull;
|
|
1971 }
|
|
1972
|
|
1973 pres->w[0] = coeff.w[0];
|
|
1974 tmp = expon;
|
|
1975 tmp <<= 49;
|
|
1976 pres->w[1] = sgn | tmp | coeff.w[1];
|
|
1977
|
|
1978 return pres;
|
|
1979 }
|
|
1980
|
|
//
//   General BID128 pack macro
//
// Pack sgn | expon | coeff into *pres, handling all edge cases:
//   - a coefficient equal to 10^34 (rounding artifact) is replaced by
//     10^33 with the exponent incremented;
//   - negative exponents are routed to handle_UF_128 (underflow path);
//   - too-large exponents are reduced by scaling the coefficient up by
//     powers of 10 while it has fewer than 34 digits; if that fails,
//     the result overflows to infinity or the largest finite value
//     according to *prounding_mode, with OF+inexact flags set in *fpsc.
// Returns pres.
__BID_INLINE__ UINT128 *
get_BID128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff,
	    unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  // coeff==10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if (expon < 0 || expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0) {
      return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
			    fpsc);
    }

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      // scale the coefficient up by 10 while it stays below 10^33,
      // lowering the exponent toward the representable range
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
	     && expon > DECIMAL_MAX_EXPON_128) {
	// coeff *= 10, computed as 8*coeff + 2*coeff over both words
	coeff.w[1] =
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
	  (coeff.w[0] >> 63);
	tmp2 = coeff.w[0] << 3;
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
	if (coeff.w[0] < tmp2)
	  // carry out of the low word
	  coeff.w[1]++;

	expon--;
      }
    }
    if (expon > DECIMAL_MAX_EXPON_128) {
      if (!(coeff.w[1] | coeff.w[0])) {
	// zero coefficient: just clamp the exponent at its maximum
	pres->w[1] = sgn | (((UINT64) DECIMAL_MAX_EXPON_128) << 49);
	pres->w[0] = 0;
	return pres;
      }
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // modes that round toward zero for this sign give +/-MAXFP
      if (*prounding_mode == ROUNDING_TO_ZERO
	  || (sgn && *prounding_mode == ROUNDING_UP)
	  || (!sgn && *prounding_mode == ROUNDING_DOWN))
	{
	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	} else
#endif
#endif
	{
	  // all other modes overflow to infinity
	  pres->w[1] = sgn | INFINITY_MASK64;
	  pres->w[0] = 0;
	}
      return pres;
    }
  }

  // common case: sign | exponent<<49 | coefficient
  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
|
|
2060
|
|
2061
|
|
//
//   Macro used for conversions from string
//   (no additional arguments given for rounding mode, status flags)
//
// Same packing as get_BID128, but with the rounding mode hard-wired to
// 0 and any status flags discarded, as used by the string-conversion
// code.
__BID_INLINE__ UINT128 *
get_BID128_string (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
  UINT128 D2, D8;
  UINT64 tmp;
  // status only receives flags from handle_UF_128; they are discarded
  unsigned rmode = 0, status;

  // coeff==10^34? (rounding artifact: replace by 10^33, expon+1)
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0)
      return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);

    // OF

    if (expon < DECIMAL_MAX_EXPON_128 + 34) {
      // scale coefficient up by 10 (coeff = 2*coeff + 8*coeff) while it
      // stays below 10^33, lowering the exponent into range
      while (expon > DECIMAL_MAX_EXPON_128 &&
	     (coeff.w[1] < power10_table_128[33].w[1] ||
	      (coeff.w[1] == power10_table_128[33].w[1]
	       && coeff.w[0] < power10_table_128[33].w[0]))) {
	D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
	D2.w[0] = coeff.w[0] << 1;
	D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
	D8.w[0] = coeff.w[0] << 3;

	__add_128_128 (coeff, D2, D8);
	expon--;
      }
    } else if (!(coeff.w[0] | coeff.w[1]))
      // zero coefficient: the exponent can simply be clamped
      expon = DECIMAL_MAX_EXPON_128;

    if (expon > DECIMAL_MAX_EXPON_128) {
      // overflow: infinity by default; the directed modes that point
      // toward zero get the largest finite value instead (rmode is
      // fixed at 0 in this routine, so the switch normally keeps the
      // infinity result)
      pres->w[1] = sgn | INFINITY_MASK64;
      pres->w[0] = 0;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn) {
	  pres->w[1] = LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	}
	break;
      case ROUNDING_TO_ZERO:
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
	pres->w[0] = LARGEST_BID128_LOW;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn) {
	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
	  pres->w[0] = LARGEST_BID128_LOW;
	}
	break;
      }

      return pres;
    }
  }

  // common case: sign | exponent<<49 | coefficient
  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
|
|
2138
|
|
2139
|
|
2140
|
|
2141 /*****************************************************************************
|
|
2142 *
|
|
2143 * BID32 pack/unpack macros
|
|
2144 *
|
|
2145 *****************************************************************************/
|
|
2146
|
|
2147
|
|
// Unpack a BID32 operand x into its fields: *psign_x receives the sign
// bit (left in bit position 31), *pexponent_x the biased exponent and
// *pcoefficient_x the decimal coefficient (canonicalized: non-canonical
// coefficients are forced to 0, NaN/infinity payloads are masked).
// Return value: 0 for NaN/infinity, 1 for the "steering bits 11"
// encoding, otherwise the coefficient itself (hence also 0 for a zero
// value in the ordinary encoding).
__BID_INLINE__ UINT32
unpack_BID32 (UINT32 * psign_x, int *pexponent_x,
	      UINT32 * pcoefficient_x, UINT32 x) {
  UINT32 tmp;

  *psign_x = x & 0x80000000;

  if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
    // special encodings
    if ((x & INFINITY_MASK32) == INFINITY_MASK32) {
      // NaN: keep the payload unless it is non-canonical (>= 10^6)
      *pcoefficient_x = x & 0xfe0fffff;
      if ((x & 0x000fffff) >= 1000000)
	*pcoefficient_x = x & 0xfe000000;
      // infinity (not NaN): drop all payload bits
      if ((x & NAN_MASK32) == INFINITY_MASK32)
	*pcoefficient_x = x & 0xf8000000;
      *pexponent_x = 0;
      return 0;	// NaN or Infinity
    }
    // coefficient: stored low bits plus the implied leading bit
    *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
    // check for non-canonical value
    if (*pcoefficient_x >= 10000000)
      *pcoefficient_x = 0;
    // get exponent (located at bit 21 in this layout)
    tmp = x >> 21;
    *pexponent_x = tmp & EXPONENT_MASK32;
    return 1;
  }
  // exponent (ordinary layout: exponent at bit 23)
  tmp = x >> 23;
  *pexponent_x = tmp & EXPONENT_MASK32;
  // coefficient
  *pcoefficient_x = (x & LARGE_COEFF_MASK32);

  return *pcoefficient_x;
}
|
|
2184
|
|
//
//   General pack macro for BID32
//
// Pack sgn | expon | coeff into a BID32 with full rounding support:
//   - a coefficient above 9999999 (rounding artifact, i.e. 10^7) is
//     replaced by 10^6 with the exponent incremented;
//   - a negative exponent shifts -expon decimal digits out of the
//     coefficient, rounded per `rmode', setting underflow/inexact
//     flags in *fpsc when flag support is compiled in;
//   - a too-large exponent is absorbed by multiplying the coefficient
//     by 10 while possible; otherwise the result overflows per `rmode';
//   - the finite result is encoded in the ordinary layout when the
//     coefficient fits in 23 bits, else in the "steering bits 11"
//     layout.
__BID_INLINE__ UINT32
get_BID32 (UINT32 sgn, int expon, UINT64 coeff, int rmode,
	   unsigned *fpsc) {
  UINT128 Q;
  UINT64 C64, remainder_h, carry, Stemp;
  UINT32 r, mask;
  int extra_digits, amount, amount2;
  unsigned status;

  // rounding artifact: 10^7 -> 10^6 with exponent+1
  if (coeff > 9999999ull) {
    expon++;
    coeff = 1000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS_32 < 0) {
	// all digits are shifted out: result is 0 (or the smallest
	// denormal for the directed mode pointing away from zero)
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	if (rmode == ROUNDING_DOWN && sgn)
	  return 0x80000001;
	if (rmode == ROUNDING_UP && !sgn)
	  return 1;
#endif
#endif
	// result is 0
	return sgn;
      }
      // get digits to be shifted out
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
      rmode = 0;
#endif
#ifdef IEEE_ROUND_NEAREST
      rmode = 0;
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      // negative operand: swap ROUNDING_DOWN/ROUNDING_UP so the code
      // below can treat the magnitude as positive
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif

      extra_digits = -expon;
      // add the rounding constant for this mode and digit count
      coeff += round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x64_to_128 (Q, coeff, reciprocals10_64[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = short_recip_scale[extra_digits];

      C64 = Q.w[1] >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
	if (C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
	  // (round-to-nearest-even: if so, step back to the even value)

	  // get remainder
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;
	  remainder_h >>= amount2;
	  remainder_h = remainder_h & Q.w[1];

	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits])) {
	    C64--;
	  }
	}
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
	// determine whether the shifted-out digits were exactly zero;
	// only an inexact result raises underflow+inexact
	status = INEXACT_EXCEPTION;
	// get remainder
	remainder_h = Q.w[1] << (64 - amount);

	switch (rmode) {
	case ROUNDING_TO_NEAREST:
	case ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q.w[0] < reciprocals10_64[extra_digits]))
	    status = EXACT_STATUS;
	  break;
	case ROUNDING_DOWN:
	case ROUNDING_TO_ZERO:
	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits]))
	    status = EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp, carry, Q.w[0],
			   reciprocals10_64[extra_digits]);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((UINT64) 1) << amount))
	    status = EXACT_STATUS;
	}

	if (status != EXACT_STATUS)
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | (UINT32) C64;
    }

    // exponent too large: multiply the coefficient by 10 while it
    // still has fewer than 7 digits
    while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
      coeff = (coeff << 3) + (coeff << 1);
      expon--;
    }
    if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
      // overflow
      r = sgn | INFINITY_MASK32;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID32;
	break;
      case ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID32;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn)
	  r = sgn | LARGEST_BID32;
      }
      return r;
    }
  }

  mask = 1 << 23;

  // check whether coefficient fits in DECIMAL_COEFF_FIT bits
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= ((UINT32) coeff | sgn);
    return r;
  }
  // special format

  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  r |= (((UINT32) coeff) & mask);

  return r;
}
|
|
2351
|
|
2352
|
|
2353
|
|
2354 //
|
|
2355 // no overflow/underflow checks
|
|
2356 //
|
|
2357 __BID_INLINE__ UINT32
|
|
2358 very_fast_get_BID32 (UINT32 sgn, int expon, UINT32 coeff) {
|
|
2359 UINT32 r, mask;
|
|
2360
|
|
2361 mask = 1 << 23;
|
|
2362
|
|
2363 // check whether coefficient fits in 10*2+3 bits
|
|
2364 if (coeff < mask) {
|
|
2365 r = expon;
|
|
2366 r <<= 23;
|
|
2367 r |= (coeff | sgn);
|
|
2368 return r;
|
|
2369 }
|
|
2370 // special format
|
|
2371 r = expon;
|
|
2372 r <<= 21;
|
|
2373 r |= (sgn | SPECIAL_ENCODING_MASK32);
|
|
2374 // add coeff, without leading bits
|
|
2375 mask = (1 << 21) - 1;
|
|
2376 coeff &= mask;
|
|
2377 r |= coeff;
|
|
2378
|
|
2379 return r;
|
|
2380 }
|
|
2381
|
|
2382
|
|
2383
|
|
2384 /*************************************************************
|
|
2385 *
|
|
2386 *************************************************************/
|
|
// 384-bit unsigned integer: six 64-bit words, 16-byte aligned
// (presumably the same least-significant-word-first order as UINT128,
// whose shift macros above treat w[0] as the low word)
typedef
ALIGN (16)
struct {
  UINT64 w[6];
} UINT384;
// 512-bit unsigned integer: eight 64-bit words, 16-byte aligned
typedef ALIGN (16)
struct {
  UINT64 w[8];
} UINT512;
|
|
2396
|
|
2397 // #define P 34
|
|
2398 #define MASK_STEERING_BITS 0x6000000000000000ull
|
|
2399 #define MASK_BINARY_EXPONENT1 0x7fe0000000000000ull
|
|
2400 #define MASK_BINARY_SIG1 0x001fffffffffffffull
|
|
2401 #define MASK_BINARY_EXPONENT2 0x1ff8000000000000ull
|
|
2402 //used to take G[2:w+3] (sec 3.3)
|
|
2403 #define MASK_BINARY_SIG2 0x0007ffffffffffffull
|
|
2404 //used to mask out G4:T0 (sec 3.3)
|
|
2405 #define MASK_BINARY_OR2 0x0020000000000000ull
|
|
2406 //used to prefix 8+G4 to T (sec 3.3)
|
|
2407 #define UPPER_EXPON_LIMIT 51
|
|
2408 #define MASK_EXP 0x7ffe000000000000ull
|
|
2409 #define MASK_SPECIAL 0x7800000000000000ull
|
|
2410 #define MASK_NAN 0x7c00000000000000ull
|
|
2411 #define MASK_SNAN 0x7e00000000000000ull
|
|
2412 #define MASK_ANY_INF 0x7c00000000000000ull
|
|
2413 #define MASK_INF 0x7800000000000000ull
|
|
2414 #define MASK_SIGN 0x8000000000000000ull
|
|
2415 #define MASK_COEFF 0x0001ffffffffffffull
|
|
2416 #define BIN_EXP_BIAS (0x1820ull << 49)
|
|
2417
|
|
2418 #define EXP_MIN 0x0000000000000000ull
|
|
2419 // EXP_MIN = (-6176 + 6176) << 49
|
|
2420 #define EXP_MAX 0x5ffe000000000000ull
|
|
2421 // EXP_MAX = (6111 + 6176) << 49
|
|
2422 #define EXP_MAX_P1 0x6000000000000000ull
|
|
2423 // EXP_MAX + 1 = (6111 + 6176 + 1) << 49
|
|
2424 #define EXP_P1 0x0002000000000000ull
|
|
// EXP_P1 = 1 << 49
|
|
2426 #define expmin -6176
|
|
2427 // min unbiased exponent
|
|
2428 #define expmax 6111
|
|
2429 // max unbiased exponent
|
|
2430 #define expmin16 -398
|
|
2431 // min unbiased exponent
|
|
2432 #define expmax16 369
|
|
2433 // max unbiased exponent
|
|
2434
|
|
2435 #define SIGNMASK32 0x80000000
|
|
2436 #define BID64_SIG_MAX 0x002386F26FC0ffffull
|
|
2437 #define SIGNMASK64 0x8000000000000000ull
|
|
2438
|
|
2439 // typedef unsigned int FPSC; // floating-point status and control
|
|
2440 // bit31:
|
|
2441 // bit30:
|
|
2442 // bit29:
|
|
2443 // bit28:
|
|
2444 // bit27:
|
|
2445 // bit26:
|
|
2446 // bit25:
|
|
2447 // bit24:
|
|
2448 // bit23:
|
|
2449 // bit22:
|
|
2450 // bit21:
|
|
2451 // bit20:
|
|
2452 // bit19:
|
|
2453 // bit18:
|
|
2454 // bit17:
|
|
2455 // bit16:
|
|
2456 // bit15:
|
|
2457 // bit14: RC:2
|
|
2458 // bit13: RC:1
|
|
2459 // bit12: RC:0
|
|
2460 // bit11: PM
|
|
2461 // bit10: UM
|
|
2462 // bit9: OM
|
|
2463 // bit8: ZM
|
|
2464 // bit7: DM
|
|
2465 // bit6: IM
|
|
2466 // bit5: PE
|
|
2467 // bit4: UE
|
|
2468 // bit3: OE
|
|
2469 // bit2: ZE
|
|
2470 // bit1: DE
|
|
2471 // bit0: IE
|
|
2472
|
|
2473 #define ROUNDING_MODE_MASK 0x00007000
|
|
2474
|
|
// Entry of the nr_digits[] lookup table declared below: for a given
// bit length, `threshold_hi'/`threshold_lo' form a 128-bit decimal
// threshold used to choose between `digits1' and `digits' decimal
// digits.  NOTE(review): exact field semantics are fixed by the table
// initializer elsewhere — confirm against the data file before use.
typedef struct _DEC_DIGITS {
  unsigned int digits;
  UINT64 threshold_hi;
  UINT64 threshold_lo;
  unsigned int digits1;
} DEC_DIGITS;
|
|
2481
|
|
2482 extern DEC_DIGITS nr_digits[];
|
|
2483 extern UINT64 midpoint64[];
|
|
2484 extern UINT128 midpoint128[];
|
|
2485 extern UINT192 midpoint192[];
|
|
2486 extern UINT256 midpoint256[];
|
|
2487 extern UINT64 ten2k64[];
|
|
2488 extern UINT128 ten2k128[];
|
|
2489 extern UINT256 ten2k256[];
|
|
2490 extern UINT128 ten2mk128[];
|
|
2491 extern UINT64 ten2mk64[];
|
|
2492 extern UINT128 ten2mk128trunc[];
|
|
2493 extern int shiftright128[];
|
|
2494 extern UINT64 maskhigh128[];
|
|
2495 extern UINT64 maskhigh128M[];
|
|
2496 extern UINT64 maskhigh192M[];
|
|
2497 extern UINT64 maskhigh256M[];
|
|
2498 extern UINT64 onehalf128[];
|
|
2499 extern UINT64 onehalf128M[];
|
|
2500 extern UINT64 onehalf192M[];
|
|
2501 extern UINT64 onehalf256M[];
|
|
2502 extern UINT128 ten2mk128M[];
|
|
2503 extern UINT128 ten2mk128truncM[];
|
|
2504 extern UINT192 ten2mk192truncM[];
|
|
2505 extern UINT256 ten2mk256truncM[];
|
|
2506 extern int shiftright128M[];
|
|
2507 extern int shiftright192M[];
|
|
2508 extern int shiftright256M[];
|
|
2509 extern UINT192 ten2mk192M[];
|
|
2510 extern UINT256 ten2mk256M[];
|
|
2511 extern unsigned char char_table2[];
|
|
2512 extern unsigned char char_table3[];
|
|
2513
|
|
2514 extern UINT64 ten2m3k64[];
|
|
2515 extern unsigned int shift_ten2m3k64[];
|
|
2516 extern UINT128 ten2m3k128[];
|
|
2517 extern unsigned int shift_ten2m3k128[];
|
|
2518
|
|
2519
|
|
2520
|
|
2521 /***************************************************************************
|
|
2522 *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
|
|
2523 ***************************************************************************/
|
|
2524
|
|
2525 extern UINT64 Kx64[];
|
|
2526 extern unsigned int Ex64m64[];
|
|
2527 extern UINT64 half64[];
|
|
2528 extern UINT64 mask64[];
|
|
2529 extern UINT64 ten2mxtrunc64[];
|
|
2530
|
|
2531 extern UINT128 Kx128[];
|
|
2532 extern unsigned int Ex128m128[];
|
|
2533 extern UINT64 half128[];
|
|
2534 extern UINT64 mask128[];
|
|
2535 extern UINT128 ten2mxtrunc128[];
|
|
2536
|
|
2537 extern UINT192 Kx192[];
|
|
2538 extern unsigned int Ex192m192[];
|
|
2539 extern UINT64 half192[];
|
|
2540 extern UINT64 mask192[];
|
|
2541 extern UINT192 ten2mxtrunc192[];
|
|
2542
|
|
2543 extern UINT256 Kx256[];
|
|
2544 extern unsigned int Ex256m256[];
|
|
2545 extern UINT64 half256[];
|
|
2546 extern UINT64 mask256[];
|
|
2547 extern UINT256 ten2mxtrunc256[];
|
|
2548
|
|
// holds either a decimal64 or a decimal128 operand; which member is
// valid is selected by the precision arguments of bid_fma below
typedef union __bid64_128 {
  UINT64 b64;
  UINT128 b128;
} BID64_128;

// common fused multiply-add worker: x1*y1 + z1 with rounding mode
// rnd_mode and status flags in *fpsc.  P0..P3 select the precisions
// of the result and of each operand — NOTE(review): the exact
// encoding of P0..P3 is established at bid_fma's definition; confirm
// there.
BID64_128 bid_fma (unsigned int P0,
		   BID64_128 x1, unsigned int P1,
		   BID64_128 y1, unsigned int P2,
		   BID64_128 z1, unsigned int P3,
		   unsigned int rnd_mode, FPSC * fpsc);
|
|
2559
|
|
2560 #define P16 16
|
|
2561 #define P34 34
|
|
2562
|
|
// bit-level view of a double: write one member, read the other
// (type punning through a union is well-defined in GNU C)
union __int_double {
  UINT64 i;
  double d;
};
typedef union __int_double int_double;


// bit-level view of a single-precision float
union __int_float {
  UINT32 i;
  float d;
};
typedef union __int_float int_float;
|
|
2575
|
|
// Exchange the values of A and B, using T as scratch storage (T must
// be assignment-compatible with A and B).  Wrapped in do { } while (0)
// so the macro expands to a single statement and is therefore safe in
// an unbraced if/else body; the previous bare-block form made
// "if (c) SWAP(a,b,t); else ..." a syntax error.
#define SWAP(A,B,T) do {\
        T = A; \
        A = B; \
        B = T; \
        } while (0)
|
|
2581
|
|
// this macro will find coefficient_x to be in [2^A, 2^(A+1) )
// ie it knows that it is A bits long
// (A is derived from the biased binary exponent of the float
// conversion of coefficient_x, staged through the int_float union
// tempx.  Fixed: the body previously wrote to the hard-coded name
// `temp_x' instead of its `tempx' parameter, so it only worked when
// the call site happened to pass a variable named temp_x.)
#define NUMBITS(A, coefficient_x, tempx){\
      (tempx).d = (float) (coefficient_x);\
      (A) = (((tempx).i >> 23) & EXPONENT_MASK32) - 0x7f;\
      }
|
|
2588
|
|
// classification of a decimal floating-point operand (cf. the IEEE
// 754-2008 "class" operation); ordered from signaling NaN through the
// negative values, the zeros and the positive values up to +infinity
enum class_types {
  signalingNaN,
  quietNaN,
  negativeInfinity,
  negativeNormal,
  negativeSubnormal,
  negativeZero,
  positiveZero,
  positiveSubnormal,
  positiveNormal,
  positiveInfinity
};
|
|
2601
|
|
// union for punning between a 64-bit integer pattern and a double
typedef union {
  UINT64 ui64;
  double d;
} BID_UI64DOUBLE;
|
|
2606
|
|
2607 #endif
|