Mercurial > hg > CbC > CbC_gcc
comparison libgcc/config/libbid/bid128_fma.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Copyright (C) 2007, 2009 Free Software Foundation, Inc. | |
2 | |
3 This file is part of GCC. | |
4 | |
5 GCC is free software; you can redistribute it and/or modify it under | |
6 the terms of the GNU General Public License as published by the Free | |
7 Software Foundation; either version 3, or (at your option) any later | |
8 version. | |
9 | |
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 for more details. | |
14 | |
15 Under Section 7 of GPL version 3, you are granted additional | |
16 permissions described in the GCC Runtime Library Exception, version | |
17 3.1, as published by the Free Software Foundation. | |
18 | |
19 You should have received a copy of the GNU General Public License and | |
20 a copy of the GCC Runtime Library Exception along with this program; | |
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 <http://www.gnu.org/licenses/>. */ | |
23 | |
24 /***************************************************************************** | |
25 * | |
26 * BID128 fma x * y + z | |
27 * | |
28 ****************************************************************************/ | |
29 | |
30 #include "bid_internal.h" | |
31 | |
32 static void | |
33 rounding_correction (unsigned int rnd_mode, | |
34 unsigned int is_inexact_lt_midpoint, | |
35 unsigned int is_inexact_gt_midpoint, | |
36 unsigned int is_midpoint_lt_even, | |
37 unsigned int is_midpoint_gt_even, | |
38 int unbexp, | |
39 UINT128 * ptrres, _IDEC_flags * ptrfpsf) { | |
40 // unbiased true exponent unbexp may be larger than emax | |
41 | |
42 UINT128 res = *ptrres; // expected to have the correct sign and coefficient | |
43 // (the exponent field is ignored, as unbexp is used instead) | |
44 UINT64 sign, exp; | |
45 UINT64 C_hi, C_lo; | |
46 | |
47 // general correction from RN to RA, RM, RP, RZ | |
48 // Note: if the result is negative, then is_inexact_lt_midpoint, | |
49 // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even | |
50 // have to be considered as if determined for the absolute value of the | |
51 // result (so they seem to be reversed) | |
52 | |
53 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
54 is_midpoint_lt_even || is_midpoint_gt_even) { | |
55 *ptrfpsf |= INEXACT_EXCEPTION; | |
56 } | |
57 // apply correction to result calculated with unbounded exponent | |
58 sign = res.w[1] & MASK_SIGN; | |
59 exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax | |
60 C_hi = res.w[1] & MASK_COEFF; | |
61 C_lo = res.w[0]; | |
62 if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) || | |
63 ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) && | |
64 is_midpoint_gt_even))) || | |
65 (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) || | |
66 ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) && | |
67 is_midpoint_gt_even)))) { | |
68 // C = C + 1 | |
69 C_lo = C_lo + 1; | |
70 if (C_lo == 0) | |
71 C_hi = C_hi + 1; | |
72 if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) { | |
73 // C = 10^34 => rounding overflow | |
74 C_hi = 0x0000314dc6448d93ull; | |
75 C_lo = 0x38c15b0a00000000ull; // 10^33 | |
76 // exp = exp + EXP_P1; | |
77 unbexp = unbexp + 1; | |
78 exp = (UINT64) (unbexp + 6176) << 49; | |
79 } | |
80 } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) && | |
81 ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) || | |
82 (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) { | |
83 // C = C - 1 | |
84 C_lo = C_lo - 1; | |
85 if (C_lo == 0xffffffffffffffffull) | |
86 C_hi--; | |
87 // check if we crossed into the lower decade | |
88 if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) { | |
89 // C = 10^33 - 1 | |
90 if (exp > 0) { | |
91 C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
92 C_lo = 0x378d8e63ffffffffull; | |
93 // exp = exp - EXP_P1; | |
94 unbexp = unbexp - 1; | |
95 exp = (UINT64) (unbexp + 6176) << 49; | |
96 } else { // if exp = 0 | |
97 if (is_midpoint_lt_even || is_midpoint_lt_even || | |
98 is_inexact_gt_midpoint || is_inexact_gt_midpoint) // tiny & inexact | |
99 *ptrfpsf |= UNDERFLOW_EXCEPTION; | |
100 } | |
101 } | |
102 } else { | |
103 ; // the result is already correct | |
104 } | |
105 if (unbexp > expmax) { // 6111 | |
106 *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
107 exp = 0; | |
108 if (!sign) { // result is positive | |
109 if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf | |
110 C_hi = 0x7800000000000000ull; | |
111 C_lo = 0x0000000000000000ull; | |
112 } else { // res = +MAXFP = (10^34-1) * 10^emax | |
113 C_hi = 0x5fffed09bead87c0ull; | |
114 C_lo = 0x378d8e63ffffffffull; | |
115 } | |
116 } else { // result is negative | |
117 if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf | |
118 C_hi = 0xf800000000000000ull; | |
119 C_lo = 0x0000000000000000ull; | |
120 } else { // res = -MAXFP = -(10^34-1) * 10^emax | |
121 C_hi = 0xdfffed09bead87c0ull; | |
122 C_lo = 0x378d8e63ffffffffull; | |
123 } | |
124 } | |
125 } | |
126 // assemble the result | |
127 res.w[1] = sign | exp | C_hi; | |
128 res.w[0] = C_lo; | |
129 *ptrres = res; | |
130 } | |
131 | |
132 static void | |
133 add256 (UINT256 x, UINT256 y, UINT256 * pz) { | |
134 // *z = x + yl assume the sum fits in 256 bits | |
135 UINT256 z; | |
136 z.w[0] = x.w[0] + y.w[0]; | |
137 if (z.w[0] < x.w[0]) { | |
138 x.w[1]++; | |
139 if (x.w[1] == 0x0000000000000000ull) { | |
140 x.w[2]++; | |
141 if (x.w[2] == 0x0000000000000000ull) { | |
142 x.w[3]++; | |
143 } | |
144 } | |
145 } | |
146 z.w[1] = x.w[1] + y.w[1]; | |
147 if (z.w[1] < x.w[1]) { | |
148 x.w[2]++; | |
149 if (x.w[2] == 0x0000000000000000ull) { | |
150 x.w[3]++; | |
151 } | |
152 } | |
153 z.w[2] = x.w[2] + y.w[2]; | |
154 if (z.w[2] < x.w[2]) { | |
155 x.w[3]++; | |
156 } | |
157 z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible | |
158 *pz = z; | |
159 } | |
160 | |
161 static void | |
162 sub256 (UINT256 x, UINT256 y, UINT256 * pz) { | |
163 // *z = x - y; assume x >= y | |
164 UINT256 z; | |
165 z.w[0] = x.w[0] - y.w[0]; | |
166 if (z.w[0] > x.w[0]) { | |
167 x.w[1]--; | |
168 if (x.w[1] == 0xffffffffffffffffull) { | |
169 x.w[2]--; | |
170 if (x.w[2] == 0xffffffffffffffffull) { | |
171 x.w[3]--; | |
172 } | |
173 } | |
174 } | |
175 z.w[1] = x.w[1] - y.w[1]; | |
176 if (z.w[1] > x.w[1]) { | |
177 x.w[2]--; | |
178 if (x.w[2] == 0xffffffffffffffffull) { | |
179 x.w[3]--; | |
180 } | |
181 } | |
182 z.w[2] = x.w[2] - y.w[2]; | |
183 if (z.w[2] > x.w[2]) { | |
184 x.w[3]--; | |
185 } | |
186 z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y | |
187 *pz = z; | |
188 } | |
189 | |
190 | |
191 static int | |
192 nr_digits256 (UINT256 R256) { | |
193 int ind; | |
194 // determine the number of decimal digits in R256 | |
195 if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) { | |
196 // between 1 and 19 digits | |
197 for (ind = 1; ind <= 19; ind++) { | |
198 if (R256.w[0] < ten2k64[ind]) { | |
199 break; | |
200 } | |
201 } | |
202 // ind digits | |
203 } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && | |
204 (R256.w[1] < ten2k128[0].w[1] || | |
205 (R256.w[1] == ten2k128[0].w[1] | |
206 && R256.w[0] < ten2k128[0].w[0]))) { | |
207 // 20 digits | |
208 ind = 20; | |
209 } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) { | |
210 // between 21 and 38 digits | |
211 for (ind = 1; ind <= 18; ind++) { | |
212 if (R256.w[1] < ten2k128[ind].w[1] || | |
213 (R256.w[1] == ten2k128[ind].w[1] && | |
214 R256.w[0] < ten2k128[ind].w[0])) { | |
215 break; | |
216 } | |
217 } | |
218 // ind + 20 digits | |
219 ind = ind + 20; | |
220 } else if (R256.w[3] == 0x0 && | |
221 (R256.w[2] < ten2k256[0].w[2] || | |
222 (R256.w[2] == ten2k256[0].w[2] && | |
223 R256.w[1] < ten2k256[0].w[1]) || | |
224 (R256.w[2] == ten2k256[0].w[2] && | |
225 R256.w[1] == ten2k256[0].w[1] && | |
226 R256.w[0] < ten2k256[0].w[0]))) { | |
227 // 39 digits | |
228 ind = 39; | |
229 } else { | |
230 // between 40 and 68 digits | |
231 for (ind = 1; ind <= 29; ind++) { | |
232 if (R256.w[3] < ten2k256[ind].w[3] || | |
233 (R256.w[3] == ten2k256[ind].w[3] && | |
234 R256.w[2] < ten2k256[ind].w[2]) || | |
235 (R256.w[3] == ten2k256[ind].w[3] && | |
236 R256.w[2] == ten2k256[ind].w[2] && | |
237 R256.w[1] < ten2k256[ind].w[1]) || | |
238 (R256.w[3] == ten2k256[ind].w[3] && | |
239 R256.w[2] == ten2k256[ind].w[2] && | |
240 R256.w[1] == ten2k256[ind].w[1] && | |
241 R256.w[0] < ten2k256[ind].w[0])) { | |
242 break; | |
243 } | |
244 } | |
245 // ind + 39 digits | |
246 ind = ind + 39; | |
247 } | |
248 return (ind); | |
249 } | |
250 | |
251 // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so | |
252 // use the rounding information from ptr_is_* to avoid a double rounding error | |
253 static void | |
254 add_and_round (int q3, | |
255 int q4, | |
256 int e4, | |
257 int delta, | |
258 int p34, | |
259 UINT64 z_sign, | |
260 UINT64 p_sign, | |
261 UINT128 C3, | |
262 UINT256 C4, | |
263 int rnd_mode, | |
264 int *ptr_is_midpoint_lt_even, | |
265 int *ptr_is_midpoint_gt_even, | |
266 int *ptr_is_inexact_lt_midpoint, | |
267 int *ptr_is_inexact_gt_midpoint, | |
268 _IDEC_flags * ptrfpsf, UINT128 * ptrres) { | |
269 | |
270 int scale; | |
271 int x0; | |
272 int ind; | |
273 UINT64 R64; | |
274 UINT128 P128, R128; | |
275 UINT192 P192, R192; | |
276 UINT256 R256; | |
277 int is_midpoint_lt_even = 0; | |
278 int is_midpoint_gt_even = 0; | |
279 int is_inexact_lt_midpoint = 0; | |
280 int is_inexact_gt_midpoint = 0; | |
281 int is_midpoint_lt_even0 = 0; | |
282 int is_midpoint_gt_even0 = 0; | |
283 int is_inexact_lt_midpoint0 = 0; | |
284 int is_inexact_gt_midpoint0 = 0; | |
285 int incr_exp = 0; | |
286 int is_tiny = 0; | |
287 int lt_half_ulp = 0; | |
288 int eq_half_ulp = 0; | |
289 // int gt_half_ulp = 0; | |
290 UINT128 res = *ptrres; | |
291 | |
292 // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66 | |
293 scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this | |
294 // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1 | |
295 | |
296 // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for | |
297 // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6)) | |
298 if (scale == 0) { | |
299 R256.w[3] = 0x0ull; | |
300 R256.w[2] = 0x0ull; | |
301 R256.w[1] = C3.w[1]; | |
302 R256.w[0] = C3.w[0]; | |
303 } else if (scale <= 19) { // 10^scale fits in 64 bits | |
304 P128.w[1] = 0; | |
305 P128.w[0] = ten2k64[scale]; | |
306 __mul_128x128_to_256 (R256, P128, C3); | |
307 } else if (scale <= 38) { // 10^scale fits in 128 bits | |
308 __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3); | |
309 } else if (scale <= 57) { // 39 <= scale <= 57 | |
310 // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits | |
311 // (10^67 has 223 bits; 10^69 has 230 bits); | |
312 // must split the computation: | |
313 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127 | |
314 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty | |
315 // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits | |
316 __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3); | |
317 // now multiply R128 by 10^38 | |
318 __mul_128x128_to_256 (R256, R128, ten2k128[18]); | |
319 } else { // 58 <= scale <= 66 | |
320 // 10^scale takes between 193 and 220 bits, | |
321 // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits) | |
322 // must split the computation: | |
323 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127 | |
324 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty | |
325 // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits | |
326 // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because | |
327 // 10^(scale-38) takes more than 64 bits, C3 will take less than 64 | |
328 __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]); | |
329 // now calculate 10*38 * 10^(scale-38) * C3 | |
330 __mul_128x128_to_256 (R256, R128, ten2k128[18]); | |
331 } | |
332 // C3 * 10^scale is now in R256 | |
333 | |
334 // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least | |
335 // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is | |
336 // possible | |
337 // add/subtract C4 and C3 * 10^scale; the exponent is e4 | |
338 if (p_sign == z_sign) { // R256 = C4 + R256 | |
339 // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact, | |
340 // but may require rounding | |
341 add256 (C4, R256, &R256); | |
342 } else { // if (p_sign != z_sign) { // R256 = C4 - R256 | |
343 // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or | |
344 // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact, | |
345 // but may require rounding | |
346 | |
347 // compare first R256 = C3 * 10^scale and C4 | |
348 if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) || | |
349 (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) || | |
350 (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] && | |
351 R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4 | |
352 // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact, | |
353 // but may require rounding | |
354 sub256 (R256, C4, &R256); | |
355 // flip p_sign too, because the result has the sign of z | |
356 p_sign = z_sign; | |
357 } else { // if C4 > C3 * 10^scale | |
358 // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact, | |
359 // but may require rounding | |
360 sub256 (C4, R256, &R256); | |
361 } | |
362 // if the result is pure zero, the sign depends on the rounding mode | |
363 // (x*y and z had opposite signs) | |
364 if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull && | |
365 R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) { | |
366 if (rnd_mode != ROUNDING_DOWN) | |
367 p_sign = 0x0000000000000000ull; | |
368 else | |
369 p_sign = 0x8000000000000000ull; | |
370 // the exponent is max (e4, expmin) | |
371 if (e4 < -6176) | |
372 e4 = expmin; | |
373 // assemble result | |
374 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49); | |
375 res.w[0] = 0x0; | |
376 *ptrres = res; | |
377 return; | |
378 } | |
379 } | |
380 | |
381 // determine the number of decimal digits in R256 | |
382 ind = nr_digits256 (R256); | |
383 | |
384 // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind; | |
385 // round to the destination precision, with unbounded exponent | |
386 | |
387 if (ind <= p34) { | |
388 // result rounded to the destination precision with unbounded exponent | |
389 // is exact | |
390 if (ind + e4 < p34 + expmin) { | |
391 is_tiny = 1; // applies to all rounding modes | |
392 } | |
393 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1]; | |
394 res.w[0] = R256.w[0]; | |
395 // Note: res is correct only if expmin <= e4 <= expmax | |
396 } else { // if (ind > p34) | |
397 // if more than P digits, round to nearest to P digits | |
398 // round R256 to p34 digits | |
399 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68 | |
400 if (ind <= 38) { | |
401 P128.w[1] = R256.w[1]; | |
402 P128.w[0] = R256.w[0]; | |
403 round128_19_38 (ind, x0, P128, &R128, &incr_exp, | |
404 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
405 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
406 } else if (ind <= 57) { | |
407 P192.w[2] = R256.w[2]; | |
408 P192.w[1] = R256.w[1]; | |
409 P192.w[0] = R256.w[0]; | |
410 round192_39_57 (ind, x0, P192, &R192, &incr_exp, | |
411 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
412 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
413 R128.w[1] = R192.w[1]; | |
414 R128.w[0] = R192.w[0]; | |
415 } else { // if (ind <= 68) | |
416 round256_58_76 (ind, x0, R256, &R256, &incr_exp, | |
417 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
418 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
419 R128.w[1] = R256.w[1]; | |
420 R128.w[0] = R256.w[0]; | |
421 } | |
422 // the rounded result has p34 = 34 digits | |
423 e4 = e4 + x0 + incr_exp; | |
424 if (rnd_mode == ROUNDING_TO_NEAREST) { | |
425 if (e4 < expmin) { | |
426 is_tiny = 1; // for other rounding modes apply correction | |
427 } | |
428 } else { | |
429 // for RM, RP, RZ, RA apply correction in order to determine tininess | |
430 // but do not save the result; apply the correction to | |
431 // (-1)^p_sign * significand * 10^0 | |
432 P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1]; | |
433 P128.w[0] = R128.w[0]; | |
434 rounding_correction (rnd_mode, | |
435 is_inexact_lt_midpoint, | |
436 is_inexact_gt_midpoint, is_midpoint_lt_even, | |
437 is_midpoint_gt_even, 0, &P128, ptrfpsf); | |
438 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 | |
439 // the number of digits in the significand is p34 = 34 | |
440 if (e4 + scale < expmin) { | |
441 is_tiny = 1; | |
442 } | |
443 } | |
444 ind = p34; // the number of decimal digits in the signifcand of res | |
445 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN | |
446 res.w[0] = R128.w[0]; | |
447 // Note: res is correct only if expmin <= e4 <= expmax | |
448 // set the inexact flag after rounding with bounded exponent, if any | |
449 } | |
450 // at this point we have the result rounded with unbounded exponent in | |
451 // res and we know its tininess: | |
452 // res = (-1)^p_sign * significand * 10^e4, | |
453 // where q (significand) = ind <= p34 | |
454 // Note: res is correct only if expmin <= e4 <= expmax | |
455 | |
456 // check for overflow if RN | |
457 if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) { | |
458 res.w[1] = p_sign | 0x7800000000000000ull; | |
459 res.w[0] = 0x0000000000000000ull; | |
460 *ptrres = res; | |
461 *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
462 return; // BID_RETURN (res) | |
463 } // else not overflow or not RN, so continue | |
464 | |
465 // if (e4 >= expmin) we have the result rounded with bounded exponent | |
466 if (e4 < expmin) { | |
467 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res | |
468 // where the result rounded [at most] once is | |
469 // (-1)^p_sign * significand_res * 10^e4 | |
470 | |
471 // avoid double rounding error | |
472 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; | |
473 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; | |
474 is_midpoint_lt_even0 = is_midpoint_lt_even; | |
475 is_midpoint_gt_even0 = is_midpoint_gt_even; | |
476 is_inexact_lt_midpoint = 0; | |
477 is_inexact_gt_midpoint = 0; | |
478 is_midpoint_lt_even = 0; | |
479 is_midpoint_gt_even = 0; | |
480 | |
481 if (x0 > ind) { | |
482 // nothing is left of res when moving the decimal point left x0 digits | |
483 is_inexact_lt_midpoint = 1; | |
484 res.w[1] = p_sign | 0x0000000000000000ull; | |
485 res.w[0] = 0x0000000000000000ull; | |
486 e4 = expmin; | |
487 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 | |
488 // this is <, =, or > 1/2 ulp | |
489 // compare the ind-digit value in the significand of res with | |
490 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is | |
491 // less than, equal to, or greater than 1/2 ulp (significand of res) | |
492 R128.w[1] = res.w[1] & MASK_COEFF; | |
493 R128.w[0] = res.w[0]; | |
494 if (ind <= 19) { | |
495 if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp | |
496 lt_half_ulp = 1; | |
497 is_inexact_lt_midpoint = 1; | |
498 } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp | |
499 eq_half_ulp = 1; | |
500 is_midpoint_gt_even = 1; | |
501 } else { // > 1/2 ulp | |
502 // gt_half_ulp = 1; | |
503 is_inexact_gt_midpoint = 1; | |
504 } | |
505 } else { // if (ind <= 38) { | |
506 if (R128.w[1] < midpoint128[ind - 20].w[1] || | |
507 (R128.w[1] == midpoint128[ind - 20].w[1] && | |
508 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp | |
509 lt_half_ulp = 1; | |
510 is_inexact_lt_midpoint = 1; | |
511 } else if (R128.w[1] == midpoint128[ind - 20].w[1] && | |
512 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp | |
513 eq_half_ulp = 1; | |
514 is_midpoint_gt_even = 1; | |
515 } else { // > 1/2 ulp | |
516 // gt_half_ulp = 1; | |
517 is_inexact_gt_midpoint = 1; | |
518 } | |
519 } | |
520 if (lt_half_ulp || eq_half_ulp) { | |
521 // res = +0.0 * 10^expmin | |
522 res.w[1] = 0x0000000000000000ull; | |
523 res.w[0] = 0x0000000000000000ull; | |
524 } else { // if (gt_half_ulp) | |
525 // res = +1 * 10^expmin | |
526 res.w[1] = 0x0000000000000000ull; | |
527 res.w[0] = 0x0000000000000001ull; | |
528 } | |
529 res.w[1] = p_sign | res.w[1]; | |
530 e4 = expmin; | |
531 } else { // if (1 <= x0 <= ind - 1 <= 33) | |
532 // round the ind-digit result to ind - x0 digits | |
533 | |
534 if (ind <= 18) { // 2 <= ind <= 18 | |
535 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, | |
536 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
537 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
538 res.w[1] = 0x0; | |
539 res.w[0] = R64; | |
540 } else if (ind <= 38) { | |
541 P128.w[1] = res.w[1] & MASK_COEFF; | |
542 P128.w[0] = res.w[0]; | |
543 round128_19_38 (ind, x0, P128, &res, &incr_exp, | |
544 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
545 &is_inexact_lt_midpoint, | |
546 &is_inexact_gt_midpoint); | |
547 } | |
548 e4 = e4 + x0; // expmin | |
549 // we want the exponent to be expmin, so if incr_exp = 1 then | |
550 // multiply the rounded result by 10 - it will still fit in 113 bits | |
551 if (incr_exp) { | |
552 // 64 x 128 -> 128 | |
553 P128.w[1] = res.w[1] & MASK_COEFF; | |
554 P128.w[0] = res.w[0]; | |
555 __mul_64x128_to_128 (res, ten2k64[1], P128); | |
556 } | |
557 res.w[1] = | |
558 p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF); | |
559 // avoid a double rounding error | |
560 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
561 is_midpoint_lt_even) { // double rounding error upward | |
562 // res = res - 1 | |
563 res.w[0]--; | |
564 if (res.w[0] == 0xffffffffffffffffull) | |
565 res.w[1]--; | |
566 // Note: a double rounding error upward is not possible; for this | |
567 // the result after the first rounding would have to be 99...95 | |
568 // (35 digits in all), possibly followed by a number of zeros; this | |
569 // is not possible in Cases (2)-(6) or (15)-(17) which may get here | |
570 is_midpoint_lt_even = 0; | |
571 is_inexact_lt_midpoint = 1; | |
572 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
573 is_midpoint_gt_even) { // double rounding error downward | |
574 // res = res + 1 | |
575 res.w[0]++; | |
576 if (res.w[0] == 0) | |
577 res.w[1]++; | |
578 is_midpoint_gt_even = 0; | |
579 is_inexact_gt_midpoint = 1; | |
580 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
581 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
582 // if this second rounding was exact the result may still be | |
583 // inexact because of the first rounding | |
584 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
585 is_inexact_gt_midpoint = 1; | |
586 } | |
587 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
588 is_inexact_lt_midpoint = 1; | |
589 } | |
590 } else if (is_midpoint_gt_even && | |
591 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { | |
592 // pulled up to a midpoint | |
593 is_inexact_lt_midpoint = 1; | |
594 is_inexact_gt_midpoint = 0; | |
595 is_midpoint_lt_even = 0; | |
596 is_midpoint_gt_even = 0; | |
597 } else if (is_midpoint_lt_even && | |
598 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { | |
599 // pulled down to a midpoint | |
600 is_inexact_lt_midpoint = 0; | |
601 is_inexact_gt_midpoint = 1; | |
602 is_midpoint_lt_even = 0; | |
603 is_midpoint_gt_even = 0; | |
604 } else { | |
605 ; | |
606 } | |
607 } | |
608 } | |
609 // res contains the correct result | |
610 // apply correction if not rounding to nearest | |
611 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
612 rounding_correction (rnd_mode, | |
613 is_inexact_lt_midpoint, is_inexact_gt_midpoint, | |
614 is_midpoint_lt_even, is_midpoint_gt_even, | |
615 e4, &res, ptrfpsf); | |
616 } | |
617 if (is_midpoint_lt_even || is_midpoint_gt_even || | |
618 is_inexact_lt_midpoint || is_inexact_gt_midpoint) { | |
619 // set the inexact flag | |
620 *ptrfpsf |= INEXACT_EXCEPTION; | |
621 if (is_tiny) | |
622 *ptrfpsf |= UNDERFLOW_EXCEPTION; | |
623 } | |
624 | |
625 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
626 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
627 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
628 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
629 *ptrres = res; | |
630 return; | |
631 } | |
632 | |
633 | |
634 #if DECIMAL_CALL_BY_REFERENCE | |
635 static void | |
636 bid128_ext_fma (int *ptr_is_midpoint_lt_even, | |
637 int *ptr_is_midpoint_gt_even, | |
638 int *ptr_is_inexact_lt_midpoint, | |
639 int *ptr_is_inexact_gt_midpoint, UINT128 * pres, | |
640 UINT128 * px, UINT128 * py, | |
641 UINT128 * | |
642 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
643 _EXC_INFO_PARAM) { | |
644 UINT128 x = *px, y = *py, z = *pz; | |
645 #if !DECIMAL_GLOBAL_ROUNDING | |
646 unsigned int rnd_mode = *prnd_mode; | |
647 #endif | |
648 #else | |
649 static UINT128 | |
650 bid128_ext_fma (int *ptr_is_midpoint_lt_even, | |
651 int *ptr_is_midpoint_gt_even, | |
652 int *ptr_is_inexact_lt_midpoint, | |
653 int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y, | |
654 UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM | |
655 _EXC_MASKS_PARAM _EXC_INFO_PARAM) { | |
656 #endif | |
657 | |
658 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
659 UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign; | |
660 UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp; | |
661 int true_p_exp; | |
662 UINT128 C1, C2, C3; | |
663 UINT256 C4; | |
664 int q1 = 0, q2 = 0, q3 = 0, q4; | |
665 int e1, e2, e3, e4; | |
666 int scale, ind, delta, x0; | |
667 int p34 = P34; // used to modify the limit on the number of digits | |
668 BID_UI64DOUBLE tmp; | |
669 int x_nr_bits, y_nr_bits, z_nr_bits; | |
670 unsigned int save_fpsf; | |
671 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0; | |
672 int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
673 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0; | |
674 int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0; | |
675 int incr_exp = 0; | |
676 int lsb; | |
677 int lt_half_ulp = 0; | |
678 int eq_half_ulp = 0; | |
679 int gt_half_ulp = 0; | |
680 int is_tiny = 0; | |
681 UINT64 R64, tmp64; | |
682 UINT128 P128, R128; | |
683 UINT192 P192, R192; | |
684 UINT256 R256; | |
685 | |
686 // the following are based on the table of special cases for fma; the NaN | |
687 // behavior is similar to that of the IA-64 Architecture fma | |
688 | |
689 // identify cases where at least one operand is NaN | |
690 | |
691 BID_SWAP128 (x); | |
692 BID_SWAP128 (y); | |
693 BID_SWAP128 (z); | |
694 if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN | |
695 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y) | |
696 // check first for non-canonical NaN payload | |
697 if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || | |
698 (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && | |
699 (y.w[0] > 0x38c15b09ffffffffull))) { | |
700 y.w[1] = y.w[1] & 0xffffc00000000000ull; | |
701 y.w[0] = 0x0ull; | |
702 } | |
703 if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN | |
704 // set invalid flag | |
705 *pfpsf |= INVALID_EXCEPTION; | |
706 // return quiet (y) | |
707 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] | |
708 res.w[0] = y.w[0]; | |
709 } else { // y is QNaN | |
710 // return y | |
711 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] | |
712 res.w[0] = y.w[0]; | |
713 // if z = SNaN or x = SNaN signal invalid exception | |
714 if ((z.w[1] & MASK_SNAN) == MASK_SNAN || | |
715 (x.w[1] & MASK_SNAN) == MASK_SNAN) { | |
716 // set invalid flag | |
717 *pfpsf |= INVALID_EXCEPTION; | |
718 } | |
719 } | |
720 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
721 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
722 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
723 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
724 BID_SWAP128 (res); | |
725 BID_RETURN (res) | |
726 } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN | |
727 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z) | |
728 // check first for non-canonical NaN payload | |
729 if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || | |
730 (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && | |
731 (z.w[0] > 0x38c15b09ffffffffull))) { | |
732 z.w[1] = z.w[1] & 0xffffc00000000000ull; | |
733 z.w[0] = 0x0ull; | |
734 } | |
735 if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN | |
736 // set invalid flag | |
737 *pfpsf |= INVALID_EXCEPTION; | |
738 // return quiet (z) | |
739 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] | |
740 res.w[0] = z.w[0]; | |
741 } else { // z is QNaN | |
742 // return z | |
743 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] | |
744 res.w[0] = z.w[0]; | |
745 // if x = SNaN signal invalid exception | |
746 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { | |
747 // set invalid flag | |
748 *pfpsf |= INVALID_EXCEPTION; | |
749 } | |
750 } | |
751 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
752 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
753 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
754 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
755 BID_SWAP128 (res); | |
756 BID_RETURN (res) | |
757 } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN | |
758 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x) | |
759 // check first for non-canonical NaN payload | |
760 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || | |
761 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) && | |
762 (x.w[0] > 0x38c15b09ffffffffull))) { | |
763 x.w[1] = x.w[1] & 0xffffc00000000000ull; | |
764 x.w[0] = 0x0ull; | |
765 } | |
766 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN | |
767 // set invalid flag | |
768 *pfpsf |= INVALID_EXCEPTION; | |
769 // return quiet (x) | |
770 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16] | |
771 res.w[0] = x.w[0]; | |
772 } else { // x is QNaN | |
773 // return x | |
774 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16] | |
775 res.w[0] = x.w[0]; | |
776 } | |
777 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
778 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
779 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
780 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
781 BID_SWAP128 (res); | |
782 BID_RETURN (res) | |
783 } | |
784 // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check | |
785 // for non-canonical values | |
786 | |
787 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
788 C1.w[1] = x.w[1] & MASK_COEFF; | |
789 C1.w[0] = x.w[0]; | |
790 if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf | |
791 // if x is not infinity check for non-canonical values - treated as zero | |
792 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 | |
793 // non-canonical | |
794 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits | |
795 C1.w[1] = 0; // significand high | |
796 C1.w[0] = 0; // significand low | |
797 } else { // G0_G1 != 11 | |
798 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits | |
799 if (C1.w[1] > 0x0001ed09bead87c0ull || | |
800 (C1.w[1] == 0x0001ed09bead87c0ull && | |
801 C1.w[0] > 0x378d8e63ffffffffull)) { | |
802 // x is non-canonical if coefficient is larger than 10^34 -1 | |
803 C1.w[1] = 0; | |
804 C1.w[0] = 0; | |
805 } else { // canonical | |
806 ; | |
807 } | |
808 } | |
809 } | |
810 y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
811 C2.w[1] = y.w[1] & MASK_COEFF; | |
812 C2.w[0] = y.w[0]; | |
813 if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf | |
814 // if y is not infinity check for non-canonical values - treated as zero | |
815 if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 | |
816 // non-canonical | |
817 y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits | |
818 C2.w[1] = 0; // significand high | |
819 C2.w[0] = 0; // significand low | |
820 } else { // G0_G1 != 11 | |
821 y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits | |
822 if (C2.w[1] > 0x0001ed09bead87c0ull || | |
823 (C2.w[1] == 0x0001ed09bead87c0ull && | |
824 C2.w[0] > 0x378d8e63ffffffffull)) { | |
825 // y is non-canonical if coefficient is larger than 10^34 -1 | |
826 C2.w[1] = 0; | |
827 C2.w[0] = 0; | |
828 } else { // canonical | |
829 ; | |
830 } | |
831 } | |
832 } | |
833 z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
834 C3.w[1] = z.w[1] & MASK_COEFF; | |
835 C3.w[0] = z.w[0]; | |
836 if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf | |
837 // if z is not infinity check for non-canonical values - treated as zero | |
838 if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11 | |
839 // non-canonical | |
840 z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits | |
841 C3.w[1] = 0; // significand high | |
842 C3.w[0] = 0; // significand low | |
843 } else { // G0_G1 != 11 | |
844 z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits | |
845 if (C3.w[1] > 0x0001ed09bead87c0ull || | |
846 (C3.w[1] == 0x0001ed09bead87c0ull && | |
847 C3.w[0] > 0x378d8e63ffffffffull)) { | |
848 // z is non-canonical if coefficient is larger than 10^34 -1 | |
849 C3.w[1] = 0; | |
850 C3.w[0] = 0; | |
851 } else { // canonical | |
852 ; | |
853 } | |
854 } | |
855 } | |
856 | |
857 p_sign = x_sign ^ y_sign; // sign of the product | |
858 | |
859 // identify cases where at least one operand is infinity | |
860 | |
861 if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf | |
862 if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf | |
863 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf | |
864 if (p_sign == z_sign) { | |
865 res.w[1] = z_sign | MASK_INF; | |
866 res.w[0] = 0x0; | |
867 } else { | |
868 // return QNaN Indefinite | |
869 res.w[1] = 0x7c00000000000000ull; | |
870 res.w[0] = 0x0000000000000000ull; | |
871 // set invalid flag | |
872 *pfpsf |= INVALID_EXCEPTION; | |
873 } | |
874 } else { // z = 0 or z = f | |
875 res.w[1] = p_sign | MASK_INF; | |
876 res.w[0] = 0x0; | |
877 } | |
878 } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f | |
879 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf | |
880 if (p_sign == z_sign) { | |
881 res.w[1] = z_sign | MASK_INF; | |
882 res.w[0] = 0x0; | |
883 } else { | |
884 // return QNaN Indefinite | |
885 res.w[1] = 0x7c00000000000000ull; | |
886 res.w[0] = 0x0000000000000000ull; | |
887 // set invalid flag | |
888 *pfpsf |= INVALID_EXCEPTION; | |
889 } | |
890 } else { // z = 0 or z = f | |
891 res.w[1] = p_sign | MASK_INF; | |
892 res.w[0] = 0x0; | |
893 } | |
894 } else { // y = 0 | |
895 // return QNaN Indefinite | |
896 res.w[1] = 0x7c00000000000000ull; | |
897 res.w[0] = 0x0000000000000000ull; | |
898 // set invalid flag | |
899 *pfpsf |= INVALID_EXCEPTION; | |
900 } | |
901 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
902 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
903 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
904 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
905 BID_SWAP128 (res); | |
906 BID_RETURN (res) | |
907 } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf | |
908 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf | |
909 // x = f, necessarily | |
910 if ((p_sign != z_sign) | |
911 || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) { | |
912 // return QNaN Indefinite | |
913 res.w[1] = 0x7c00000000000000ull; | |
914 res.w[0] = 0x0000000000000000ull; | |
915 // set invalid flag | |
916 *pfpsf |= INVALID_EXCEPTION; | |
917 } else { | |
918 res.w[1] = z_sign | MASK_INF; | |
919 res.w[0] = 0x0; | |
920 } | |
921 } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0 | |
922 // z = 0, f, inf | |
923 // return QNaN Indefinite | |
924 res.w[1] = 0x7c00000000000000ull; | |
925 res.w[0] = 0x0000000000000000ull; | |
926 // set invalid flag | |
927 *pfpsf |= INVALID_EXCEPTION; | |
928 } else { | |
929 // x = f and z = 0, f, necessarily | |
930 res.w[1] = p_sign | MASK_INF; | |
931 res.w[0] = 0x0; | |
932 } | |
933 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
934 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
935 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
936 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
937 BID_SWAP128 (res); | |
938 BID_RETURN (res) | |
939 } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf | |
940 // x = 0, f and y = 0, f, necessarily | |
941 res.w[1] = z_sign | MASK_INF; | |
942 res.w[0] = 0x0; | |
943 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
944 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
945 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
946 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
947 BID_SWAP128 (res); | |
948 BID_RETURN (res) | |
949 } | |
950 | |
951 true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176; | |
952 if (true_p_exp < -6176) | |
953 p_exp = 0; // cannot be less than EXP_MIN | |
954 else | |
955 p_exp = (UINT64) (true_p_exp + 6176) << 49; | |
956 | |
957 if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0 | |
958 // the result is 0 | |
959 if (p_exp < z_exp) | |
960 res.w[1] = p_exp; // preferred exponent | |
961 else | |
962 res.w[1] = z_exp; // preferred exponent | |
963 if (p_sign == z_sign) { | |
964 res.w[1] |= z_sign; | |
965 res.w[0] = 0x0; | |
966 } else { // x * y and z have opposite signs | |
967 if (rnd_mode == ROUNDING_DOWN) { | |
968 // res = -0.0 | |
969 res.w[1] |= MASK_SIGN; | |
970 res.w[0] = 0x0; | |
971 } else { | |
972 // res = +0.0 | |
973 // res.w[1] |= 0x0; | |
974 res.w[0] = 0x0; | |
975 } | |
976 } | |
977 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
978 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
979 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
980 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
981 BID_SWAP128 (res); | |
982 BID_RETURN (res) | |
983 } | |
984 // from this point on, we may need to know the number of decimal digits | |
985 // in the significands of x, y, z when x, y, z != 0 | |
986 | |
987 if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite) | |
988 // q1 = nr. of decimal digits in x | |
989 // determine first the nr. of bits in x | |
990 if (C1.w[1] == 0) { | |
991 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53 | |
992 // split the 64-bit value in two 32-bit halves to avoid rounding errors | |
993 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32 | |
994 tmp.d = (double) (C1.w[0] >> 32); // exact conversion | |
995 x_nr_bits = | |
996 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
997 } else { // x < 2^32 | |
998 tmp.d = (double) (C1.w[0]); // exact conversion | |
999 x_nr_bits = | |
1000 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1001 } | |
1002 } else { // if x < 2^53 | |
1003 tmp.d = (double) C1.w[0]; // exact conversion | |
1004 x_nr_bits = | |
1005 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1006 } | |
1007 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1]) | |
1008 tmp.d = (double) C1.w[1]; // exact conversion | |
1009 x_nr_bits = | |
1010 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1011 } | |
1012 q1 = nr_digits[x_nr_bits - 1].digits; | |
1013 if (q1 == 0) { | |
1014 q1 = nr_digits[x_nr_bits - 1].digits1; | |
1015 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi || | |
1016 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi && | |
1017 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo)) | |
1018 q1++; | |
1019 } | |
1020 } | |
1021 | |
1022 if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite) | |
1023 if (C2.w[1] == 0) { | |
1024 if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53 | |
1025 // split the 64-bit value in two 32-bit halves to avoid rounding errors | |
1026 if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32 | |
1027 tmp.d = (double) (C2.w[0] >> 32); // exact conversion | |
1028 y_nr_bits = | |
1029 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1030 } else { // y < 2^32 | |
1031 tmp.d = (double) C2.w[0]; // exact conversion | |
1032 y_nr_bits = | |
1033 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1034 } | |
1035 } else { // if y < 2^53 | |
1036 tmp.d = (double) C2.w[0]; // exact conversion | |
1037 y_nr_bits = | |
1038 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1039 } | |
1040 } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1]) | |
1041 tmp.d = (double) C2.w[1]; // exact conversion | |
1042 y_nr_bits = | |
1043 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1044 } | |
1045 | |
1046 q2 = nr_digits[y_nr_bits].digits; | |
1047 if (q2 == 0) { | |
1048 q2 = nr_digits[y_nr_bits].digits1; | |
1049 if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi || | |
1050 (C2.w[1] == nr_digits[y_nr_bits].threshold_hi && | |
1051 C2.w[0] >= nr_digits[y_nr_bits].threshold_lo)) | |
1052 q2++; | |
1053 } | |
1054 } | |
1055 | |
1056 if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite) | |
1057 if (C3.w[1] == 0) { | |
1058 if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53 | |
1059 // split the 64-bit value in two 32-bit halves to avoid rounding errors | |
1060 if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32 | |
1061 tmp.d = (double) (C3.w[0] >> 32); // exact conversion | |
1062 z_nr_bits = | |
1063 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1064 } else { // z < 2^32 | |
1065 tmp.d = (double) C3.w[0]; // exact conversion | |
1066 z_nr_bits = | |
1067 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1068 } | |
1069 } else { // if z < 2^53 | |
1070 tmp.d = (double) C3.w[0]; // exact conversion | |
1071 z_nr_bits = | |
1072 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1073 } | |
1074 } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1]) | |
1075 tmp.d = (double) C3.w[1]; // exact conversion | |
1076 z_nr_bits = | |
1077 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
1078 } | |
1079 | |
1080 q3 = nr_digits[z_nr_bits].digits; | |
1081 if (q3 == 0) { | |
1082 q3 = nr_digits[z_nr_bits].digits1; | |
1083 if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi || | |
1084 (C3.w[1] == nr_digits[z_nr_bits].threshold_hi && | |
1085 C3.w[0] >= nr_digits[z_nr_bits].threshold_lo)) | |
1086 q3++; | |
1087 } | |
1088 } | |
1089 | |
1090 if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) || | |
1091 (C2.w[1] == 0x0 && C2.w[0] == 0x0)) { | |
1092 // x = 0 or y = 0 | |
1093 // z = f, necessarily; for 0 + z return z, with the preferred exponent | |
1094 // the result is z, but need to get the preferred exponent | |
1095 if (z_exp <= p_exp) { // the preferred exponent is z_exp | |
1096 res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1]; | |
1097 res.w[0] = C3.w[0]; | |
1098 } else { // if (p_exp < z_exp) the preferred exponent is p_exp | |
1099 // return (C3 * 10^scale) * 10^(z_exp - scale) | |
1100 // where scale = min (p34-q3, (z_exp-p_exp) >> 49) | |
1101 scale = p34 - q3; | |
1102 ind = (z_exp - p_exp) >> 49; | |
1103 if (ind < scale) | |
1104 scale = ind; | |
1105 if (scale == 0) { | |
1106 res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant | |
1107 res.w[0] = z.w[0]; | |
1108 } else if (q3 <= 19) { // z fits in 64 bits | |
1109 if (scale <= 19) { // 10^scale fits in 64 bits | |
1110 // 64 x 64 C3.w[0] * ten2k64[scale] | |
1111 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); | |
1112 } else { // 10^scale fits in 128 bits | |
1113 // 64 x 128 C3.w[0] * ten2k128[scale - 20] | |
1114 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); | |
1115 } | |
1116 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits | |
1117 // 64 x 128 ten2k64[scale] * C3 | |
1118 __mul_128x64_to_128 (res, ten2k64[scale], C3); | |
1119 } | |
1120 // subtract scale from the exponent | |
1121 z_exp = z_exp - ((UINT64) scale << 49); | |
1122 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
1123 } | |
1124 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
1125 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
1126 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
1127 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
1128 BID_SWAP128 (res); | |
1129 BID_RETURN (res) | |
1130 } else { | |
1131 ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f | |
1132 } | |
1133 | |
1134 e1 = (x_exp >> 49) - 6176; // unbiased exponent of x | |
1135 e2 = (y_exp >> 49) - 6176; // unbiased exponent of y | |
1136 e3 = (z_exp >> 49) - 6176; // unbiased exponent of z | |
1137 e4 = e1 + e2; // unbiased exponent of the exact x * y | |
1138 | |
1139 // calculate C1 * C2 and its number of decimal digits, q4 | |
1140 | |
1141 // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits | |
1142 // where 2 <= q1 + q2 <= 68 | |
1143 // calculate C4 = C1 * C2 and determine q | |
1144 C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0; | |
1145 if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits | |
1146 C4.w[0] = C1.w[0] * C2.w[0]; | |
1147 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 | |
1148 if (C4.w[0] < ten2k64[q1 + q2 - 1]) | |
1149 q4 = q1 + q2 - 1; // q4 in [1, 18] | |
1150 else | |
1151 q4 = q1 + q2; // q4 in [2, 19] | |
1152 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; | |
1153 } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits | |
1154 // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits | |
1155 __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]); | |
1156 // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20 | |
1157 if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1 | |
1158 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; | |
1159 q4 = 19; // 19 = q1 + q2 - 1 | |
1160 } else { | |
1161 // if (C4.w[1] == 0) | |
1162 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; | |
1163 // else | |
1164 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; | |
1165 q4 = 20; // 20 = q1 + q2 | |
1166 } | |
1167 } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38 | |
1168 // C4 = C1 * C2 fits in 64 or 128 bits | |
1169 // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits) | |
1170 // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits | |
1171 if (q1 <= 19) { | |
1172 __mul_128x64_to_128 (C4, C1.w[0], C2); | |
1173 } else { // q2 <= 19 | |
1174 __mul_128x64_to_128 (C4, C2.w[0], C1); | |
1175 } | |
1176 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 | |
1177 if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] || | |
1178 (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] && | |
1179 C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) { | |
1180 // if (C4.w[1] == 0) // q4 = 20, necessarily | |
1181 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64; | |
1182 // else | |
1183 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; | |
1184 q4 = q1 + q2 - 1; // q4 in [20, 37] | |
1185 } else { | |
1186 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; | |
1187 q4 = q1 + q2; // q4 in [21, 38] | |
1188 } | |
1189 } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits | |
1190 // both C1 and C2 fit in 128 bits (actually in 113 bits) | |
1191 // may replace this by 128x128_to192 | |
1192 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0 | |
1193 // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39 | |
1194 if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] || | |
1195 (C4.w[1] == ten2k128[18].w[1] | |
1196 && C4.w[0] < ten2k128[18].w[0]))) { | |
1197 // 18 = 38 - 20 = q1+q2-1 - 20 | |
1198 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; | |
1199 q4 = 38; // 38 = q1 + q2 - 1 | |
1200 } else { | |
1201 // if (C4.w[2] == 0) | |
1202 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; | |
1203 // else | |
1204 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; | |
1205 q4 = 39; // 39 = q1 + q2 | |
1206 } | |
1207 } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57 | |
1208 // C4 = C1 * C2 fits in 128 or 192 bits | |
1209 // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits) | |
1210 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one | |
1211 // may fit in 64 bits | |
1212 if (C1.w[1] == 0) { // C1 fits in 64 bits | |
1213 // __mul_64x128_full (REShi64, RESlo128, A64, B128) | |
1214 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); | |
1215 } else if (C2.w[1] == 0) { // C2 fits in 64 bits | |
1216 // __mul_64x128_full (REShi64, RESlo128, A64, B128) | |
1217 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); | |
1218 } else { // both C1 and C2 require 128 bits | |
1219 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); | |
1220 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 | |
1221 } | |
1222 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 | |
1223 if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || | |
1224 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && | |
1225 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || | |
1226 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && | |
1227 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) { | |
1228 // if (C4.w[2] == 0) // q4 = 39, necessarily | |
1229 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128; | |
1230 // else | |
1231 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; | |
1232 q4 = q1 + q2 - 1; // q4 in [39, 56] | |
1233 } else { | |
1234 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; | |
1235 q4 = q1 + q2; // q4 in [40, 57] | |
1236 } | |
1237 } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits | |
1238 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one | |
1239 // may fit in 64 bits | |
1240 if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits | |
1241 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192 | |
1242 } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits | |
1243 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192 | |
1244 } else { // C1 * C2 will fit in 192 bits or in 256 bits | |
1245 __mul_128x128_to_256 (C4, C1, C2); | |
1246 } | |
1247 // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58 | |
1248 if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] || | |
1249 (C4.w[2] == ten2k256[18].w[2] | |
1250 && (C4.w[1] < ten2k256[18].w[1] | |
1251 || (C4.w[1] == ten2k256[18].w[1] | |
1252 && C4.w[0] < ten2k256[18].w[0]))))) { | |
1253 // 18 = 57 - 39 = q1+q2-1 - 39 | |
1254 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; | |
1255 q4 = 57; // 57 = q1 + q2 - 1 | |
1256 } else { | |
1257 // if (C4.w[3] == 0) | |
1258 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; | |
1259 // else | |
1260 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; | |
1261 q4 = 58; // 58 = q1 + q2 | |
1262 } | |
1263 } else { // if 59 <= q1 + q2 <= 68 | |
1264 // C4 = C1 * C2 fits in 192 or 256 bits | |
1265 // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits) | |
1266 // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in | |
1267 // 64 bits | |
1268 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1); | |
1269 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0 | |
1270 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2 | |
1271 if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] || | |
1272 (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] && | |
1273 (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] || | |
1274 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] && | |
1275 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] || | |
1276 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] && | |
1277 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) { | |
1278 // if (C4.w[3] == 0) // q4 = 58, necessarily | |
1279 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192; | |
1280 // else | |
1281 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; | |
1282 q4 = q1 + q2 - 1; // q4 in [58, 67] | |
1283 } else { | |
1284 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256; | |
1285 q4 = q1 + q2; // q4 in [59, 68] | |
1286 } | |
1287 } | |
1288 | |
1289 if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0 | |
1290 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved | |
1291 *pfpsf = 0; | |
1292 | |
1293 if (q4 > p34) { | |
1294 | |
1295 // truncate C4 to p34 digits into res | |
1296 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 | |
1297 x0 = q4 - p34; | |
1298 if (q4 <= 38) { | |
1299 P128.w[1] = C4.w[1]; | |
1300 P128.w[0] = C4.w[0]; | |
1301 round128_19_38 (q4, x0, P128, &res, &incr_exp, | |
1302 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
1303 &is_inexact_lt_midpoint, | |
1304 &is_inexact_gt_midpoint); | |
1305 } else if (q4 <= 57) { // 35 <= q4 <= 57 | |
1306 P192.w[2] = C4.w[2]; | |
1307 P192.w[1] = C4.w[1]; | |
1308 P192.w[0] = C4.w[0]; | |
1309 round192_39_57 (q4, x0, P192, &R192, &incr_exp, | |
1310 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
1311 &is_inexact_lt_midpoint, | |
1312 &is_inexact_gt_midpoint); | |
1313 res.w[0] = R192.w[0]; | |
1314 res.w[1] = R192.w[1]; | |
1315 } else { // if (q4 <= 68) | |
1316 round256_58_76 (q4, x0, C4, &R256, &incr_exp, | |
1317 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
1318 &is_inexact_lt_midpoint, | |
1319 &is_inexact_gt_midpoint); | |
1320 res.w[0] = R256.w[0]; | |
1321 res.w[1] = R256.w[1]; | |
1322 } | |
1323 e4 = e4 + x0; | |
1324 if (incr_exp) { | |
1325 e4 = e4 + 1; | |
1326 } | |
1327 q4 = p34; | |
1328 // res is now the coefficient of the result rounded to the destination | |
1329 // precision, with unbounded exponent; the exponent is e4; q4=digits(res) | |
1330 } else { // if (q4 <= p34) | |
1331 // C4 * 10^e4 is the result rounded to the destination precision, with | |
1332 // unbounded exponent (which is exact) | |
1333 | |
1334 if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) { | |
1335 // e4 is too large, but can be brought within range by scaling up C4 | |
1336 scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2 | |
1337 // res = (C4 * 10^scale) * 10^expmax | |
1338 if (q4 <= 19) { // C4 fits in 64 bits | |
1339 if (scale <= 19) { // 10^scale fits in 64 bits | |
1340 // 64 x 64 C4.w[0] * ten2k64[scale] | |
1341 __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]); | |
1342 } else { // 10^scale fits in 128 bits | |
1343 // 64 x 128 C4.w[0] * ten2k128[scale - 20] | |
1344 __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]); | |
1345 } | |
1346 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits | |
1347 // 64 x 128 ten2k64[scale] * CC43 | |
1348 __mul_128x64_to_128 (res, ten2k64[scale], C4); | |
1349 } | |
1350 e4 = e4 - scale; // expmax | |
1351 q4 = q4 + scale; | |
1352 } else { | |
1353 res.w[1] = C4.w[1]; | |
1354 res.w[0] = C4.w[0]; | |
1355 } | |
1356 // res is the coefficient of the result rounded to the destination | |
1357 // precision, with unbounded exponent (it has q4 digits); the exponent | |
1358 // is e4 (exact result) | |
1359 } | |
1360 | |
1361 // check for overflow | |
1362 if (q4 + e4 > p34 + expmax) { | |
1363 if (rnd_mode == ROUNDING_TO_NEAREST) { | |
1364 res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf | |
1365 res.w[0] = 0x0000000000000000ull; | |
1366 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
1367 } else { | |
1368 res.w[1] = p_sign | res.w[1]; | |
1369 rounding_correction (rnd_mode, | |
1370 is_inexact_lt_midpoint, | |
1371 is_inexact_gt_midpoint, | |
1372 is_midpoint_lt_even, is_midpoint_gt_even, | |
1373 e4, &res, pfpsf); | |
1374 } | |
1375 *pfpsf |= save_fpsf; | |
1376 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
1377 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
1378 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
1379 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
1380 BID_SWAP128 (res); | |
1381 BID_RETURN (res) | |
1382 } | |
1383 // check for underflow | |
1384 if (q4 + e4 < expmin + P34) { | |
1385 is_tiny = 1; // the result is tiny | |
1386 if (e4 < expmin) { | |
1387 // if e4 < expmin, we must truncate more of res | |
1388 x0 = expmin - e4; // x0 >= 1 | |
1389 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; | |
1390 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; | |
1391 is_midpoint_lt_even0 = is_midpoint_lt_even; | |
1392 is_midpoint_gt_even0 = is_midpoint_gt_even; | |
1393 is_inexact_lt_midpoint = 0; | |
1394 is_inexact_gt_midpoint = 0; | |
1395 is_midpoint_lt_even = 0; | |
1396 is_midpoint_gt_even = 0; | |
1397 // the number of decimal digits in res is q4 | |
1398 if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits | |
1399 if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17 | |
1400 round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp, | |
1401 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
1402 &is_inexact_lt_midpoint, | |
1403 &is_inexact_gt_midpoint); | |
1404 if (incr_exp) { | |
1405 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 | |
1406 R64 = ten2k64[q4 - x0]; | |
1407 } | |
1408 // res.w[1] = 0; (from above) | |
1409 res.w[0] = R64; | |
1410 } else { // if (q4 <= 34) | |
1411 // 19 <= q4 <= 38 | |
1412 P128.w[1] = res.w[1]; | |
1413 P128.w[0] = res.w[0]; | |
1414 round128_19_38 (q4, x0, P128, &res, &incr_exp, | |
1415 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
1416 &is_inexact_lt_midpoint, | |
1417 &is_inexact_gt_midpoint); | |
1418 if (incr_exp) { | |
1419 // increase coefficient by a factor of 10; this will be <= 10^33 | |
1420 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 | |
1421 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 | |
1422 // res.w[1] = 0; | |
1423 res.w[0] = ten2k64[q4 - x0]; | |
1424 } else { // 20 <= q4 - x0 <= 37 | |
1425 res.w[0] = ten2k128[q4 - x0 - 20].w[0]; | |
1426 res.w[1] = ten2k128[q4 - x0 - 20].w[1]; | |
1427 } | |
1428 } | |
1429 } | |
1430 e4 = e4 + x0; // expmin | |
1431 } else if (x0 == q4) { | |
1432 // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin | |
1433 // determine relationship with 1/2 ulp | |
1434 if (q4 <= 19) { | |
1435 if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp | |
1436 lt_half_ulp = 1; | |
1437 is_inexact_lt_midpoint = 1; | |
1438 } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp | |
1439 eq_half_ulp = 1; | |
1440 is_midpoint_gt_even = 1; | |
1441 } else { // > 1/2 ulp | |
1442 // gt_half_ulp = 1; | |
1443 is_inexact_gt_midpoint = 1; | |
1444 } | |
1445 } else { // if (q4 <= 34) | |
1446 if (res.w[1] < midpoint128[q4 - 20].w[1] || | |
1447 (res.w[1] == midpoint128[q4 - 20].w[1] && | |
1448 res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp | |
1449 lt_half_ulp = 1; | |
1450 is_inexact_lt_midpoint = 1; | |
1451 } else if (res.w[1] == midpoint128[q4 - 20].w[1] && | |
1452 res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp | |
1453 eq_half_ulp = 1; | |
1454 is_midpoint_gt_even = 1; | |
1455 } else { // > 1/2 ulp | |
1456 // gt_half_ulp = 1; | |
1457 is_inexact_gt_midpoint = 1; | |
1458 } | |
1459 } | |
1460 if (lt_half_ulp || eq_half_ulp) { | |
1461 // res = +0.0 * 10^expmin | |
1462 res.w[1] = 0x0000000000000000ull; | |
1463 res.w[0] = 0x0000000000000000ull; | |
1464 } else { // if (gt_half_ulp) | |
1465 // res = +1 * 10^expmin | |
1466 res.w[1] = 0x0000000000000000ull; | |
1467 res.w[0] = 0x0000000000000001ull; | |
1468 } | |
1469 e4 = expmin; | |
1470 } else { // if (x0 > q4) | |
1471 // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin | |
1472 res.w[1] = 0; | |
1473 res.w[0] = 0; | |
1474 e4 = expmin; | |
1475 is_inexact_lt_midpoint = 1; | |
1476 } | |
1477 // avoid a double rounding error | |
1478 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
1479 is_midpoint_lt_even) { // double rounding error upward | |
1480 // res = res - 1 | |
1481 res.w[0]--; | |
1482 if (res.w[0] == 0xffffffffffffffffull) | |
1483 res.w[1]--; | |
1484 // Note: a double rounding error upward is not possible; for this | |
1485 // the result after the first rounding would have to be 99...95 | |
1486 // (35 digits in all), possibly followed by a number of zeros; this | |
1487 // not possible for f * f + 0 | |
1488 is_midpoint_lt_even = 0; | |
1489 is_inexact_lt_midpoint = 1; | |
1490 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
1491 is_midpoint_gt_even) { // double rounding error downward | |
1492 // res = res + 1 | |
1493 res.w[0]++; | |
1494 if (res.w[0] == 0) | |
1495 res.w[1]++; | |
1496 is_midpoint_gt_even = 0; | |
1497 is_inexact_gt_midpoint = 1; | |
1498 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
1499 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
1500 // if this second rounding was exact the result may still be | |
1501 // inexact because of the first rounding | |
1502 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
1503 is_inexact_gt_midpoint = 1; | |
1504 } | |
1505 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
1506 is_inexact_lt_midpoint = 1; | |
1507 } | |
1508 } else if (is_midpoint_gt_even && | |
1509 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { | |
1510 // pulled up to a midpoint | |
1511 is_inexact_lt_midpoint = 1; | |
1512 is_inexact_gt_midpoint = 0; | |
1513 is_midpoint_lt_even = 0; | |
1514 is_midpoint_gt_even = 0; | |
1515 } else if (is_midpoint_lt_even && | |
1516 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { | |
1517 // pulled down to a midpoint | |
1518 is_inexact_lt_midpoint = 0; | |
1519 is_inexact_gt_midpoint = 1; | |
1520 is_midpoint_lt_even = 0; | |
1521 is_midpoint_gt_even = 0; | |
1522 } else { | |
1523 ; | |
1524 } | |
1525 } else { // if e4 >= emin then q4 < P and the result is tiny and exact | |
1526 if (e3 < e4) { | |
1527 // if (e3 < e4) the preferred exponent is e3 | |
1528 // return (C4 * 10^scale) * 10^(e4 - scale) | |
1529 // where scale = min (p34-q4, (e4 - e3)) | |
1530 scale = p34 - q4; | |
1531 ind = e4 - e3; | |
1532 if (ind < scale) | |
1533 scale = ind; | |
1534 if (scale == 0) { | |
1535 ; // res and e4 are unchanged | |
1536 } else if (q4 <= 19) { // C4 fits in 64 bits | |
1537 if (scale <= 19) { // 10^scale fits in 64 bits | |
1538 // 64 x 64 res.w[0] * ten2k64[scale] | |
1539 __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]); | |
1540 } else { // 10^scale fits in 128 bits | |
1541 // 64 x 128 res.w[0] * ten2k128[scale - 20] | |
1542 __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]); | |
1543 } | |
1544 } else { // res fits in 128 bits, but 10^scale must fit in 64 bits | |
1545 // 64 x 128 ten2k64[scale] * C3 | |
1546 __mul_128x64_to_128 (res, ten2k64[scale], res); | |
1547 } | |
1548 // subtract scale from the exponent | |
1549 e4 = e4 - scale; | |
1550 } | |
1551 } | |
1552 | |
1553 // check for inexact result | |
1554 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
1555 is_midpoint_lt_even || is_midpoint_gt_even) { | |
1556 // set the inexact flag and the underflow flag | |
1557 *pfpsf |= INEXACT_EXCEPTION; | |
1558 *pfpsf |= UNDERFLOW_EXCEPTION; | |
1559 } | |
1560 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; | |
1561 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
1562 rounding_correction (rnd_mode, | |
1563 is_inexact_lt_midpoint, | |
1564 is_inexact_gt_midpoint, | |
1565 is_midpoint_lt_even, is_midpoint_gt_even, | |
1566 e4, &res, pfpsf); | |
1567 } | |
1568 *pfpsf |= save_fpsf; | |
1569 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
1570 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
1571 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
1572 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
1573 BID_SWAP128 (res); | |
1574 BID_RETURN (res) | |
1575 } | |
1576 // no overflow, and no underflow for rounding to nearest | |
1577 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; | |
1578 | |
1579 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
1580 rounding_correction (rnd_mode, | |
1581 is_inexact_lt_midpoint, | |
1582 is_inexact_gt_midpoint, | |
1583 is_midpoint_lt_even, is_midpoint_gt_even, | |
1584 e4, &res, pfpsf); | |
1585 // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ) | |
1586 if (e4 == expmin) { | |
1587 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || | |
1588 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && | |
1589 res.w[0] < 0x38c15b0a00000000ull)) { | |
1590 is_tiny = 1; | |
1591 } | |
1592 } | |
1593 } | |
1594 | |
1595 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
1596 is_midpoint_lt_even || is_midpoint_gt_even) { | |
1597 // set the inexact flag | |
1598 *pfpsf |= INEXACT_EXCEPTION; | |
1599 if (is_tiny) | |
1600 *pfpsf |= UNDERFLOW_EXCEPTION; | |
1601 } | |
1602 | |
1603 if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact | |
1604 // need to ensure that the result has the preferred exponent | |
1605 p_exp = res.w[1] & MASK_EXP; | |
1606 if (z_exp < p_exp) { // the preferred exponent is z_exp | |
1607 // signficand of res in C3 | |
1608 C3.w[1] = res.w[1] & MASK_COEFF; | |
1609 C3.w[0] = res.w[0]; | |
1610 // the number of decimal digits of x * y is q4 <= 34 | |
1611 // Note: the coefficient fits in 128 bits | |
1612 | |
1613 // return (C3 * 10^scale) * 10^(p_exp - scale) | |
1614 // where scale = min (p34-q4, (p_exp-z_exp) >> 49) | |
1615 scale = p34 - q4; | |
1616 ind = (p_exp - z_exp) >> 49; | |
1617 if (ind < scale) | |
1618 scale = ind; | |
1619 // subtract scale from the exponent | |
1620 p_exp = p_exp - ((UINT64) scale << 49); | |
1621 if (scale == 0) { | |
1622 ; // leave res unchanged | |
1623 } else if (q4 <= 19) { // x * y fits in 64 bits | |
1624 if (scale <= 19) { // 10^scale fits in 64 bits | |
1625 // 64 x 64 C3.w[0] * ten2k64[scale] | |
1626 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); | |
1627 } else { // 10^scale fits in 128 bits | |
1628 // 64 x 128 C3.w[0] * ten2k128[scale - 20] | |
1629 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); | |
1630 } | |
1631 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; | |
1632 } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits | |
1633 // 64 x 128 ten2k64[scale] * C3 | |
1634 __mul_128x64_to_128 (res, ten2k64[scale], C3); | |
1635 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; | |
1636 } | |
1637 } // else leave the result as it is, because p_exp <= z_exp | |
1638 } | |
1639 *pfpsf |= save_fpsf; | |
1640 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
1641 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
1642 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
1643 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
1644 BID_SWAP128 (res); | |
1645 BID_RETURN (res) | |
1646 } // else we have f * f + f | |
1647 | |
1648 // continue with x = f, y = f, z = f | |
1649 | |
1650 delta = q3 + e3 - q4 - e4; | |
1651 delta_ge_zero: | |
1652 if (delta >= 0) { | |
1653 | |
1654 if (p34 <= delta - 1 || // Case (1') | |
1655 (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A) | |
1656 // check for overflow, which can occur only in Case (1') | |
1657 if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) { | |
1658 // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary | |
1659 // condition for (q3 + e3) > (p34 + expmax) | |
1660 if (rnd_mode == ROUNDING_TO_NEAREST) { | |
1661 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf | |
1662 res.w[0] = 0x0000000000000000ull; | |
1663 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
1664 } else { | |
1665 if (p_sign == z_sign) { | |
1666 is_inexact_lt_midpoint = 1; | |
1667 } else { | |
1668 is_inexact_gt_midpoint = 1; | |
1669 } | |
1670 // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3) | |
1671 scale = p34 - q3; | |
1672 if (scale == 0) { | |
1673 res.w[1] = z_sign | C3.w[1]; | |
1674 res.w[0] = C3.w[0]; | |
1675 } else { | |
1676 if (q3 <= 19) { // C3 fits in 64 bits | |
1677 if (scale <= 19) { // 10^scale fits in 64 bits | |
1678 // 64 x 64 C3.w[0] * ten2k64[scale] | |
1679 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); | |
1680 } else { // 10^scale fits in 128 bits | |
1681 // 64 x 128 C3.w[0] * ten2k128[scale - 20] | |
1682 __mul_128x64_to_128 (res, C3.w[0], | |
1683 ten2k128[scale - 20]); | |
1684 } | |
1685 } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits | |
1686 // 64 x 128 ten2k64[scale] * C3 | |
1687 __mul_128x64_to_128 (res, ten2k64[scale], C3); | |
1688 } | |
1689 // the coefficient in res has q3 + scale = p34 digits | |
1690 } | |
1691 e3 = e3 - scale; | |
1692 res.w[1] = z_sign | res.w[1]; | |
1693 rounding_correction (rnd_mode, | |
1694 is_inexact_lt_midpoint, | |
1695 is_inexact_gt_midpoint, | |
1696 is_midpoint_lt_even, is_midpoint_gt_even, | |
1697 e3, &res, pfpsf); | |
1698 } | |
1699 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
1700 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
1701 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
1702 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
1703 BID_SWAP128 (res); | |
1704 BID_RETURN (res) | |
1705 } | |
1706 // res = z | |
1707 if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3) | |
1708 // return (C3 * 10^scale) * 10^(z_exp - scale) | |
1709 // where scale = min (p34-q3, z_exp-EMIN) | |
1710 scale = p34 - q3; | |
1711 ind = e3 + 6176; | |
1712 if (ind < scale) | |
1713 scale = ind; | |
1714 if (scale == 0) { | |
1715 res.w[1] = C3.w[1]; | |
1716 res.w[0] = C3.w[0]; | |
1717 } else if (q3 <= 19) { // z fits in 64 bits | |
1718 if (scale <= 19) { // 10^scale fits in 64 bits | |
1719 // 64 x 64 C3.w[0] * ten2k64[scale] | |
1720 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); | |
1721 } else { // 10^scale fits in 128 bits | |
1722 // 64 x 128 C3.w[0] * ten2k128[scale - 20] | |
1723 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); | |
1724 } | |
1725 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits | |
1726 // 64 x 128 ten2k64[scale] * C3 | |
1727 __mul_128x64_to_128 (res, ten2k64[scale], C3); | |
1728 } | |
1729 // the coefficient in res has q3 + scale digits | |
1730 // subtract scale from the exponent | |
1731 z_exp = z_exp - ((UINT64) scale << 49); | |
1732 e3 = e3 - scale; | |
1733 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
1734 if (scale + q3 < p34) | |
1735 *pfpsf |= UNDERFLOW_EXCEPTION; | |
1736 } else { | |
1737 scale = 0; | |
1738 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1]; | |
1739 res.w[0] = C3.w[0]; | |
1740 } | |
1741 | |
1742 // use the following to avoid double rounding errors when operating on | |
1743 // mixed formats in rounding to nearest, and for correcting the result | |
1744 // if not rounding to nearest | |
1745 if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) { | |
1746 // there is a gap of exactly one digit between the scaled C3 and C4 | |
1747 // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case | |
1748 if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) || | |
1749 (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) || | |
1750 (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] || | |
1751 C3.w[0] != ten2k128[q3 - 21].w[0]))) { | |
1752 // C3 * 10^ scale != 10^(q3-1) | |
1753 // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull || | |
1754 // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 | |
1755 is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value | |
1756 } else { // if C3 * 10^scale = 10^(q3+scale-1) | |
1757 // ok from above e3 = (z_exp >> 49) - 6176; | |
1758 // the result is always inexact | |
1759 if (q4 == 1) { | |
1760 R64 = C4.w[0]; | |
1761 } else { | |
1762 // if q4 > 1 then truncate C4 from q4 digits to 1 digit; | |
1763 // x = q4-1, 1 <= x <= 67 and check if this operation is exact | |
1764 if (q4 <= 18) { // 2 <= q4 <= 18 | |
1765 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, | |
1766 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
1767 &is_inexact_lt_midpoint, | |
1768 &is_inexact_gt_midpoint); | |
1769 } else if (q4 <= 38) { | |
1770 P128.w[1] = C4.w[1]; | |
1771 P128.w[0] = C4.w[0]; | |
1772 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, | |
1773 &is_midpoint_lt_even, | |
1774 &is_midpoint_gt_even, | |
1775 &is_inexact_lt_midpoint, | |
1776 &is_inexact_gt_midpoint); | |
1777 R64 = R128.w[0]; // one decimal digit | |
1778 } else if (q4 <= 57) { | |
1779 P192.w[2] = C4.w[2]; | |
1780 P192.w[1] = C4.w[1]; | |
1781 P192.w[0] = C4.w[0]; | |
1782 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, | |
1783 &is_midpoint_lt_even, | |
1784 &is_midpoint_gt_even, | |
1785 &is_inexact_lt_midpoint, | |
1786 &is_inexact_gt_midpoint); | |
1787 R64 = R192.w[0]; // one decimal digit | |
1788 } else { // if (q4 <= 68) | |
1789 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, | |
1790 &is_midpoint_lt_even, | |
1791 &is_midpoint_gt_even, | |
1792 &is_inexact_lt_midpoint, | |
1793 &is_inexact_gt_midpoint); | |
1794 R64 = R256.w[0]; // one decimal digit | |
1795 } | |
1796 if (incr_exp) { | |
1797 R64 = 10; | |
1798 } | |
1799 } | |
1800 if (q4 == 1 && C4.w[0] == 5) { | |
1801 is_inexact_lt_midpoint = 0; | |
1802 is_inexact_gt_midpoint = 0; | |
1803 is_midpoint_lt_even = 1; | |
1804 is_midpoint_gt_even = 0; | |
1805 } else if ((e3 == expmin) || | |
1806 R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) { | |
1807 // result does not change | |
1808 is_inexact_lt_midpoint = 0; | |
1809 is_inexact_gt_midpoint = 1; | |
1810 is_midpoint_lt_even = 0; | |
1811 is_midpoint_gt_even = 0; | |
1812 } else { | |
1813 is_inexact_lt_midpoint = 1; | |
1814 is_inexact_gt_midpoint = 0; | |
1815 is_midpoint_lt_even = 0; | |
1816 is_midpoint_gt_even = 0; | |
1817 // result decremented is 10^(q3+scale) - 1 | |
1818 if ((q3 + scale) <= 19) { | |
1819 res.w[1] = 0; | |
1820 res.w[0] = ten2k64[q3 + scale]; | |
1821 } else { // if ((q3 + scale + 1) <= 35) | |
1822 res.w[1] = ten2k128[q3 + scale - 20].w[1]; | |
1823 res.w[0] = ten2k128[q3 + scale - 20].w[0]; | |
1824 } | |
1825 res.w[0] = res.w[0] - 1; // borrow never occurs | |
1826 z_exp = z_exp - EXP_P1; | |
1827 e3 = e3 - 1; | |
1828 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; | |
1829 } | |
1830 if (e3 == expmin) { | |
1831 if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) { | |
1832 ; // result not tiny (in round-to-nearest mode) | |
1833 } else { | |
1834 *pfpsf |= UNDERFLOW_EXCEPTION; | |
1835 } | |
1836 } | |
1837 } // end 10^(q3+scale-1) | |
1838 // set the inexact flag | |
1839 *pfpsf |= INEXACT_EXCEPTION; | |
1840 } else { | |
1841 if (p_sign == z_sign) { | |
1842 // if (z_sign), set as if for absolute value | |
1843 is_inexact_lt_midpoint = 1; | |
1844 } else { // if (p_sign != z_sign) | |
1845 // if (z_sign), set as if for absolute value | |
1846 is_inexact_gt_midpoint = 1; | |
1847 } | |
1848 *pfpsf |= INEXACT_EXCEPTION; | |
1849 } | |
1850 // the result is always inexact => set the inexact flag | |
1851 // Determine tininess: | |
1852 // if (exp > expmin) | |
1853 // the result is not tiny | |
1854 // else // if exp = emin | |
1855 // if (q3 + scale < p34) | |
1856 // the result is tiny | |
1857 // else // if (q3 + scale = p34) | |
1858 // if (C3 * 10^scale > 10^33) | |
1859 // the result is not tiny | |
1860 // else // if C3 * 10^scale = 10^33 | |
1861 // if (xy * z > 0) | |
1862 // the result is not tiny | |
1863 // else // if (xy * z < 0) | |
1864 // if (z > 0) | |
1865 // if rnd_mode != RP | |
1866 // the result is tiny | |
1867 // else // if RP | |
1868 // the result is not tiny | |
1869 // else // if (z < 0) | |
1870 // if rnd_mode != RM | |
1871 // the result is tiny | |
1872 // else // if RM | |
1873 // the result is not tiny | |
1874 // endif | |
1875 // endif | |
1876 // endif | |
1877 // endif | |
1878 // endif | |
1879 // endif | |
1880 if ((e3 == expmin && (q3 + scale) < p34) || | |
1881 (e3 == expmin && (q3 + scale) == p34 && | |
1882 (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high | |
1883 res.w[0] == 0x38c15b0a00000000ull && // 10^33_low | |
1884 z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) || | |
1885 (z_sign && rnd_mode != ROUNDING_DOWN)))) { | |
1886 *pfpsf |= UNDERFLOW_EXCEPTION; | |
1887 } | |
1888 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
1889 rounding_correction (rnd_mode, | |
1890 is_inexact_lt_midpoint, | |
1891 is_inexact_gt_midpoint, | |
1892 is_midpoint_lt_even, is_midpoint_gt_even, | |
1893 e3, &res, pfpsf); | |
1894 } | |
1895 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
1896 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
1897 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
1898 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
1899 BID_SWAP128 (res); | |
1900 BID_RETURN (res) | |
1901 | |
1902 } else if (p34 == delta) { // Case (1''B) | |
1903 | |
1904 // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3 | |
1905 // and C3 can be scaled up to p34 digits if needed | |
1906 | |
1907 // scale C3 to p34 digits if needed | |
1908 scale = p34 - q3; // 0 <= scale <= p34 - 1 | |
1909 if (scale == 0) { | |
1910 res.w[1] = C3.w[1]; | |
1911 res.w[0] = C3.w[0]; | |
1912 } else if (q3 <= 19) { // z fits in 64 bits | |
1913 if (scale <= 19) { // 10^scale fits in 64 bits | |
1914 // 64 x 64 C3.w[0] * ten2k64[scale] | |
1915 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); | |
1916 } else { // 10^scale fits in 128 bits | |
1917 // 64 x 128 C3.w[0] * ten2k128[scale - 20] | |
1918 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); | |
1919 } | |
1920 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits | |
1921 // 64 x 128 ten2k64[scale] * C3 | |
1922 __mul_128x64_to_128 (res, ten2k64[scale], C3); | |
1923 } | |
1924 // subtract scale from the exponent | |
1925 z_exp = z_exp - ((UINT64) scale << 49); | |
1926 e3 = e3 - scale; | |
1927 // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits | |
1928 | |
1929 // determine whether x * y is less than, equal to, or greater than | |
1930 // 1/2 ulp (z) | |
1931 if (q4 <= 19) { | |
1932 if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp | |
1933 lt_half_ulp = 1; | |
1934 } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp | |
1935 eq_half_ulp = 1; | |
1936 } else { // > 1/2 ulp | |
1937 gt_half_ulp = 1; | |
1938 } | |
1939 } else if (q4 <= 38) { | |
1940 if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] || | |
1941 (C4.w[1] == midpoint128[q4 - 20].w[1] && | |
1942 C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp | |
1943 lt_half_ulp = 1; | |
1944 } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] && | |
1945 C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp | |
1946 eq_half_ulp = 1; | |
1947 } else { // > 1/2 ulp | |
1948 gt_half_ulp = 1; | |
1949 } | |
1950 } else if (q4 <= 58) { | |
1951 if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] || | |
1952 (C4.w[2] == midpoint192[q4 - 39].w[2] && | |
1953 C4.w[1] < midpoint192[q4 - 39].w[1]) || | |
1954 (C4.w[2] == midpoint192[q4 - 39].w[2] && | |
1955 C4.w[1] == midpoint192[q4 - 39].w[1] && | |
1956 C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp | |
1957 lt_half_ulp = 1; | |
1958 } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] && | |
1959 C4.w[1] == midpoint192[q4 - 39].w[1] && | |
1960 C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp | |
1961 eq_half_ulp = 1; | |
1962 } else { // > 1/2 ulp | |
1963 gt_half_ulp = 1; | |
1964 } | |
1965 } else { | |
1966 if (C4.w[3] < midpoint256[q4 - 59].w[3] || | |
1967 (C4.w[3] == midpoint256[q4 - 59].w[3] && | |
1968 C4.w[2] < midpoint256[q4 - 59].w[2]) || | |
1969 (C4.w[3] == midpoint256[q4 - 59].w[3] && | |
1970 C4.w[2] == midpoint256[q4 - 59].w[2] && | |
1971 C4.w[1] < midpoint256[q4 - 59].w[1]) || | |
1972 (C4.w[3] == midpoint256[q4 - 59].w[3] && | |
1973 C4.w[2] == midpoint256[q4 - 59].w[2] && | |
1974 C4.w[1] == midpoint256[q4 - 59].w[1] && | |
1975 C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp | |
1976 lt_half_ulp = 1; | |
1977 } else if (C4.w[3] == midpoint256[q4 - 59].w[3] && | |
1978 C4.w[2] == midpoint256[q4 - 59].w[2] && | |
1979 C4.w[1] == midpoint256[q4 - 59].w[1] && | |
1980 C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp | |
1981 eq_half_ulp = 1; | |
1982 } else { // > 1/2 ulp | |
1983 gt_half_ulp = 1; | |
1984 } | |
1985 } | |
1986 | |
1987 if (p_sign == z_sign) { | |
1988 if (lt_half_ulp) { | |
1989 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
1990 // use the following to avoid double rounding errors when operating on | |
1991 // mixed formats in rounding to nearest | |
1992 is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value | |
1993 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { | |
1994 // add 1 ulp to the significand | |
1995 res.w[0]++; | |
1996 if (res.w[0] == 0x0ull) | |
1997 res.w[1]++; | |
1998 // check for rounding overflow, when coeff == 10^34 | |
1999 if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull && | |
2000 res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34 | |
2001 e3 = e3 + 1; | |
2002 // coeff = 10^33 | |
2003 z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP; | |
2004 res.w[1] = 0x0000314dc6448d93ull; | |
2005 res.w[0] = 0x38c15b0a00000000ull; | |
2006 } | |
2007 // end add 1 ulp | |
2008 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2009 if (eq_half_ulp) { | |
2010 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value | |
2011 } else { | |
2012 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value | |
2013 } | |
2014 } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) | |
2015 // leave unchanged | |
2016 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2017 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value | |
2018 } | |
2019 // the result is always inexact, and never tiny | |
2020 // set the inexact flag | |
2021 *pfpsf |= INEXACT_EXCEPTION; | |
2022 // check for overflow | |
2023 if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) { | |
2024 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf | |
2025 res.w[0] = 0x0000000000000000ull; | |
2026 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
2027 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
2028 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2029 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2030 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2031 BID_SWAP128 (res); | |
2032 BID_RETURN (res) | |
2033 } | |
2034 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
2035 rounding_correction (rnd_mode, | |
2036 is_inexact_lt_midpoint, | |
2037 is_inexact_gt_midpoint, | |
2038 is_midpoint_lt_even, is_midpoint_gt_even, | |
2039 e3, &res, pfpsf); | |
2040 z_exp = res.w[1] & MASK_EXP; | |
2041 } | |
2042 } else { // if (p_sign != z_sign) | |
2043 // consider two cases, because C3 * 10^scale = 10^33 is a special case | |
2044 if (res.w[1] != 0x0000314dc6448d93ull || | |
2045 res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33 | |
2046 if (lt_half_ulp) { | |
2047 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2048 // use the following to avoid double rounding errors when operating | |
2049 // on mixed formats in rounding to nearest | |
2050 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value | |
2051 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) { | |
2052 // subtract 1 ulp from the significand | |
2053 res.w[0]--; | |
2054 if (res.w[0] == 0xffffffffffffffffull) | |
2055 res.w[1]--; | |
2056 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2057 if (eq_half_ulp) { | |
2058 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value | |
2059 } else { | |
2060 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value | |
2061 } | |
2062 } else { // if (eq_half_ulp && !(res.w[0] & 0x01)) | |
2063 // leave unchanged | |
2064 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2065 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value | |
2066 } | |
2067 // the result is always inexact, and never tiny | |
2068 // check for overflow for RN | |
2069 if (e3 > expmax) { | |
2070 if (rnd_mode == ROUNDING_TO_NEAREST) { | |
2071 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf | |
2072 res.w[0] = 0x0000000000000000ull; | |
2073 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
2074 } else { | |
2075 rounding_correction (rnd_mode, | |
2076 is_inexact_lt_midpoint, | |
2077 is_inexact_gt_midpoint, | |
2078 is_midpoint_lt_even, | |
2079 is_midpoint_gt_even, e3, &res, | |
2080 pfpsf); | |
2081 } | |
2082 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
2083 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2084 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2085 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2086 BID_SWAP128 (res); | |
2087 BID_RETURN (res) | |
2088 } | |
2089 // set the inexact flag | |
2090 *pfpsf |= INEXACT_EXCEPTION; | |
2091 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
2092 rounding_correction (rnd_mode, | |
2093 is_inexact_lt_midpoint, | |
2094 is_inexact_gt_midpoint, | |
2095 is_midpoint_lt_even, | |
2096 is_midpoint_gt_even, e3, &res, pfpsf); | |
2097 } | |
2098 z_exp = res.w[1] & MASK_EXP; | |
2099 } else { // if C3 * 10^scale = 10^33 | |
2100 e3 = (z_exp >> 49) - 6176; | |
2101 if (e3 > expmin) { | |
2102 // the result is exact if exp > expmin and C4 = d*10^(q4-1), | |
2103 // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact | |
2104 if (q4 == 1) { | |
2105 // if q4 = 1 the result is exact | |
2106 // result coefficient = 10^34 - C4 | |
2107 res.w[1] = 0x0001ed09bead87c0ull; | |
2108 res.w[0] = 0x378d8e6400000000ull - C4.w[0]; | |
2109 z_exp = z_exp - EXP_P1; | |
2110 e3 = e3 - 1; | |
2111 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2112 } else { | |
2113 // if q4 > 1 then truncate C4 from q4 digits to 1 digit; | |
2114 // x = q4-1, 1 <= x <= 67 and check if this operation is exact | |
2115 if (q4 <= 18) { // 2 <= q4 <= 18 | |
2116 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp, | |
2117 &is_midpoint_lt_even, | |
2118 &is_midpoint_gt_even, | |
2119 &is_inexact_lt_midpoint, | |
2120 &is_inexact_gt_midpoint); | |
2121 } else if (q4 <= 38) { | |
2122 P128.w[1] = C4.w[1]; | |
2123 P128.w[0] = C4.w[0]; | |
2124 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp, | |
2125 &is_midpoint_lt_even, | |
2126 &is_midpoint_gt_even, | |
2127 &is_inexact_lt_midpoint, | |
2128 &is_inexact_gt_midpoint); | |
2129 R64 = R128.w[0]; // one decimal digit | |
2130 } else if (q4 <= 57) { | |
2131 P192.w[2] = C4.w[2]; | |
2132 P192.w[1] = C4.w[1]; | |
2133 P192.w[0] = C4.w[0]; | |
2134 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp, | |
2135 &is_midpoint_lt_even, | |
2136 &is_midpoint_gt_even, | |
2137 &is_inexact_lt_midpoint, | |
2138 &is_inexact_gt_midpoint); | |
2139 R64 = R192.w[0]; // one decimal digit | |
2140 } else { // if (q4 <= 68) | |
2141 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp, | |
2142 &is_midpoint_lt_even, | |
2143 &is_midpoint_gt_even, | |
2144 &is_inexact_lt_midpoint, | |
2145 &is_inexact_gt_midpoint); | |
2146 R64 = R256.w[0]; // one decimal digit | |
2147 } | |
2148 if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
2149 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
2150 // the result is exact: 10^34 - R64 | |
2151 // incr_exp = 0 with certainty | |
2152 z_exp = z_exp - EXP_P1; | |
2153 e3 = e3 - 1; | |
2154 res.w[1] = | |
2155 z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull; | |
2156 res.w[0] = 0x378d8e6400000000ull - R64; | |
2157 } else { | |
2158 // We want R64 to be the top digit of C4, but we actually | |
2159 // obtained (C4 * 10^(-q4+1))RN; a correction may be needed, | |
2160 // because the top digit is (C4 * 10^(-q4+1))RZ | |
2161 // however, if incr_exp = 1 then R64 = 10 with certainty | |
2162 if (incr_exp) { | |
2163 R64 = 10; | |
2164 } | |
2165 // the result is inexact as C4 has more than 1 significant digit | |
2166 // and C3 * 10^scale = 10^33 | |
2167 // example of case that is treated here: | |
2168 // 100...0 * 10^e3 - 0.41 * 10^e3 = | |
2169 // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1) | |
2170 // note that (e3 > expmin} | |
2171 // in order to round, subtract R64 from 10^34 and then compare | |
2172 // C4 - R64 * 10^(q4-1) with 1/2 ulp | |
2173 // calculate 10^34 - R64 | |
2174 res.w[1] = 0x0001ed09bead87c0ull; | |
2175 res.w[0] = 0x378d8e6400000000ull - R64; | |
2176 z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand | |
2177 // calculate C4 - R64 * 10^(q4-1); this is a rare case and | |
2178 // R64 is small, 1 <= R64 <= 9 | |
2179 e3 = e3 - 1; | |
2180 if (is_inexact_lt_midpoint) { | |
2181 is_inexact_lt_midpoint = 0; | |
2182 is_inexact_gt_midpoint = 1; | |
2183 } else if (is_inexact_gt_midpoint) { | |
2184 is_inexact_gt_midpoint = 0; | |
2185 is_inexact_lt_midpoint = 1; | |
2186 } else if (is_midpoint_lt_even) { | |
2187 is_midpoint_lt_even = 0; | |
2188 is_midpoint_gt_even = 1; | |
2189 } else if (is_midpoint_gt_even) { | |
2190 is_midpoint_gt_even = 0; | |
2191 is_midpoint_lt_even = 1; | |
2192 } else { | |
2193 ; | |
2194 } | |
2195 // the result is always inexact, and never tiny | |
2196 // check for overflow for RN | |
2197 if (e3 > expmax) { | |
2198 if (rnd_mode == ROUNDING_TO_NEAREST) { | |
2199 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf | |
2200 res.w[0] = 0x0000000000000000ull; | |
2201 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
2202 } else { | |
2203 rounding_correction (rnd_mode, | |
2204 is_inexact_lt_midpoint, | |
2205 is_inexact_gt_midpoint, | |
2206 is_midpoint_lt_even, | |
2207 is_midpoint_gt_even, e3, &res, | |
2208 pfpsf); | |
2209 } | |
2210 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
2211 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2212 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2213 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2214 BID_SWAP128 (res); | |
2215 BID_RETURN (res) | |
2216 } | |
2217 // set the inexact flag | |
2218 *pfpsf |= INEXACT_EXCEPTION; | |
2219 res.w[1] = | |
2220 z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; | |
2221 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
2222 rounding_correction (rnd_mode, | |
2223 is_inexact_lt_midpoint, | |
2224 is_inexact_gt_midpoint, | |
2225 is_midpoint_lt_even, | |
2226 is_midpoint_gt_even, e3, &res, | |
2227 pfpsf); | |
2228 } | |
2229 z_exp = res.w[1] & MASK_EXP; | |
2230 } // end result is inexact | |
2231 } // end q4 > 1 | |
2232 } else { // if (e3 = emin) | |
2233 // if e3 = expmin the result is also tiny (the condition for | |
2234 // tininess is C4 > 050...0 [q4 digits] which is met because | |
2235 // the msd of C4 is not zero) | |
2236 // the result is tiny and inexact in all rounding modes; | |
2237 // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp, | |
2238 // gt_half_ulp to calculate) | |
2239 // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged | |
2240 | |
2241 // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp | |
2242 if (gt_half_ulp) { // res = 10^33 - 1 | |
2243 res.w[1] = 0x0000314dc6448d93ull; | |
2244 res.w[0] = 0x38c15b09ffffffffull; | |
2245 } else { | |
2246 res.w[1] = 0x0000314dc6448d93ull; | |
2247 res.w[0] = 0x38c15b0a00000000ull; | |
2248 } | |
2249 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2250 *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later | |
2251 | |
2252 if (eq_half_ulp) { | |
2253 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value | |
2254 } else if (lt_half_ulp) { | |
2255 is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value | |
2256 } else { // if (gt_half_ulp) | |
2257 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value | |
2258 } | |
2259 | |
2260 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
2261 rounding_correction (rnd_mode, | |
2262 is_inexact_lt_midpoint, | |
2263 is_inexact_gt_midpoint, | |
2264 is_midpoint_lt_even, | |
2265 is_midpoint_gt_even, e3, &res, | |
2266 pfpsf); | |
2267 z_exp = res.w[1] & MASK_EXP; | |
2268 } | |
2269 } // end e3 = emin | |
2270 // set the inexact flag (if the result was not exact) | |
2271 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
2272 is_midpoint_lt_even || is_midpoint_gt_even) | |
2273 *pfpsf |= INEXACT_EXCEPTION; | |
2274 } // end 10^33 | |
2275 } // end if (p_sign != z_sign) | |
2276 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1]; | |
2277 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
2278 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2279 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2280 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2281 BID_SWAP128 (res); | |
2282 BID_RETURN (res) | |
2283 | |
2284 } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) | |
2285 (q3 <= delta && delta + q4 <= p34) || // Case (3) | |
2286 (delta < q3 && p34 < delta + q4) || // Case (4) | |
2287 (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5) | |
2288 (delta + q4 < q3)) && // Case (6) | |
2289 !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6) | |
2290 | |
2291 // the result has the sign of z | |
2292 | |
2293 if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2) | |
2294 (delta < q3 && p34 < delta + q4)) { // Case (4) | |
2295 // round first the sum x * y + z with unbounded exponent | |
2296 // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1, | |
2297 // 1 <= scale <= 33 | |
2298 // calculate res = C3 * 10^scale | |
2299 scale = p34 - q3; | |
2300 x0 = delta + q4 - p34; | |
2301 } else if (delta + q4 < q3) { // Case (6) | |
2302 // make Case (6) look like Case (3) or Case (5) with scale = 0 | |
2303 // by scaling up C4 by 10^(q3 - delta - q4) | |
2304 scale = q3 - delta - q4; // 1 <= scale <= 33 | |
2305 if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits | |
2306 if (scale <= 19) { // 10^scale fits in 64 bits | |
2307 // 64 x 64 C4.w[0] * ten2k64[scale] | |
2308 __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]); | |
2309 } else { // 10^scale fits in 128 bits | |
2310 // 64 x 128 C4.w[0] * ten2k128[scale - 20] | |
2311 __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]); | |
2312 } | |
2313 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits | |
2314 // 64 x 128 ten2k64[scale] * C4 | |
2315 __mul_128x64_to_128 (P128, ten2k64[scale], C4); | |
2316 } | |
2317 C4.w[0] = P128.w[0]; | |
2318 C4.w[1] = P128.w[1]; | |
2319 // e4 does not need adjustment, as it is not used from this point on | |
2320 scale = 0; | |
2321 x0 = 0; | |
2322 // now Case (6) looks like Case (3) or Case (5) with scale = 0 | |
2323 } else { // if Case (3) or Case (5) | |
2324 // Note: Case (3) is similar to Case (2), but scale differs and the | |
2325 // result is exact, unless it is tiny (so x0 = 0 when calculating the | |
2326 // result with unbounded exponent) | |
2327 | |
2328 // calculate first the sum x * y + z with unbounded exponent (exact) | |
2329 // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1, | |
2330 // 1 <= scale <= 33 | |
2331 // calculate res = C3 * 10^scale | |
2332 scale = delta + q4 - q3; | |
2333 x0 = 0; | |
2334 // Note: the comments which follow refer [mainly] to Case (2)] | |
2335 } | |
2336 | |
2337 case2_repeat: | |
2338 if (scale == 0) { // this could happen e.g. if we return to case2_repeat | |
2339 // or in Case (4) | |
2340 res.w[1] = C3.w[1]; | |
2341 res.w[0] = C3.w[0]; | |
2342 } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits | |
2343 if (scale <= 19) { // 10^scale fits in 64 bits | |
2344 // 64 x 64 C3.w[0] * ten2k64[scale] | |
2345 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]); | |
2346 } else { // 10^scale fits in 128 bits | |
2347 // 64 x 128 C3.w[0] * ten2k128[scale - 20] | |
2348 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]); | |
2349 } | |
2350 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits | |
2351 // 64 x 128 ten2k64[scale] * C3 | |
2352 __mul_128x64_to_128 (res, ten2k64[scale], C3); | |
2353 } | |
2354 // e3 is already calculated | |
2355 e3 = e3 - scale; | |
2356 // now res = C3 * 10^scale and e3 = e3 - scale | |
2357 // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat | |
2358 // because the result was too small | |
2359 | |
2360 // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34, | |
2361 // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67) | |
2362 // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of | |
2363 // the rounding fits in 128 bits!) | |
2364 // x0 = delta + q4 - p34 (calculated before reaching case2_repeat) | |
2365 // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34 | |
2366 if (x0 == 0) { // this could happen only if we return to case2_repeat, or | |
2367 // for Case (3) or Case (6) | |
2368 R128.w[1] = C4.w[1]; | |
2369 R128.w[0] = C4.w[0]; | |
2370 } else if (q4 <= 18) { | |
2371 // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17 | |
2372 round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp, | |
2373 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2374 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
2375 if (incr_exp) { | |
2376 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17 | |
2377 R64 = ten2k64[q4 - x0]; | |
2378 } | |
2379 R128.w[1] = 0; | |
2380 R128.w[0] = R64; | |
2381 } else if (q4 <= 38) { | |
2382 // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37 | |
2383 P128.w[1] = C4.w[1]; | |
2384 P128.w[0] = C4.w[0]; | |
2385 round128_19_38 (q4, x0, P128, &R128, &incr_exp, | |
2386 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2387 &is_inexact_lt_midpoint, | |
2388 &is_inexact_gt_midpoint); | |
2389 if (incr_exp) { | |
2390 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37 | |
2391 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 | |
2392 R128.w[0] = ten2k64[q4 - x0]; | |
2393 // R128.w[1] stays 0 | |
2394 } else { // 20 <= q4 - x0 <= 37 | |
2395 R128.w[0] = ten2k128[q4 - x0 - 20].w[0]; | |
2396 R128.w[1] = ten2k128[q4 - x0 - 20].w[1]; | |
2397 } | |
2398 } | |
2399 } else if (q4 <= 57) { | |
2400 // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56 | |
2401 P192.w[2] = C4.w[2]; | |
2402 P192.w[1] = C4.w[1]; | |
2403 P192.w[0] = C4.w[0]; | |
2404 round192_39_57 (q4, x0, P192, &R192, &incr_exp, | |
2405 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2406 &is_inexact_lt_midpoint, | |
2407 &is_inexact_gt_midpoint); | |
2408 // R192.w[2] is always 0 | |
2409 if (incr_exp) { | |
2410 // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52 | |
2411 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 | |
2412 R192.w[0] = ten2k64[q4 - x0]; | |
2413 // R192.w[1] stays 0 | |
2414 // R192.w[2] stays 0 | |
2415 } else { // 20 <= q4 - x0 <= 33 | |
2416 R192.w[0] = ten2k128[q4 - x0 - 20].w[0]; | |
2417 R192.w[1] = ten2k128[q4 - x0 - 20].w[1]; | |
2418 // R192.w[2] stays 0 | |
2419 } | |
2420 } | |
2421 R128.w[1] = R192.w[1]; | |
2422 R128.w[0] = R192.w[0]; | |
2423 } else { | |
2424 // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67 | |
2425 round256_58_76 (q4, x0, C4, &R256, &incr_exp, | |
2426 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2427 &is_inexact_lt_midpoint, | |
2428 &is_inexact_gt_midpoint); | |
2429 // R256.w[3] and R256.w[2] are always 0 | |
2430 if (incr_exp) { | |
2431 // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43 | |
2432 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19 | |
2433 R256.w[0] = ten2k64[q4 - x0]; | |
2434 // R256.w[1] stays 0 | |
2435 // R256.w[2] stays 0 | |
2436 // R256.w[3] stays 0 | |
2437 } else { // 20 <= q4 - x0 <= 33 | |
2438 R256.w[0] = ten2k128[q4 - x0 - 20].w[0]; | |
2439 R256.w[1] = ten2k128[q4 - x0 - 20].w[1]; | |
2440 // R256.w[2] stays 0 | |
2441 // R256.w[3] stays 0 | |
2442 } | |
2443 } | |
2444 R128.w[1] = R256.w[1]; | |
2445 R128.w[0] = R256.w[0]; | |
2446 } | |
2447 // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4, | |
2448 // rounded to nearest, which were copied into R128 | |
2449 if (z_sign == p_sign) { | |
2450 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale | |
2451 // the sum can result in [up to] p34 or p34 + 1 digits | |
2452 res.w[0] = res.w[0] + R128.w[0]; | |
2453 res.w[1] = res.w[1] + R128.w[1]; | |
2454 if (res.w[0] < R128.w[0]) | |
2455 res.w[1]++; // carry | |
2456 // if res > 10^34 - 1 need to increase x0 and decrease scale by 1 | |
2457 if (res.w[1] > 0x0001ed09bead87c0ull || | |
2458 (res.w[1] == 0x0001ed09bead87c0ull && | |
2459 res.w[0] > 0x378d8e63ffffffffull)) { | |
2460 // avoid double rounding error | |
2461 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; | |
2462 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; | |
2463 is_midpoint_lt_even0 = is_midpoint_lt_even; | |
2464 is_midpoint_gt_even0 = is_midpoint_gt_even; | |
2465 is_inexact_lt_midpoint = 0; | |
2466 is_inexact_gt_midpoint = 0; | |
2467 is_midpoint_lt_even = 0; | |
2468 is_midpoint_gt_even = 0; | |
2469 P128.w[1] = res.w[1]; | |
2470 P128.w[0] = res.w[0]; | |
2471 round128_19_38 (35, 1, P128, &res, &incr_exp, | |
2472 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2473 &is_inexact_lt_midpoint, | |
2474 &is_inexact_gt_midpoint); | |
2475 // incr_exp is 0 with certainty in this case | |
2476 // avoid a double rounding error | |
2477 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
2478 is_midpoint_lt_even) { // double rounding error upward | |
2479 // res = res - 1 | |
2480 res.w[0]--; | |
2481 if (res.w[0] == 0xffffffffffffffffull) | |
2482 res.w[1]--; | |
2483 // Note: a double rounding error upward is not possible; for this | |
2484 // the result after the first rounding would have to be 99...95 | |
2485 // (35 digits in all), possibly followed by a number of zeros; this | |
2486 // not possible in Cases (2)-(6) or (15)-(17) which may get here | |
2487 is_midpoint_lt_even = 0; | |
2488 is_inexact_lt_midpoint = 1; | |
2489 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
2490 is_midpoint_gt_even) { // double rounding error downward | |
2491 // res = res + 1 | |
2492 res.w[0]++; | |
2493 if (res.w[0] == 0) | |
2494 res.w[1]++; | |
2495 is_midpoint_gt_even = 0; | |
2496 is_inexact_gt_midpoint = 1; | |
2497 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
2498 !is_inexact_lt_midpoint | |
2499 && !is_inexact_gt_midpoint) { | |
2500 // if this second rounding was exact the result may still be | |
2501 // inexact because of the first rounding | |
2502 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
2503 is_inexact_gt_midpoint = 1; | |
2504 } | |
2505 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
2506 is_inexact_lt_midpoint = 1; | |
2507 } | |
2508 } else if (is_midpoint_gt_even && | |
2509 (is_inexact_gt_midpoint0 | |
2510 || is_midpoint_lt_even0)) { | |
2511 // pulled up to a midpoint | |
2512 is_inexact_lt_midpoint = 1; | |
2513 is_inexact_gt_midpoint = 0; | |
2514 is_midpoint_lt_even = 0; | |
2515 is_midpoint_gt_even = 0; | |
2516 } else if (is_midpoint_lt_even && | |
2517 (is_inexact_lt_midpoint0 | |
2518 || is_midpoint_gt_even0)) { | |
2519 // pulled down to a midpoint | |
2520 is_inexact_lt_midpoint = 0; | |
2521 is_inexact_gt_midpoint = 1; | |
2522 is_midpoint_lt_even = 0; | |
2523 is_midpoint_gt_even = 0; | |
2524 } else { | |
2525 ; | |
2526 } | |
2527 // adjust exponent | |
2528 e3 = e3 + 1; | |
2529 if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
2530 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
2531 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || | |
2532 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { | |
2533 is_inexact_lt_midpoint = 1; | |
2534 } | |
2535 } | |
2536 } else { | |
2537 // this is the result rounded with unbounded exponent, unless a | |
2538 // correction is needed | |
2539 res.w[1] = res.w[1] & MASK_COEFF; | |
2540 if (lsb == 1) { | |
2541 if (is_midpoint_gt_even) { | |
2542 // res = res + 1 | |
2543 is_midpoint_gt_even = 0; | |
2544 is_midpoint_lt_even = 1; | |
2545 res.w[0]++; | |
2546 if (res.w[0] == 0x0) | |
2547 res.w[1]++; | |
2548 // check for rounding overflow | |
2549 if (res.w[1] == 0x0001ed09bead87c0ull && | |
2550 res.w[0] == 0x378d8e6400000000ull) { | |
2551 // res = 10^34 => rounding overflow | |
2552 res.w[1] = 0x0000314dc6448d93ull; | |
2553 res.w[0] = 0x38c15b0a00000000ull; // 10^33 | |
2554 e3++; | |
2555 } | |
2556 } else if (is_midpoint_lt_even) { | |
2557 // res = res - 1 | |
2558 is_midpoint_lt_even = 0; | |
2559 is_midpoint_gt_even = 1; | |
2560 res.w[0]--; | |
2561 if (res.w[0] == 0xffffffffffffffffull) | |
2562 res.w[1]--; | |
2563 // if the result is pure zero, the sign depends on the rounding | |
2564 // mode (x*y and z had opposite signs) | |
2565 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { | |
2566 if (rnd_mode != ROUNDING_DOWN) | |
2567 z_sign = 0x0000000000000000ull; | |
2568 else | |
2569 z_sign = 0x8000000000000000ull; | |
2570 // the exponent is max (e3, expmin) | |
2571 res.w[1] = 0x0; | |
2572 res.w[0] = 0x0; | |
2573 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
2574 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2575 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2576 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2577 BID_SWAP128 (res); | |
2578 BID_RETURN (res) | |
2579 } | |
2580 } else { | |
2581 ; | |
2582 } | |
2583 } | |
2584 } | |
2585 } else { // if (z_sign != p_sign) | |
2586 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4 | |
2587 // used to swap rounding indicators if p_sign != z_sign | |
2588 // the sum can result in [up to] p34 or p34 - 1 digits | |
2589 tmp64 = res.w[0]; | |
2590 res.w[0] = res.w[0] - R128.w[0]; | |
2591 res.w[1] = res.w[1] - R128.w[1]; | |
2592 if (res.w[0] > tmp64) | |
2593 res.w[1]--; // borrow | |
2594 // if res < 10^33 and exp > expmin need to decrease x0 and | |
2595 // increase scale by 1 | |
2596 if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull || | |
2597 (res.w[1] == 0x0000314dc6448d93ull && | |
2598 res.w[0] < 0x38c15b0a00000000ull)) || | |
2599 (is_inexact_lt_midpoint | |
2600 && res.w[1] == 0x0000314dc6448d93ull | |
2601 && res.w[0] == 0x38c15b0a00000000ull)) | |
2602 && x0 >= 1) { | |
2603 x0 = x0 - 1; | |
2604 // first restore e3, otherwise it will be too small | |
2605 e3 = e3 + scale; | |
2606 scale = scale + 1; | |
2607 is_inexact_lt_midpoint = 0; | |
2608 is_inexact_gt_midpoint = 0; | |
2609 is_midpoint_lt_even = 0; | |
2610 is_midpoint_gt_even = 0; | |
2611 incr_exp = 0; | |
2612 goto case2_repeat; | |
2613 } | |
2614 // else this is the result rounded with unbounded exponent; | |
2615 // because the result has opposite sign to that of C4 which was | |
2616 // rounded, need to change the rounding indicators | |
2617 if (is_inexact_lt_midpoint) { | |
2618 is_inexact_lt_midpoint = 0; | |
2619 is_inexact_gt_midpoint = 1; | |
2620 } else if (is_inexact_gt_midpoint) { | |
2621 is_inexact_gt_midpoint = 0; | |
2622 is_inexact_lt_midpoint = 1; | |
2623 } else if (lsb == 0) { | |
2624 if (is_midpoint_lt_even) { | |
2625 is_midpoint_lt_even = 0; | |
2626 is_midpoint_gt_even = 1; | |
2627 } else if (is_midpoint_gt_even) { | |
2628 is_midpoint_gt_even = 0; | |
2629 is_midpoint_lt_even = 1; | |
2630 } else { | |
2631 ; | |
2632 } | |
2633 } else if (lsb == 1) { | |
2634 if (is_midpoint_lt_even) { | |
2635 // res = res + 1 | |
2636 res.w[0]++; | |
2637 if (res.w[0] == 0x0) | |
2638 res.w[1]++; | |
2639 // check for rounding overflow | |
2640 if (res.w[1] == 0x0001ed09bead87c0ull && | |
2641 res.w[0] == 0x378d8e6400000000ull) { | |
2642 // res = 10^34 => rounding overflow | |
2643 res.w[1] = 0x0000314dc6448d93ull; | |
2644 res.w[0] = 0x38c15b0a00000000ull; // 10^33 | |
2645 e3++; | |
2646 } | |
2647 } else if (is_midpoint_gt_even) { | |
2648 // res = res - 1 | |
2649 res.w[0]--; | |
2650 if (res.w[0] == 0xffffffffffffffffull) | |
2651 res.w[1]--; | |
2652 // if the result is pure zero, the sign depends on the rounding | |
2653 // mode (x*y and z had opposite signs) | |
2654 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) { | |
2655 if (rnd_mode != ROUNDING_DOWN) | |
2656 z_sign = 0x0000000000000000ull; | |
2657 else | |
2658 z_sign = 0x8000000000000000ull; | |
2659 // the exponent is max (e3, expmin) | |
2660 res.w[1] = 0x0; | |
2661 res.w[0] = 0x0; | |
2662 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
2663 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2664 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2665 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2666 BID_SWAP128 (res); | |
2667 BID_RETURN (res) | |
2668 } | |
2669 } else { | |
2670 ; | |
2671 } | |
2672 } else { | |
2673 ; | |
2674 } | |
2675 } | |
2676 // check for underflow | |
2677 if (e3 == expmin) { // and if significand < 10^33 => result is tiny | |
2678 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull || | |
2679 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && | |
2680 res.w[0] < 0x38c15b0a00000000ull)) { | |
2681 is_tiny = 1; | |
2682 } | |
2683 } else if (e3 < expmin) { | |
2684 // the result is tiny, so we must truncate more of res | |
2685 is_tiny = 1; | |
2686 x0 = expmin - e3; | |
2687 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; | |
2688 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; | |
2689 is_midpoint_lt_even0 = is_midpoint_lt_even; | |
2690 is_midpoint_gt_even0 = is_midpoint_gt_even; | |
2691 is_inexact_lt_midpoint = 0; | |
2692 is_inexact_gt_midpoint = 0; | |
2693 is_midpoint_lt_even = 0; | |
2694 is_midpoint_gt_even = 0; | |
2695 // determine the number of decimal digits in res | |
2696 if (res.w[1] == 0x0) { | |
2697 // between 1 and 19 digits | |
2698 for (ind = 1; ind <= 19; ind++) { | |
2699 if (res.w[0] < ten2k64[ind]) { | |
2700 break; | |
2701 } | |
2702 } | |
2703 // ind digits | |
2704 } else if (res.w[1] < ten2k128[0].w[1] || | |
2705 (res.w[1] == ten2k128[0].w[1] | |
2706 && res.w[0] < ten2k128[0].w[0])) { | |
2707 // 20 digits | |
2708 ind = 20; | |
2709 } else { // between 21 and 38 digits | |
2710 for (ind = 1; ind <= 18; ind++) { | |
2711 if (res.w[1] < ten2k128[ind].w[1] || | |
2712 (res.w[1] == ten2k128[ind].w[1] && | |
2713 res.w[0] < ten2k128[ind].w[0])) { | |
2714 break; | |
2715 } | |
2716 } | |
2717 // ind + 20 digits | |
2718 ind = ind + 20; | |
2719 } | |
2720 | |
2721 // at this point ind >= x0; because delta >= 2 on this path, the case | |
2722 // ind = x0 can occur only in Case (2) or case (3), when C3 has one | |
2723 // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin), | |
2724 // the signs of x * y and z are opposite, and through cancellation | |
2725 // the most significant decimal digit in res has the weight | |
2726 // 10^(emin-1); however, it is clear that in this case the most | |
2727 // significant digit is 9, so the result before rounding is | |
2728 // 0.9... * 10^emin | |
2729 // Otherwise, ind > x0 because there are non-zero decimal digits in the | |
2730 // result with weight of at least 10^emin, and correction for underflow | |
2731 // can be carried out using the round*_*_2_* () routines | |
2732 if (x0 == ind) { // the result before rounding is 0.9... * 10^emin | |
2733 res.w[1] = 0x0; | |
2734 res.w[0] = 0x1; | |
2735 is_inexact_gt_midpoint = 1; | |
2736 } else if (ind <= 18) { // check that 2 <= ind | |
2737 // 2 <= ind <= 18, 1 <= x0 <= 17 | |
2738 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, | |
2739 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2740 &is_inexact_lt_midpoint, | |
2741 &is_inexact_gt_midpoint); | |
2742 if (incr_exp) { | |
2743 // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17 | |
2744 R64 = ten2k64[ind - x0]; | |
2745 } | |
2746 res.w[1] = 0; | |
2747 res.w[0] = R64; | |
2748 } else if (ind <= 38) { | |
2749 // 19 <= ind <= 38 | |
2750 P128.w[1] = res.w[1]; | |
2751 P128.w[0] = res.w[0]; | |
2752 round128_19_38 (ind, x0, P128, &res, &incr_exp, | |
2753 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2754 &is_inexact_lt_midpoint, | |
2755 &is_inexact_gt_midpoint); | |
2756 if (incr_exp) { | |
2757 // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37 | |
2758 if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19 | |
2759 res.w[0] = ten2k64[ind - x0]; | |
2760 // res.w[1] stays 0 | |
2761 } else { // 20 <= ind - x0 <= 37 | |
2762 res.w[0] = ten2k128[ind - x0 - 20].w[0]; | |
2763 res.w[1] = ten2k128[ind - x0 - 20].w[1]; | |
2764 } | |
2765 } | |
2766 } | |
2767 // avoid a double rounding error | |
2768 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
2769 is_midpoint_lt_even) { // double rounding error upward | |
2770 // res = res - 1 | |
2771 res.w[0]--; | |
2772 if (res.w[0] == 0xffffffffffffffffull) | |
2773 res.w[1]--; | |
2774 // Note: a double rounding error upward is not possible; for this | |
2775 // the result after the first rounding would have to be 99...95 | |
2776 // (35 digits in all), possibly followed by a number of zeros; this | |
2777 // not possible in Cases (2)-(6) which may get here | |
2778 is_midpoint_lt_even = 0; | |
2779 is_inexact_lt_midpoint = 1; | |
2780 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
2781 is_midpoint_gt_even) { // double rounding error downward | |
2782 // res = res + 1 | |
2783 res.w[0]++; | |
2784 if (res.w[0] == 0) | |
2785 res.w[1]++; | |
2786 is_midpoint_gt_even = 0; | |
2787 is_inexact_gt_midpoint = 1; | |
2788 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
2789 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
2790 // if this second rounding was exact the result may still be | |
2791 // inexact because of the first rounding | |
2792 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
2793 is_inexact_gt_midpoint = 1; | |
2794 } | |
2795 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
2796 is_inexact_lt_midpoint = 1; | |
2797 } | |
2798 } else if (is_midpoint_gt_even && | |
2799 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { | |
2800 // pulled up to a midpoint | |
2801 is_inexact_lt_midpoint = 1; | |
2802 is_inexact_gt_midpoint = 0; | |
2803 is_midpoint_lt_even = 0; | |
2804 is_midpoint_gt_even = 0; | |
2805 } else if (is_midpoint_lt_even && | |
2806 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { | |
2807 // pulled down to a midpoint | |
2808 is_inexact_lt_midpoint = 0; | |
2809 is_inexact_gt_midpoint = 1; | |
2810 is_midpoint_lt_even = 0; | |
2811 is_midpoint_gt_even = 0; | |
2812 } else { | |
2813 ; | |
2814 } | |
2815 // adjust exponent | |
2816 e3 = e3 + x0; | |
2817 if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
2818 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
2819 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 || | |
2820 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) { | |
2821 is_inexact_lt_midpoint = 1; | |
2822 } | |
2823 } | |
2824 } else { | |
2825 ; // not underflow | |
2826 } | |
2827 // check for inexact result | |
2828 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
2829 is_midpoint_lt_even || is_midpoint_gt_even) { | |
2830 // set the inexact flag | |
2831 *pfpsf |= INEXACT_EXCEPTION; | |
2832 if (is_tiny) | |
2833 *pfpsf |= UNDERFLOW_EXCEPTION; | |
2834 } | |
2835 // now check for significand = 10^34 (may have resulted from going | |
2836 // back to case2_repeat) | |
2837 if (res.w[1] == 0x0001ed09bead87c0ull && | |
2838 res.w[0] == 0x378d8e6400000000ull) { // if res = 10^34 | |
2839 res.w[1] = 0x0000314dc6448d93ull; // res = 10^33 | |
2840 res.w[0] = 0x38c15b0a00000000ull; | |
2841 e3 = e3 + 1; | |
2842 } | |
2843 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1]; | |
2844 // check for overflow | |
2845 if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) { | |
2846 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf | |
2847 res.w[0] = 0x0000000000000000ull; | |
2848 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
2849 } | |
2850 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
2851 rounding_correction (rnd_mode, | |
2852 is_inexact_lt_midpoint, | |
2853 is_inexact_gt_midpoint, | |
2854 is_midpoint_lt_even, is_midpoint_gt_even, | |
2855 e3, &res, pfpsf); | |
2856 } | |
2857 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
2858 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2859 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2860 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2861 BID_SWAP128 (res); | |
2862 BID_RETURN (res) | |
2863 | |
2864 } else { | |
2865 | |
2866 // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and | |
2867 // the signs of x*y and z are opposite; in these cases massive | |
2868 // cancellation can occur, so it is better to scale either C3 or C4 and | |
2869 // to perform the subtraction before rounding; rounding is performed | |
2870 // next, depending on the number of decimal digits in the result and on | |
2871 // the exponent value | |
2872 // Note: overlow is not possible in this case | |
2873 // this is similar to Cases (15), (16), and (17) | |
2874 | |
2875 if (delta + q4 < q3) { // from Case (6) | |
2876 // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and | |
2877 // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign) | |
2878 // and call add_and_round; delta stays positive | |
2879 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 | |
2880 P128.w[1] = C3.w[1]; | |
2881 P128.w[0] = C3.w[0]; | |
2882 C3.w[1] = C4.w[1]; | |
2883 C3.w[0] = C4.w[0]; | |
2884 C4.w[1] = P128.w[1]; | |
2885 C4.w[0] = P128.w[0]; | |
2886 ind = q3; | |
2887 q3 = q4; | |
2888 q4 = ind; | |
2889 ind = e3; | |
2890 e3 = e4; | |
2891 e4 = ind; | |
2892 tmp_sign = z_sign; | |
2893 z_sign = p_sign; | |
2894 p_sign = tmp_sign; | |
2895 } else { // from Cases (2), (3), (4), (5) | |
2896 // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be | |
2897 // scaled up by q4 + delta - q3; this is the same as in Cases (15), | |
2898 // (16), and (17) if we just change the sign of delta | |
2899 delta = -delta; | |
2900 } | |
2901 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, | |
2902 rnd_mode, &is_midpoint_lt_even, | |
2903 &is_midpoint_gt_even, &is_inexact_lt_midpoint, | |
2904 &is_inexact_gt_midpoint, pfpsf, &res); | |
2905 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
2906 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
2907 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
2908 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
2909 BID_SWAP128 (res); | |
2910 BID_RETURN (res) | |
2911 | |
2912 } | |
2913 | |
2914 } else { // if delta < 0 | |
2915 | |
2916 delta = -delta; | |
2917 | |
2918 if (p34 < q4 && q4 <= delta) { // Case (7) | |
2919 | |
2920 // truncate C4 to p34 digits into res | |
2921 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68 | |
2922 x0 = q4 - p34; | |
2923 if (q4 <= 38) { | |
2924 P128.w[1] = C4.w[1]; | |
2925 P128.w[0] = C4.w[0]; | |
2926 round128_19_38 (q4, x0, P128, &res, &incr_exp, | |
2927 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2928 &is_inexact_lt_midpoint, | |
2929 &is_inexact_gt_midpoint); | |
2930 } else if (q4 <= 57) { // 35 <= q4 <= 57 | |
2931 P192.w[2] = C4.w[2]; | |
2932 P192.w[1] = C4.w[1]; | |
2933 P192.w[0] = C4.w[0]; | |
2934 round192_39_57 (q4, x0, P192, &R192, &incr_exp, | |
2935 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2936 &is_inexact_lt_midpoint, | |
2937 &is_inexact_gt_midpoint); | |
2938 res.w[0] = R192.w[0]; | |
2939 res.w[1] = R192.w[1]; | |
2940 } else { // if (q4 <= 68) | |
2941 round256_58_76 (q4, x0, C4, &R256, &incr_exp, | |
2942 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
2943 &is_inexact_lt_midpoint, | |
2944 &is_inexact_gt_midpoint); | |
2945 res.w[0] = R256.w[0]; | |
2946 res.w[1] = R256.w[1]; | |
2947 } | |
2948 e4 = e4 + x0; | |
2949 if (incr_exp) { | |
2950 e4 = e4 + 1; | |
2951 } | |
2952 if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
2953 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
2954 // if C4 rounded to p34 digits is exact then the result is inexact, | |
2955 // in a way that depends on the signs of x * y and z | |
2956 if (p_sign == z_sign) { | |
2957 is_inexact_lt_midpoint = 1; | |
2958 } else { // if (p_sign != z_sign) | |
2959 if (res.w[1] != 0x0000314dc6448d93ull || | |
2960 res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33 | |
2961 is_inexact_gt_midpoint = 1; | |
2962 } else { // res = 10^33 and exact is a special case | |
2963 // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1 | |
2964 // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1 | |
2965 // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1 | |
2966 // Note: ulp is really ulp/10 (after borrow which propagates to msd) | |
2967 if (delta > p34 + 1) { // C3 < 1/2 | |
2968 // res = 10^33, unchanged | |
2969 is_inexact_gt_midpoint = 1; | |
2970 } else { // if (delta == p34 + 1) | |
2971 if (q3 <= 19) { | |
2972 if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp | |
2973 // res = 10^33, unchanged | |
2974 is_inexact_gt_midpoint = 1; | |
2975 } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp | |
2976 // res = 10^33, unchanged | |
2977 is_midpoint_lt_even = 1; | |
2978 } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp | |
2979 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
2980 res.w[0] = 0x378d8e63ffffffffull; | |
2981 e4 = e4 - 1; | |
2982 is_inexact_lt_midpoint = 1; | |
2983 } | |
2984 } else { // if (20 <= q3 <=34) | |
2985 if (C3.w[1] < midpoint128[q3 - 20].w[1] || | |
2986 (C3.w[1] == midpoint128[q3 - 20].w[1] && | |
2987 C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp | |
2988 // res = 10^33, unchanged | |
2989 is_inexact_gt_midpoint = 1; | |
2990 } else if (C3.w[1] == midpoint128[q3 - 20].w[1] && | |
2991 C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp | |
2992 // res = 10^33, unchanged | |
2993 is_midpoint_lt_even = 1; | |
2994 } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp | |
2995 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
2996 res.w[0] = 0x378d8e63ffffffffull; | |
2997 e4 = e4 - 1; | |
2998 is_inexact_lt_midpoint = 1; | |
2999 } | |
3000 } | |
3001 } | |
3002 } | |
3003 } | |
3004 } else if (is_midpoint_lt_even) { | |
3005 if (z_sign != p_sign) { | |
3006 // needs correction: res = res - 1 | |
3007 res.w[0] = res.w[0] - 1; | |
3008 if (res.w[0] == 0xffffffffffffffffull) | |
3009 res.w[1]--; | |
3010 // if it is (10^33-1)*10^e4 then the corect result is | |
3011 // (10^34-1)*10(e4-1) | |
3012 if (res.w[1] == 0x0000314dc6448d93ull && | |
3013 res.w[0] == 0x38c15b09ffffffffull) { | |
3014 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
3015 res.w[0] = 0x378d8e63ffffffffull; | |
3016 e4 = e4 - 1; | |
3017 } | |
3018 is_midpoint_lt_even = 0; | |
3019 is_inexact_lt_midpoint = 1; | |
3020 } else { // if (z_sign == p_sign) | |
3021 is_midpoint_lt_even = 0; | |
3022 is_inexact_gt_midpoint = 1; | |
3023 } | |
3024 } else if (is_midpoint_gt_even) { | |
3025 if (z_sign == p_sign) { | |
3026 // needs correction: res = res + 1 (cannot cross in the next binade) | |
3027 res.w[0] = res.w[0] + 1; | |
3028 if (res.w[0] == 0x0000000000000000ull) | |
3029 res.w[1]++; | |
3030 is_midpoint_gt_even = 0; | |
3031 is_inexact_gt_midpoint = 1; | |
3032 } else { // if (z_sign != p_sign) | |
3033 is_midpoint_gt_even = 0; | |
3034 is_inexact_lt_midpoint = 1; | |
3035 } | |
3036 } else { | |
3037 ; // the rounded result is already correct | |
3038 } | |
3039 // check for overflow | |
3040 if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) { | |
3041 res.w[1] = p_sign | 0x7800000000000000ull; | |
3042 res.w[0] = 0x0000000000000000ull; | |
3043 *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION); | |
3044 } else { // no overflow or not RN | |
3045 p_exp = ((UINT64) (e4 + 6176) << 49); | |
3046 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1]; | |
3047 } | |
3048 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
3049 rounding_correction (rnd_mode, | |
3050 is_inexact_lt_midpoint, | |
3051 is_inexact_gt_midpoint, | |
3052 is_midpoint_lt_even, is_midpoint_gt_even, | |
3053 e4, &res, pfpsf); | |
3054 } | |
3055 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
3056 is_midpoint_lt_even || is_midpoint_gt_even) { | |
3057 // set the inexact flag | |
3058 *pfpsf |= INEXACT_EXCEPTION; | |
3059 } | |
3060 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
3061 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
3062 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
3063 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
3064 BID_SWAP128 (res); | |
3065 BID_RETURN (res) | |
3066 | |
3067 } else if ((q4 <= p34 && p34 <= delta) || // Case (8) | |
3068 (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9) | |
3069 (q4 <= delta && delta + q3 <= p34) || // Case (10) | |
3070 (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13) | |
3071 (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14) | |
3072 (delta + q3 < q4 && q4 <= p34)) { // Case (18) | |
3073 | |
3074 // Case (8) is similar to Case (1), with C3 and C4 swapped | |
3075 // Case (9) is similar to Case (2), with C3 and C4 swapped | |
3076 // Case (10) is similar to Case (3), with C3 and C4 swapped | |
3077 // Case (13) is similar to Case (4), with C3 and C4 swapped | |
3078 // Case (14) is similar to Case (5), with C3 and C4 swapped | |
3079 // Case (18) is similar to Case (6), with C3 and C4 swapped | |
3080 | |
3081 // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp) | |
3082 // and go back to delta_ge_zero | |
3083 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3 | |
3084 P128.w[1] = C3.w[1]; | |
3085 P128.w[0] = C3.w[0]; | |
3086 C3.w[1] = C4.w[1]; | |
3087 C3.w[0] = C4.w[0]; | |
3088 C4.w[1] = P128.w[1]; | |
3089 C4.w[0] = P128.w[0]; | |
3090 ind = q3; | |
3091 q3 = q4; | |
3092 q4 = ind; | |
3093 ind = e3; | |
3094 e3 = e4; | |
3095 e4 = ind; | |
3096 tmp_sign = z_sign; | |
3097 z_sign = p_sign; | |
3098 p_sign = tmp_sign; | |
3099 tmp.ui64 = z_exp; | |
3100 z_exp = p_exp; | |
3101 p_exp = tmp.ui64; | |
3102 goto delta_ge_zero; | |
3103 | |
3104 } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11) | |
3105 (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12) | |
3106 | |
3107 // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3, | |
3108 // 1 <= x0 <= q3 - 1 <= p34 - 1 | |
3109 x0 = e4 - e3; // or x0 = delta + q3 - q4 | |
3110 if (q3 <= 18) { // 2 <= q3 <= 18 | |
3111 round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp, | |
3112 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
3113 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
3114 // C3.w[1] = 0; | |
3115 C3.w[0] = R64; | |
3116 } else if (q3 <= 38) { | |
3117 round128_19_38 (q3, x0, C3, &R128, &incr_exp, | |
3118 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
3119 &is_inexact_lt_midpoint, | |
3120 &is_inexact_gt_midpoint); | |
3121 C3.w[1] = R128.w[1]; | |
3122 C3.w[0] = R128.w[0]; | |
3123 } | |
3124 // the rounded result has q3 - x0 digits | |
3125 // we want the exponent to be e4, so if incr_exp = 1 then | |
3126 // multiply the rounded result by 10 - it will still fit in 113 bits | |
3127 if (incr_exp) { | |
3128 // 64 x 128 -> 128 | |
3129 P128.w[1] = C3.w[1]; | |
3130 P128.w[0] = C3.w[0]; | |
3131 __mul_64x128_to_128 (C3, ten2k64[1], P128); | |
3132 } | |
3133 e3 = e3 + x0; // this is e4 | |
3134 // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3; | |
3135 // the result will have the sign of x * y; the exponent is e4 | |
3136 R256.w[3] = 0; | |
3137 R256.w[2] = 0; | |
3138 R256.w[1] = C3.w[1]; | |
3139 R256.w[0] = C3.w[0]; | |
3140 if (p_sign == z_sign) { // R256 = C4 + R256 | |
3141 add256 (C4, R256, &R256); | |
3142 } else { // if (p_sign != z_sign) { // R256 = C4 - R256 | |
3143 sub256 (C4, R256, &R256); // the result cannot be pure zero | |
3144 // because the result has opposite sign to that of R256 which was | |
3145 // rounded, need to change the rounding indicators | |
3146 lsb = C4.w[0] & 0x01; | |
3147 if (is_inexact_lt_midpoint) { | |
3148 is_inexact_lt_midpoint = 0; | |
3149 is_inexact_gt_midpoint = 1; | |
3150 } else if (is_inexact_gt_midpoint) { | |
3151 is_inexact_gt_midpoint = 0; | |
3152 is_inexact_lt_midpoint = 1; | |
3153 } else if (lsb == 0) { | |
3154 if (is_midpoint_lt_even) { | |
3155 is_midpoint_lt_even = 0; | |
3156 is_midpoint_gt_even = 1; | |
3157 } else if (is_midpoint_gt_even) { | |
3158 is_midpoint_gt_even = 0; | |
3159 is_midpoint_lt_even = 1; | |
3160 } else { | |
3161 ; | |
3162 } | |
3163 } else if (lsb == 1) { | |
3164 if (is_midpoint_lt_even) { | |
3165 // res = res + 1 | |
3166 R256.w[0]++; | |
3167 if (R256.w[0] == 0x0) { | |
3168 R256.w[1]++; | |
3169 if (R256.w[1] == 0x0) { | |
3170 R256.w[2]++; | |
3171 if (R256.w[2] == 0x0) { | |
3172 R256.w[3]++; | |
3173 } | |
3174 } | |
3175 } | |
3176 // no check for rounding overflow - R256 was a difference | |
3177 } else if (is_midpoint_gt_even) { | |
3178 // res = res - 1 | |
3179 R256.w[0]--; | |
3180 if (R256.w[0] == 0xffffffffffffffffull) { | |
3181 R256.w[1]--; | |
3182 if (R256.w[1] == 0xffffffffffffffffull) { | |
3183 R256.w[2]--; | |
3184 if (R256.w[2] == 0xffffffffffffffffull) { | |
3185 R256.w[3]--; | |
3186 } | |
3187 } | |
3188 } | |
3189 } else { | |
3190 ; | |
3191 } | |
3192 } else { | |
3193 ; | |
3194 } | |
3195 } | |
3196 // determine the number of decimal digits in R256 | |
3197 ind = nr_digits256 (R256); // ind >= p34 | |
3198 // if R256 is sum, then ind > p34; if R256 is a difference, then | |
3199 // ind >= p34; this means that we can calculate the result rounded to | |
3200 // the destination precision, with unbounded exponent, starting from R256 | |
3201 // and using the indicators from the rounding of C3 to avoid a double | |
3202 // rounding error | |
3203 | |
3204 if (ind < p34) { | |
3205 ; | |
3206 } else if (ind == p34) { | |
3207 // the result rounded to the destination precision with | |
3208 // unbounded exponent | |
3209 // is (-1)^p_sign * R256 * 10^e4 | |
3210 res.w[1] = R256.w[1]; | |
3211 res.w[0] = R256.w[0]; | |
3212 } else { // if (ind > p34) | |
3213 // if more than P digits, round to nearest to P digits | |
3214 // round R256 to p34 digits | |
3215 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68 | |
3216 // save C3 rounding indicators to help avoid double rounding error | |
3217 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; | |
3218 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; | |
3219 is_midpoint_lt_even0 = is_midpoint_lt_even; | |
3220 is_midpoint_gt_even0 = is_midpoint_gt_even; | |
3221 // initialize rounding indicators | |
3222 is_inexact_lt_midpoint = 0; | |
3223 is_inexact_gt_midpoint = 0; | |
3224 is_midpoint_lt_even = 0; | |
3225 is_midpoint_gt_even = 0; | |
3226 // round to p34 digits; the result fits in 113 bits | |
3227 if (ind <= 38) { | |
3228 P128.w[1] = R256.w[1]; | |
3229 P128.w[0] = R256.w[0]; | |
3230 round128_19_38 (ind, x0, P128, &R128, &incr_exp, | |
3231 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
3232 &is_inexact_lt_midpoint, | |
3233 &is_inexact_gt_midpoint); | |
3234 } else if (ind <= 57) { | |
3235 P192.w[2] = R256.w[2]; | |
3236 P192.w[1] = R256.w[1]; | |
3237 P192.w[0] = R256.w[0]; | |
3238 round192_39_57 (ind, x0, P192, &R192, &incr_exp, | |
3239 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
3240 &is_inexact_lt_midpoint, | |
3241 &is_inexact_gt_midpoint); | |
3242 R128.w[1] = R192.w[1]; | |
3243 R128.w[0] = R192.w[0]; | |
3244 } else { // if (ind <= 68) | |
3245 round256_58_76 (ind, x0, R256, &R256, &incr_exp, | |
3246 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
3247 &is_inexact_lt_midpoint, | |
3248 &is_inexact_gt_midpoint); | |
3249 R128.w[1] = R256.w[1]; | |
3250 R128.w[0] = R256.w[0]; | |
3251 } | |
3252 // the rounded result has p34 = 34 digits | |
3253 e4 = e4 + x0 + incr_exp; | |
3254 | |
3255 res.w[1] = R128.w[1]; | |
3256 res.w[0] = R128.w[0]; | |
3257 | |
3258 // avoid a double rounding error | |
3259 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
3260 is_midpoint_lt_even) { // double rounding error upward | |
3261 // res = res - 1 | |
3262 res.w[0]--; | |
3263 if (res.w[0] == 0xffffffffffffffffull) | |
3264 res.w[1]--; | |
3265 is_midpoint_lt_even = 0; | |
3266 is_inexact_lt_midpoint = 1; | |
3267 // Note: a double rounding error upward is not possible; for this | |
3268 // the result after the first rounding would have to be 99...95 | |
3269 // (35 digits in all), possibly followed by a number of zeros; this | |
3270 // not possible in Cases (2)-(6) or (15)-(17) which may get here | |
3271 // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent | |
3272 if (res.w[1] == 0x0000314dc6448d93ull && | |
3273 res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1 | |
3274 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
3275 res.w[0] = 0x378d8e63ffffffffull; | |
3276 e4--; | |
3277 } | |
3278 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
3279 is_midpoint_gt_even) { // double rounding error downward | |
3280 // res = res + 1 | |
3281 res.w[0]++; | |
3282 if (res.w[0] == 0) | |
3283 res.w[1]++; | |
3284 is_midpoint_gt_even = 0; | |
3285 is_inexact_gt_midpoint = 1; | |
3286 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
3287 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
3288 // if this second rounding was exact the result may still be | |
3289 // inexact because of the first rounding | |
3290 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
3291 is_inexact_gt_midpoint = 1; | |
3292 } | |
3293 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
3294 is_inexact_lt_midpoint = 1; | |
3295 } | |
3296 } else if (is_midpoint_gt_even && | |
3297 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { | |
3298 // pulled up to a midpoint | |
3299 is_inexact_lt_midpoint = 1; | |
3300 is_inexact_gt_midpoint = 0; | |
3301 is_midpoint_lt_even = 0; | |
3302 is_midpoint_gt_even = 0; | |
3303 } else if (is_midpoint_lt_even && | |
3304 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { | |
3305 // pulled down to a midpoint | |
3306 is_inexact_lt_midpoint = 0; | |
3307 is_inexact_gt_midpoint = 1; | |
3308 is_midpoint_lt_even = 0; | |
3309 is_midpoint_gt_even = 0; | |
3310 } else { | |
3311 ; | |
3312 } | |
3313 } | |
3314 | |
3315 // determine tininess | |
3316 if (rnd_mode == ROUNDING_TO_NEAREST) { | |
3317 if (e4 < expmin) { | |
3318 is_tiny = 1; // for other rounding modes apply correction | |
3319 } | |
3320 } else { | |
3321 // for RM, RP, RZ, RA apply correction in order to determine tininess | |
3322 // but do not save the result; apply the correction to | |
3323 // (-1)^p_sign * res * 10^0 | |
3324 P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1]; | |
3325 P128.w[0] = res.w[0]; | |
3326 rounding_correction (rnd_mode, | |
3327 is_inexact_lt_midpoint, | |
3328 is_inexact_gt_midpoint, | |
3329 is_midpoint_lt_even, is_midpoint_gt_even, | |
3330 0, &P128, pfpsf); | |
3331 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1 | |
3332 // the number of digits in the significand is p34 = 34 | |
3333 if (e4 + scale < expmin) { | |
3334 is_tiny = 1; | |
3335 } | |
3336 } | |
3337 | |
3338 // the result rounded to the destination precision with unbounded exponent | |
3339 // is (-1)^p_sign * res * 10^e4 | |
3340 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN | |
3341 // res.w[0] unchanged; | |
3342 // Note: res is correct only if expmin <= e4 <= expmax | |
3343 ind = p34; // the number of decimal digits in the signifcand of res | |
3344 | |
3345 // at this point we have the result rounded with unbounded exponent in | |
3346 // res and we know its tininess: | |
3347 // res = (-1)^p_sign * significand * 10^e4, | |
3348 // where q (significand) = ind = p34 | |
3349 // Note: res is correct only if expmin <= e4 <= expmax | |
3350 | |
3351 // check for overflow if RN | |
3352 if (rnd_mode == ROUNDING_TO_NEAREST | |
3353 && (ind + e4) > (p34 + expmax)) { | |
3354 res.w[1] = p_sign | 0x7800000000000000ull; | |
3355 res.w[0] = 0x0000000000000000ull; | |
3356 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
3357 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
3358 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
3359 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
3360 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
3361 BID_SWAP128 (res); | |
3362 BID_RETURN (res) | |
3363 } // else not overflow or not RN, so continue | |
3364 | |
3365 // from this point on this is similar to the last part of the computation | |
3366 // for Cases (15), (16), (17) | |
3367 | |
3368 // if (e4 >= expmin) we have the result rounded with bounded exponent | |
3369 if (e4 < expmin) { | |
3370 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res | |
3371 // where the result rounded [at most] once is | |
3372 // (-1)^p_sign * significand_res * 10^e4 | |
3373 | |
3374 // avoid double rounding error | |
3375 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; | |
3376 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; | |
3377 is_midpoint_lt_even0 = is_midpoint_lt_even; | |
3378 is_midpoint_gt_even0 = is_midpoint_gt_even; | |
3379 is_inexact_lt_midpoint = 0; | |
3380 is_inexact_gt_midpoint = 0; | |
3381 is_midpoint_lt_even = 0; | |
3382 is_midpoint_gt_even = 0; | |
3383 | |
3384 if (x0 > ind) { | |
3385 // nothing is left of res when moving the decimal point left x0 digits | |
3386 is_inexact_lt_midpoint = 1; | |
3387 res.w[1] = p_sign | 0x0000000000000000ull; | |
3388 res.w[0] = 0x0000000000000000ull; | |
3389 e4 = expmin; | |
3390 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34 | |
3391 // this is <, =, or > 1/2 ulp | |
3392 // compare the ind-digit value in the significand of res with | |
3393 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is | |
3394 // less than, equal to, or greater than 1/2 ulp (significand of res) | |
3395 R128.w[1] = res.w[1] & MASK_COEFF; | |
3396 R128.w[0] = res.w[0]; | |
3397 if (ind <= 19) { | |
3398 if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp | |
3399 lt_half_ulp = 1; | |
3400 is_inexact_lt_midpoint = 1; | |
3401 } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp | |
3402 eq_half_ulp = 1; | |
3403 is_midpoint_gt_even = 1; | |
3404 } else { // > 1/2 ulp | |
3405 gt_half_ulp = 1; | |
3406 is_inexact_gt_midpoint = 1; | |
3407 } | |
3408 } else { // if (ind <= 38) | |
3409 if (R128.w[1] < midpoint128[ind - 20].w[1] || | |
3410 (R128.w[1] == midpoint128[ind - 20].w[1] && | |
3411 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp | |
3412 lt_half_ulp = 1; | |
3413 is_inexact_lt_midpoint = 1; | |
3414 } else if (R128.w[1] == midpoint128[ind - 20].w[1] && | |
3415 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp | |
3416 eq_half_ulp = 1; | |
3417 is_midpoint_gt_even = 1; | |
3418 } else { // > 1/2 ulp | |
3419 gt_half_ulp = 1; | |
3420 is_inexact_gt_midpoint = 1; | |
3421 } | |
3422 } | |
3423 if (lt_half_ulp || eq_half_ulp) { | |
3424 // res = +0.0 * 10^expmin | |
3425 res.w[1] = 0x0000000000000000ull; | |
3426 res.w[0] = 0x0000000000000000ull; | |
3427 } else { // if (gt_half_ulp) | |
3428 // res = +1 * 10^expmin | |
3429 res.w[1] = 0x0000000000000000ull; | |
3430 res.w[0] = 0x0000000000000001ull; | |
3431 } | |
3432 res.w[1] = p_sign | res.w[1]; | |
3433 e4 = expmin; | |
3434 } else { // if (1 <= x0 <= ind - 1 <= 33) | |
3435 // round the ind-digit result to ind - x0 digits | |
3436 | |
3437 if (ind <= 18) { // 2 <= ind <= 18 | |
3438 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp, | |
3439 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
3440 &is_inexact_lt_midpoint, | |
3441 &is_inexact_gt_midpoint); | |
3442 res.w[1] = 0x0; | |
3443 res.w[0] = R64; | |
3444 } else if (ind <= 38) { | |
3445 P128.w[1] = res.w[1] & MASK_COEFF; | |
3446 P128.w[0] = res.w[0]; | |
3447 round128_19_38 (ind, x0, P128, &res, &incr_exp, | |
3448 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
3449 &is_inexact_lt_midpoint, | |
3450 &is_inexact_gt_midpoint); | |
3451 } | |
3452 e4 = e4 + x0; // expmin | |
3453 // we want the exponent to be expmin, so if incr_exp = 1 then | |
3454 // multiply the rounded result by 10 - it will still fit in 113 bits | |
3455 if (incr_exp) { | |
3456 // 64 x 128 -> 128 | |
3457 P128.w[1] = res.w[1] & MASK_COEFF; | |
3458 P128.w[0] = res.w[0]; | |
3459 __mul_64x128_to_128 (res, ten2k64[1], P128); | |
3460 } | |
3461 res.w[1] = | |
3462 p_sign | ((UINT64) (e4 + 6176) << 49) | (res. | |
3463 w[1] & MASK_COEFF); | |
3464 // avoid a double rounding error | |
3465 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
3466 is_midpoint_lt_even) { // double rounding error upward | |
3467 // res = res - 1 | |
3468 res.w[0]--; | |
3469 if (res.w[0] == 0xffffffffffffffffull) | |
3470 res.w[1]--; | |
3471 // Note: a double rounding error upward is not possible; for this | |
3472 // the result after the first rounding would have to be 99...95 | |
3473 // (35 digits in all), possibly followed by a number of zeros; this | |
3474 // not possible in this underflow case | |
3475 is_midpoint_lt_even = 0; | |
3476 is_inexact_lt_midpoint = 1; | |
3477 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
3478 is_midpoint_gt_even) { // double rounding error downward | |
3479 // res = res + 1 | |
3480 res.w[0]++; | |
3481 if (res.w[0] == 0) | |
3482 res.w[1]++; | |
3483 is_midpoint_gt_even = 0; | |
3484 is_inexact_gt_midpoint = 1; | |
3485 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
3486 !is_inexact_lt_midpoint | |
3487 && !is_inexact_gt_midpoint) { | |
3488 // if this second rounding was exact the result may still be | |
3489 // inexact because of the first rounding | |
3490 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
3491 is_inexact_gt_midpoint = 1; | |
3492 } | |
3493 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
3494 is_inexact_lt_midpoint = 1; | |
3495 } | |
3496 } else if (is_midpoint_gt_even && | |
3497 (is_inexact_gt_midpoint0 | |
3498 || is_midpoint_lt_even0)) { | |
3499 // pulled up to a midpoint | |
3500 is_inexact_lt_midpoint = 1; | |
3501 is_inexact_gt_midpoint = 0; | |
3502 is_midpoint_lt_even = 0; | |
3503 is_midpoint_gt_even = 0; | |
3504 } else if (is_midpoint_lt_even && | |
3505 (is_inexact_lt_midpoint0 | |
3506 || is_midpoint_gt_even0)) { | |
3507 // pulled down to a midpoint | |
3508 is_inexact_lt_midpoint = 0; | |
3509 is_inexact_gt_midpoint = 1; | |
3510 is_midpoint_lt_even = 0; | |
3511 is_midpoint_gt_even = 0; | |
3512 } else { | |
3513 ; | |
3514 } | |
3515 } | |
3516 } | |
3517 // res contains the correct result | |
3518 // apply correction if not rounding to nearest | |
3519 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
3520 rounding_correction (rnd_mode, | |
3521 is_inexact_lt_midpoint, | |
3522 is_inexact_gt_midpoint, | |
3523 is_midpoint_lt_even, is_midpoint_gt_even, | |
3524 e4, &res, pfpsf); | |
3525 } | |
3526 if (is_midpoint_lt_even || is_midpoint_gt_even || | |
3527 is_inexact_lt_midpoint || is_inexact_gt_midpoint) { | |
3528 // set the inexact flag | |
3529 *pfpsf |= INEXACT_EXCEPTION; | |
3530 if (is_tiny) | |
3531 *pfpsf |= UNDERFLOW_EXCEPTION; | |
3532 } | |
3533 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
3534 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
3535 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
3536 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
3537 BID_SWAP128 (res); | |
3538 BID_RETURN (res) | |
3539 | |
3540 } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15) | |
3541 (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16) | |
3542 (delta + q3 <= p34 && p34 < q4)) { // Case (17) | |
3543 | |
3544 // calculate first the result rounded to the destination precision, with | |
3545 // unbounded exponent | |
3546 | |
3547 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4, | |
3548 rnd_mode, &is_midpoint_lt_even, | |
3549 &is_midpoint_gt_even, &is_inexact_lt_midpoint, | |
3550 &is_inexact_gt_midpoint, pfpsf, &res); | |
3551 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
3552 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
3553 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
3554 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
3555 BID_SWAP128 (res); | |
3556 BID_RETURN (res) | |
3557 | |
3558 } else { | |
3559 ; | |
3560 } | |
3561 | |
3562 } // end if delta < 0 | |
3563 | |
3564 *ptr_is_midpoint_lt_even = is_midpoint_lt_even; | |
3565 *ptr_is_midpoint_gt_even = is_midpoint_gt_even; | |
3566 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint; | |
3567 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint; | |
3568 BID_SWAP128 (res); | |
3569 BID_RETURN (res) | |
3570 | |
3571 } | |
3572 | |
3573 | |
3574 #if DECIMAL_CALL_BY_REFERENCE | |
3575 void | |
3576 bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz | |
3577 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3578 _EXC_INFO_PARAM) { | |
3579 UINT128 x = *px, y = *py, z = *pz; | |
3580 #if !DECIMAL_GLOBAL_ROUNDING | |
3581 unsigned int rnd_mode = *prnd_mode; | |
3582 #endif | |
3583 #else | |
3584 UINT128 | |
3585 bid128_fma (UINT128 x, UINT128 y, UINT128 z | |
3586 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3587 _EXC_INFO_PARAM) { | |
3588 #endif | |
3589 int is_midpoint_lt_even, is_midpoint_gt_even, | |
3590 is_inexact_lt_midpoint, is_inexact_gt_midpoint; | |
3591 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3592 | |
3593 #if DECIMAL_CALL_BY_REFERENCE | |
3594 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3595 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3596 &res, &x, &y, &z | |
3597 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3598 _EXC_INFO_ARG); | |
3599 #else | |
3600 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3601 &is_inexact_lt_midpoint, | |
3602 &is_inexact_gt_midpoint, x, y, | |
3603 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3604 _EXC_INFO_ARG); | |
3605 #endif | |
3606 BID_RETURN (res); | |
3607 } | |
3608 | |
3609 | |
3610 #if DECIMAL_CALL_BY_REFERENCE | |
3611 void | |
3612 bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz | |
3613 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3614 _EXC_INFO_PARAM) { | |
3615 UINT64 x = *px, y = *py, z = *pz; | |
3616 #if !DECIMAL_GLOBAL_ROUNDING | |
3617 unsigned int rnd_mode = *prnd_mode; | |
3618 #endif | |
3619 #else | |
3620 UINT128 | |
3621 bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z | |
3622 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3623 _EXC_INFO_PARAM) { | |
3624 #endif | |
3625 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
3626 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
3627 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3628 UINT128 x1, y1, z1; | |
3629 | |
3630 #if DECIMAL_CALL_BY_REFERENCE | |
3631 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3632 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3633 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3634 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3635 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3636 &res, &x1, &y1, &z1 | |
3637 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3638 _EXC_INFO_ARG); | |
3639 #else | |
3640 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3641 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3642 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3643 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3644 &is_inexact_lt_midpoint, | |
3645 &is_inexact_gt_midpoint, x1, y1, | |
3646 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3647 _EXC_INFO_ARG); | |
3648 #endif | |
3649 BID_RETURN (res); | |
3650 } | |
3651 | |
3652 | |
3653 #if DECIMAL_CALL_BY_REFERENCE | |
3654 void | |
3655 bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz | |
3656 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3657 _EXC_INFO_PARAM) { | |
3658 UINT64 x = *px, y = *py; | |
3659 UINT128 z = *pz; | |
3660 #if !DECIMAL_GLOBAL_ROUNDING | |
3661 unsigned int rnd_mode = *prnd_mode; | |
3662 #endif | |
3663 #else | |
3664 UINT128 | |
3665 bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z | |
3666 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3667 _EXC_INFO_PARAM) { | |
3668 #endif | |
3669 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
3670 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
3671 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3672 UINT128 x1, y1; | |
3673 | |
3674 #if DECIMAL_CALL_BY_REFERENCE | |
3675 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3676 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3677 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3678 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3679 &res, &x1, &y1, &z | |
3680 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3681 _EXC_INFO_ARG); | |
3682 #else | |
3683 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3684 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3685 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3686 &is_inexact_lt_midpoint, | |
3687 &is_inexact_gt_midpoint, x1, y1, | |
3688 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3689 _EXC_INFO_ARG); | |
3690 #endif | |
3691 BID_RETURN (res); | |
3692 } | |
3693 | |
3694 | |
3695 #if DECIMAL_CALL_BY_REFERENCE | |
3696 void | |
3697 bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz | |
3698 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3699 _EXC_INFO_PARAM) { | |
3700 UINT64 x = *px, z = *pz; | |
3701 #if !DECIMAL_GLOBAL_ROUNDING | |
3702 unsigned int rnd_mode = *prnd_mode; | |
3703 #endif | |
3704 #else | |
3705 UINT128 | |
3706 bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z | |
3707 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3708 _EXC_INFO_PARAM) { | |
3709 #endif | |
3710 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
3711 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
3712 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3713 UINT128 x1, z1; | |
3714 | |
3715 #if DECIMAL_CALL_BY_REFERENCE | |
3716 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3717 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3718 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3719 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3720 &res, &x1, py, &z1 | |
3721 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3722 _EXC_INFO_ARG); | |
3723 #else | |
3724 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3725 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3726 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3727 &is_inexact_lt_midpoint, | |
3728 &is_inexact_gt_midpoint, x1, y, | |
3729 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3730 _EXC_INFO_ARG); | |
3731 #endif | |
3732 BID_RETURN (res); | |
3733 } | |
3734 | |
3735 | |
3736 #if DECIMAL_CALL_BY_REFERENCE | |
3737 void | |
3738 bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz | |
3739 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3740 _EXC_INFO_PARAM) { | |
3741 UINT64 x = *px; | |
3742 #if !DECIMAL_GLOBAL_ROUNDING | |
3743 unsigned int rnd_mode = *prnd_mode; | |
3744 #endif | |
3745 #else | |
3746 UINT128 | |
3747 bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z | |
3748 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3749 _EXC_INFO_PARAM) { | |
3750 #endif | |
3751 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
3752 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
3753 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3754 UINT128 x1; | |
3755 | |
3756 #if DECIMAL_CALL_BY_REFERENCE | |
3757 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3758 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3759 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3760 &res, &x1, py, pz | |
3761 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3762 _EXC_INFO_ARG); | |
3763 #else | |
3764 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3765 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3766 &is_inexact_lt_midpoint, | |
3767 &is_inexact_gt_midpoint, x1, y, | |
3768 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3769 _EXC_INFO_ARG); | |
3770 #endif | |
3771 BID_RETURN (res); | |
3772 } | |
3773 | |
3774 | |
3775 #if DECIMAL_CALL_BY_REFERENCE | |
3776 void | |
3777 bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz | |
3778 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3779 _EXC_INFO_PARAM) { | |
3780 UINT64 y = *py, z = *pz; | |
3781 #if !DECIMAL_GLOBAL_ROUNDING | |
3782 unsigned int rnd_mode = *prnd_mode; | |
3783 #endif | |
3784 #else | |
3785 UINT128 | |
3786 bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z | |
3787 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3788 _EXC_INFO_PARAM) { | |
3789 #endif | |
3790 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
3791 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
3792 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3793 UINT128 y1, z1; | |
3794 | |
3795 #if DECIMAL_CALL_BY_REFERENCE | |
3796 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3797 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3798 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3799 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3800 &res, px, &y1, &z1 | |
3801 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3802 _EXC_INFO_ARG); | |
3803 #else | |
3804 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3805 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3806 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3807 &is_inexact_lt_midpoint, | |
3808 &is_inexact_gt_midpoint, x, y1, | |
3809 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3810 _EXC_INFO_ARG); | |
3811 #endif | |
3812 BID_RETURN (res); | |
3813 } | |
3814 | |
3815 | |
3816 #if DECIMAL_CALL_BY_REFERENCE | |
3817 void | |
3818 bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz | |
3819 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3820 _EXC_INFO_PARAM) { | |
3821 UINT64 y = *py; | |
3822 #if !DECIMAL_GLOBAL_ROUNDING | |
3823 unsigned int rnd_mode = *prnd_mode; | |
3824 #endif | |
3825 #else | |
3826 UINT128 | |
3827 bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z | |
3828 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3829 _EXC_INFO_PARAM) { | |
3830 #endif | |
3831 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
3832 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
3833 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3834 UINT128 y1; | |
3835 | |
3836 #if DECIMAL_CALL_BY_REFERENCE | |
3837 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3838 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3839 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3840 &res, px, &y1, pz | |
3841 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3842 _EXC_INFO_ARG); | |
3843 #else | |
3844 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3845 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3846 &is_inexact_lt_midpoint, | |
3847 &is_inexact_gt_midpoint, x, y1, | |
3848 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3849 _EXC_INFO_ARG); | |
3850 #endif | |
3851 BID_RETURN (res); | |
3852 } | |
3853 | |
3854 | |
3855 #if DECIMAL_CALL_BY_REFERENCE | |
3856 void | |
3857 bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz | |
3858 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3859 _EXC_INFO_PARAM) { | |
3860 UINT64 z = *pz; | |
3861 #if !DECIMAL_GLOBAL_ROUNDING | |
3862 unsigned int rnd_mode = *prnd_mode; | |
3863 #endif | |
3864 #else | |
3865 UINT128 | |
3866 bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z | |
3867 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3868 _EXC_INFO_PARAM) { | |
3869 #endif | |
3870 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
3871 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
3872 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
3873 UINT128 z1; | |
3874 | |
3875 #if DECIMAL_CALL_BY_REFERENCE | |
3876 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3877 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3878 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint, | |
3879 &res, px, py, &z1 | |
3880 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3881 _EXC_INFO_ARG); | |
3882 #else | |
3883 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3884 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even, | |
3885 &is_inexact_lt_midpoint, | |
3886 &is_inexact_gt_midpoint, x, y, | |
3887 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3888 _EXC_INFO_ARG); | |
3889 #endif | |
3890 BID_RETURN (res); | |
3891 } | |
3892 | |
3893 // Note: bid128qqq_fma is represented by bid128_fma | |
3894 | |
3895 // Note: bid64ddd_fma is represented by bid64_fma | |
3896 | |
3897 #if DECIMAL_CALL_BY_REFERENCE | |
3898 void | |
3899 bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz | |
3900 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3901 _EXC_INFO_PARAM) { | |
3902 UINT64 x = *px, y = *py; | |
3903 #if !DECIMAL_GLOBAL_ROUNDING | |
3904 unsigned int rnd_mode = *prnd_mode; | |
3905 #endif | |
3906 #else | |
3907 UINT64 | |
3908 bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z | |
3909 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3910 _EXC_INFO_PARAM) { | |
3911 #endif | |
3912 UINT64 res1 = 0xbaddbaddbaddbaddull; | |
3913 UINT128 x1, y1; | |
3914 | |
3915 #if DECIMAL_CALL_BY_REFERENCE | |
3916 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3917 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3918 bid64qqq_fma (&res1, &x1, &y1, pz | |
3919 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3920 _EXC_INFO_ARG); | |
3921 #else | |
3922 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3923 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3924 res1 = bid64qqq_fma (x1, y1, z | |
3925 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3926 _EXC_INFO_ARG); | |
3927 #endif | |
3928 BID_RETURN (res1); | |
3929 } | |
3930 | |
3931 | |
3932 #if DECIMAL_CALL_BY_REFERENCE | |
3933 void | |
3934 bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz | |
3935 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3936 _EXC_INFO_PARAM) { | |
3937 UINT64 x = *px, z = *pz; | |
3938 #if !DECIMAL_GLOBAL_ROUNDING | |
3939 unsigned int rnd_mode = *prnd_mode; | |
3940 #endif | |
3941 #else | |
3942 UINT64 | |
3943 bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z | |
3944 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3945 _EXC_INFO_PARAM) { | |
3946 #endif | |
3947 UINT64 res1 = 0xbaddbaddbaddbaddull; | |
3948 UINT128 x1, z1; | |
3949 | |
3950 #if DECIMAL_CALL_BY_REFERENCE | |
3951 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3952 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3953 bid64qqq_fma (&res1, &x1, py, &z1 | |
3954 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3955 _EXC_INFO_ARG); | |
3956 #else | |
3957 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3958 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3959 res1 = bid64qqq_fma (x1, y, z1 | |
3960 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3961 _EXC_INFO_ARG); | |
3962 #endif | |
3963 BID_RETURN (res1); | |
3964 } | |
3965 | |
3966 | |
3967 #if DECIMAL_CALL_BY_REFERENCE | |
3968 void | |
3969 bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz | |
3970 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3971 _EXC_INFO_PARAM) { | |
3972 UINT64 x = *px; | |
3973 #if !DECIMAL_GLOBAL_ROUNDING | |
3974 unsigned int rnd_mode = *prnd_mode; | |
3975 #endif | |
3976 #else | |
3977 UINT64 | |
3978 bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z | |
3979 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
3980 _EXC_INFO_PARAM) { | |
3981 #endif | |
3982 UINT64 res1 = 0xbaddbaddbaddbaddull; | |
3983 UINT128 x1; | |
3984 | |
3985 #if DECIMAL_CALL_BY_REFERENCE | |
3986 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3987 bid64qqq_fma (&res1, &x1, py, pz | |
3988 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3989 _EXC_INFO_ARG); | |
3990 #else | |
3991 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
3992 res1 = bid64qqq_fma (x1, y, z | |
3993 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
3994 _EXC_INFO_ARG); | |
3995 #endif | |
3996 BID_RETURN (res1); | |
3997 } | |
3998 | |
3999 | |
4000 #if DECIMAL_CALL_BY_REFERENCE | |
4001 void | |
4002 bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz | |
4003 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4004 _EXC_INFO_PARAM) { | |
4005 UINT64 y = *py, z = *pz; | |
4006 #if !DECIMAL_GLOBAL_ROUNDING | |
4007 unsigned int rnd_mode = *prnd_mode; | |
4008 #endif | |
4009 #else | |
4010 UINT64 | |
4011 bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z | |
4012 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4013 _EXC_INFO_PARAM) { | |
4014 #endif | |
4015 UINT64 res1 = 0xbaddbaddbaddbaddull; | |
4016 UINT128 y1, z1; | |
4017 | |
4018 #if DECIMAL_CALL_BY_REFERENCE | |
4019 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4020 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4021 bid64qqq_fma (&res1, px, &y1, &z1 | |
4022 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4023 _EXC_INFO_ARG); | |
4024 #else | |
4025 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4026 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4027 res1 = bid64qqq_fma (x, y1, z1 | |
4028 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4029 _EXC_INFO_ARG); | |
4030 #endif | |
4031 BID_RETURN (res1); | |
4032 } | |
4033 | |
4034 | |
4035 #if DECIMAL_CALL_BY_REFERENCE | |
4036 void | |
4037 bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz | |
4038 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4039 _EXC_INFO_PARAM) { | |
4040 UINT64 y = *py; | |
4041 #if !DECIMAL_GLOBAL_ROUNDING | |
4042 unsigned int rnd_mode = *prnd_mode; | |
4043 #endif | |
4044 #else | |
4045 UINT64 | |
4046 bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z | |
4047 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4048 _EXC_INFO_PARAM) { | |
4049 #endif | |
4050 UINT64 res1 = 0xbaddbaddbaddbaddull; | |
4051 UINT128 y1; | |
4052 | |
4053 #if DECIMAL_CALL_BY_REFERENCE | |
4054 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4055 bid64qqq_fma (&res1, px, &y1, pz | |
4056 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4057 _EXC_INFO_ARG); | |
4058 #else | |
4059 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4060 res1 = bid64qqq_fma (x, y1, z | |
4061 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4062 _EXC_INFO_ARG); | |
4063 #endif | |
4064 BID_RETURN (res1); | |
4065 } | |
4066 | |
4067 | |
4068 #if DECIMAL_CALL_BY_REFERENCE | |
4069 void | |
4070 bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz | |
4071 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4072 _EXC_INFO_PARAM) { | |
4073 UINT64 z = *pz; | |
4074 #if !DECIMAL_GLOBAL_ROUNDING | |
4075 unsigned int rnd_mode = *prnd_mode; | |
4076 #endif | |
4077 #else | |
4078 UINT64 | |
4079 bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z | |
4080 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4081 _EXC_INFO_PARAM) { | |
4082 #endif | |
4083 UINT64 res1 = 0xbaddbaddbaddbaddull; | |
4084 UINT128 z1; | |
4085 | |
4086 #if DECIMAL_CALL_BY_REFERENCE | |
4087 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4088 bid64qqq_fma (&res1, px, py, &z1 | |
4089 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4090 _EXC_INFO_ARG); | |
4091 #else | |
4092 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG); | |
4093 res1 = bid64qqq_fma (x, y, z1 | |
4094 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4095 _EXC_INFO_ARG); | |
4096 #endif | |
4097 BID_RETURN (res1); | |
4098 } | |
4099 | |
4100 | |
4101 #if DECIMAL_CALL_BY_REFERENCE | |
4102 void | |
4103 bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz | |
4104 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4105 _EXC_INFO_PARAM) { | |
4106 UINT128 x = *px, y = *py, z = *pz; | |
4107 #if !DECIMAL_GLOBAL_ROUNDING | |
4108 unsigned int rnd_mode = *prnd_mode; | |
4109 #endif | |
4110 #else | |
4111 UINT64 | |
4112 bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z | |
4113 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
4114 _EXC_INFO_PARAM) { | |
4115 #endif | |
4116 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0, | |
4117 is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0; | |
4118 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0, | |
4119 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
4120 int incr_exp; | |
4121 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
4122 UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} }; | |
4123 UINT64 res1 = 0xbaddbaddbaddbaddull; | |
4124 unsigned int save_fpsf; // needed because of the call to bid128_ext_fma | |
4125 UINT64 sign; | |
4126 UINT64 exp; | |
4127 int unbexp; | |
4128 UINT128 C; | |
4129 BID_UI64DOUBLE tmp; | |
4130 int nr_bits; | |
4131 int q, x0; | |
4132 int scale; | |
4133 int lt_half_ulp = 0, eq_half_ulp = 0; | |
4134 | |
4135 // Note: for rounding modes other than RN or RA, the result can be obtained | |
4136 // by rounding first to BID128 and then to BID64 | |
4137 | |
4138 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved | |
4139 *pfpsf = 0; | |
4140 | |
4141 #if DECIMAL_CALL_BY_REFERENCE | |
4142 bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0, | |
4143 &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0, | |
4144 &res, &x, &y, &z | |
4145 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4146 _EXC_INFO_ARG); | |
4147 #else | |
4148 res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0, | |
4149 &is_inexact_lt_midpoint0, | |
4150 &is_inexact_gt_midpoint0, x, y, | |
4151 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
4152 _EXC_INFO_ARG); | |
4153 #endif | |
4154 | |
4155 if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) || | |
4156 (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible | |
4157 ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN) | |
4158 ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity | |
4159 #if DECIMAL_CALL_BY_REFERENCE | |
4160 bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG); | |
4161 #else | |
4162 res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG); | |
4163 #endif | |
4164 // determine the unbiased exponent of the result | |
4165 unbexp = ((res1 >> 53) & 0x3ff) - 398; | |
4166 | |
4167 // if subnormal, res1 must have exp = -398 | |
4168 // if tiny and inexact set underflow and inexact status flags | |
4169 if (!((res1 & MASK_NAN) == MASK_NAN) && // res1 not NaN | |
4170 (unbexp == -398) | |
4171 && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull) | |
4172 && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 | |
4173 || is_midpoint_lt_even0 || is_midpoint_gt_even0)) { | |
4174 // set the inexact flag and the underflow flag | |
4175 *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION); | |
4176 } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 || | |
4177 is_midpoint_lt_even0 || is_midpoint_gt_even0) { | |
4178 // set the inexact flag and the underflow flag | |
4179 *pfpsf |= INEXACT_EXCEPTION; | |
4180 } | |
4181 | |
4182 *pfpsf |= save_fpsf; | |
4183 BID_RETURN (res1); | |
4184 } // else continue, and use rounding to nearest to round to 16 digits | |
4185 | |
4186 // at this point the result is rounded to nearest (even or away) to 34 digits | |
4187 // (or less if exact), and it is zero or finite non-zero canonical [sub]normal | |
4188 sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
4189 exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits | |
4190 unbexp = (exp >> 49) - 6176; | |
4191 C.w[1] = res.w[HIGH_128W] & MASK_COEFF; | |
4192 C.w[0] = res.w[LOW_128W]; | |
4193 | |
4194 if ((C.w[1] == 0x0 && C.w[0] == 0x0) || // result is zero | |
4195 (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) { | |
4196 // clear under/overflow | |
4197 #if DECIMAL_CALL_BY_REFERENCE | |
4198 bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG); | |
4199 #else | |
4200 res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG); | |
4201 #endif | |
4202 *pfpsf |= save_fpsf; | |
4203 BID_RETURN (res1); | |
4204 } // else continue | |
4205 | |
4206 // -398 - 34 <= unbexp <= 369 + 15 | |
4207 if (rnd_mode == ROUNDING_TIES_AWAY) { | |
4208 // apply correction, if needed, to make the result rounded to nearest-even | |
4209 if (is_midpoint_gt_even) { | |
4210 // res = res - 1 | |
4211 res1--; // res1 is now even | |
4212 } // else the result is already correctly rounded to nearest-even | |
4213 } | |
4214 // at this point the result is finite, non-zero canonical normal or subnormal, | |
4215 // and in most cases overflow or underflow will not occur | |
4216 | |
4217 // determine the number of digits q in the result | |
4218 // q = nr. of decimal digits in x | |
4219 // determine first the nr. of bits in x | |
4220 if (C.w[1] == 0) { | |
4221 if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53 | |
4222 // split the 64-bit value in two 32-bit halves to avoid rounding errors | |
4223 if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32 | |
4224 tmp.d = (double) (C.w[0] >> 32); // exact conversion | |
4225 nr_bits = | |
4226 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
4227 } else { // x < 2^32 | |
4228 tmp.d = (double) (C.w[0]); // exact conversion | |
4229 nr_bits = | |
4230 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
4231 } | |
4232 } else { // if x < 2^53 | |
4233 tmp.d = (double) C.w[0]; // exact conversion | |
4234 nr_bits = | |
4235 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
4236 } | |
4237 } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1]) | |
4238 tmp.d = (double) C.w[1]; // exact conversion | |
4239 nr_bits = | |
4240 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
4241 } | |
4242 q = nr_digits[nr_bits - 1].digits; | |
4243 if (q == 0) { | |
4244 q = nr_digits[nr_bits - 1].digits1; | |
4245 if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi || | |
4246 (C.w[1] == nr_digits[nr_bits - 1].threshold_hi && | |
4247 C.w[0] >= nr_digits[nr_bits - 1].threshold_lo)) | |
4248 q++; | |
4249 } | |
4250 // if q > 16, round to nearest even to 16 digits (but for underflow it may | |
4251 // have to be truncated even more) | |
4252 if (q > 16) { | |
4253 x0 = q - 16; | |
4254 if (q <= 18) { | |
4255 round64_2_18 (q, x0, C.w[0], &res1, &incr_exp, | |
4256 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
4257 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
4258 } else { // 19 <= q <= 34 | |
4259 round128_19_38 (q, x0, C, &res128, &incr_exp, | |
4260 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
4261 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
4262 res1 = res128.w[0]; // the result fits in 64 bits | |
4263 } | |
4264 unbexp = unbexp + x0; | |
4265 if (incr_exp) | |
4266 unbexp++; | |
4267 q = 16; // need to set in case denormalization is necessary | |
4268 } else { | |
4269 // the result does not require a second rounding (and it must have | |
4270 // been exact in the first rounding, since q <= 16) | |
4271 res1 = C.w[0]; | |
4272 } | |
4273 | |
4274 // avoid a double rounding error | |
4275 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
4276 is_midpoint_lt_even) { // double rounding error upward | |
4277 // res = res - 1 | |
4278 res1--; // res1 becomes odd | |
4279 is_midpoint_lt_even = 0; | |
4280 is_inexact_lt_midpoint = 1; | |
4281 if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1 | |
4282 res1 = 0x002386f26fc0ffffull; // 10^16 - 1 | |
4283 unbexp--; | |
4284 } | |
4285 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
4286 is_midpoint_gt_even) { // double rounding error downward | |
4287 // res = res + 1 | |
4288 res1++; // res1 becomes odd (so it cannot be 10^16) | |
4289 is_midpoint_gt_even = 0; | |
4290 is_inexact_gt_midpoint = 1; | |
4291 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
4292 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
4293 // if this second rounding was exact the result may still be | |
4294 // inexact because of the first rounding | |
4295 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
4296 is_inexact_gt_midpoint = 1; | |
4297 } | |
4298 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
4299 is_inexact_lt_midpoint = 1; | |
4300 } | |
4301 } else if (is_midpoint_gt_even && | |
4302 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { | |
4303 // pulled up to a midpoint | |
4304 is_inexact_lt_midpoint = 1; | |
4305 is_inexact_gt_midpoint = 0; | |
4306 is_midpoint_lt_even = 0; | |
4307 is_midpoint_gt_even = 0; | |
4308 } else if (is_midpoint_lt_even && | |
4309 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { | |
4310 // pulled down to a midpoint | |
4311 is_inexact_lt_midpoint = 0; | |
4312 is_inexact_gt_midpoint = 1; | |
4313 is_midpoint_lt_even = 0; | |
4314 is_midpoint_gt_even = 0; | |
4315 } else { | |
4316 ; | |
4317 } | |
4318 // this is the result rounded correctly to nearest even, with unbounded exp. | |
4319 | |
4320 // check for overflow | |
4321 if (q + unbexp > P16 + expmax16) { | |
4322 res1 = sign | 0x7800000000000000ull; | |
4323 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION); | |
4324 *pfpsf |= save_fpsf; | |
4325 BID_RETURN (res1) | |
4326 } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16 | |
4327 // not overflow; the result must be exact, and we can multiply res1 by | |
4328 // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits | |
4329 scale = unbexp - expmax16; | |
4330 res1 = res1 * ten2k64[scale]; // res1 * 10^scale | |
4331 unbexp = expmax16; // unbexp - scale | |
4332 } else { | |
4333 ; // continue | |
4334 } | |
4335 | |
4336 // check for underflow | |
4337 if (q + unbexp < P16 + expmin16) { | |
4338 if (unbexp < expmin16) { | |
4339 // we must truncate more of res | |
4340 x0 = expmin16 - unbexp; // x0 >= 1 | |
4341 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint; | |
4342 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint; | |
4343 is_midpoint_lt_even0 = is_midpoint_lt_even; | |
4344 is_midpoint_gt_even0 = is_midpoint_gt_even; | |
4345 is_inexact_lt_midpoint = 0; | |
4346 is_inexact_gt_midpoint = 0; | |
4347 is_midpoint_lt_even = 0; | |
4348 is_midpoint_gt_even = 0; | |
4349 // the number of decimal digits in res1 is q | |
4350 if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits | |
4351 // 2 <= q <= 16, 1 <= x0 <= 15 | |
4352 round64_2_18 (q, x0, res1, &res1, &incr_exp, | |
4353 &is_midpoint_lt_even, &is_midpoint_gt_even, | |
4354 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint); | |
4355 if (incr_exp) { | |
4356 // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15 | |
4357 res1 = ten2k64[q - x0]; | |
4358 } | |
4359 unbexp = unbexp + x0; // expmin16 | |
4360 } else if (x0 == q) { | |
4361 // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin | |
4362 // determine relationship with 1/2 ulp | |
4363 // q <= 16 | |
4364 if (res1 < midpoint64[q - 1]) { // < 1/2 ulp | |
4365 lt_half_ulp = 1; | |
4366 is_inexact_lt_midpoint = 1; | |
4367 } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp | |
4368 eq_half_ulp = 1; | |
4369 is_midpoint_gt_even = 1; | |
4370 } else { // > 1/2 ulp | |
4371 // gt_half_ulp = 1; | |
4372 is_inexact_gt_midpoint = 1; | |
4373 } | |
4374 if (lt_half_ulp || eq_half_ulp) { | |
4375 // res = +0.0 * 10^expmin16 | |
4376 res1 = 0x0000000000000000ull; | |
4377 } else { // if (gt_half_ulp) | |
4378 // res = +1 * 10^expmin16 | |
4379 res1 = 0x0000000000000001ull; | |
4380 } | |
4381 unbexp = expmin16; | |
4382 } else { // if (x0 > q) | |
4383 // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin | |
4384 res1 = 0x0000000000000000ull; | |
4385 unbexp = expmin16; | |
4386 is_inexact_lt_midpoint = 1; | |
4387 } | |
4388 // avoid a double rounding error | |
4389 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) && | |
4390 is_midpoint_lt_even) { // double rounding error upward | |
4391 // res = res - 1 | |
4392 res1--; // res1 becomes odd | |
4393 is_midpoint_lt_even = 0; | |
4394 is_inexact_lt_midpoint = 1; | |
4395 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) && | |
4396 is_midpoint_gt_even) { // double rounding error downward | |
4397 // res = res + 1 | |
4398 res1++; // res1 becomes odd | |
4399 is_midpoint_gt_even = 0; | |
4400 is_inexact_gt_midpoint = 1; | |
4401 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even && | |
4402 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) { | |
4403 // if this rounding was exact the result may still be | |
4404 // inexact because of the previous roundings | |
4405 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) { | |
4406 is_inexact_gt_midpoint = 1; | |
4407 } | |
4408 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) { | |
4409 is_inexact_lt_midpoint = 1; | |
4410 } | |
4411 } else if (is_midpoint_gt_even && | |
4412 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) { | |
4413 // pulled up to a midpoint | |
4414 is_inexact_lt_midpoint = 1; | |
4415 is_inexact_gt_midpoint = 0; | |
4416 is_midpoint_lt_even = 0; | |
4417 is_midpoint_gt_even = 0; | |
4418 } else if (is_midpoint_lt_even && | |
4419 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) { | |
4420 // pulled down to a midpoint | |
4421 is_inexact_lt_midpoint = 0; | |
4422 is_inexact_gt_midpoint = 1; | |
4423 is_midpoint_lt_even = 0; | |
4424 is_midpoint_gt_even = 0; | |
4425 } else { | |
4426 ; | |
4427 } | |
4428 } | |
4429 // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16) | |
4430 // and the result is tiny and exact | |
4431 | |
4432 // check for inexact result | |
4433 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
4434 is_midpoint_lt_even || is_midpoint_gt_even || | |
4435 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 || | |
4436 is_midpoint_lt_even0 || is_midpoint_gt_even0) { | |
4437 // set the inexact flag and the underflow flag | |
4438 *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION); | |
4439 } | |
4440 } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint || | |
4441 is_midpoint_lt_even || is_midpoint_gt_even) { | |
4442 *pfpsf |= INEXACT_EXCEPTION; | |
4443 } | |
4444 // this is the result rounded correctly to nearest, with bounded exponent | |
4445 | |
4446 if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction | |
4447 // res = res + 1 | |
4448 res1++; // res1 is now odd | |
4449 } // else the result is already correct | |
4450 | |
4451 // assemble the result | |
4452 if (res1 < 0x0020000000000000ull) { // res < 2^53 | |
4453 res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1; | |
4454 } else { // res1 >= 2^53 | |
4455 res1 = sign | MASK_STEERING_BITS | | |
4456 ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2); | |
4457 } | |
4458 *pfpsf |= save_fpsf; | |
4459 BID_RETURN (res1); | |
4460 } |