comparison libgcc/config/libbid/bid128_fma.c @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 04ced10e8804
comparison
equal deleted inserted replaced
-1:000000000000 0:a06113de4d67
1 /* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
8 version.
9
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 /*****************************************************************************
25 *
26 * BID128 fma x * y + z
27 *
28 ****************************************************************************/
29
30 #include "bid_internal.h"
31
32 static void
33 rounding_correction (unsigned int rnd_mode,
34 unsigned int is_inexact_lt_midpoint,
35 unsigned int is_inexact_gt_midpoint,
36 unsigned int is_midpoint_lt_even,
37 unsigned int is_midpoint_gt_even,
38 int unbexp,
39 UINT128 * ptrres, _IDEC_flags * ptrfpsf) {
40 // unbiased true exponent unbexp may be larger than emax
41
42 UINT128 res = *ptrres; // expected to have the correct sign and coefficient
43 // (the exponent field is ignored, as unbexp is used instead)
44 UINT64 sign, exp;
45 UINT64 C_hi, C_lo;
46
47 // general correction from RN to RA, RM, RP, RZ
48 // Note: if the result is negative, then is_inexact_lt_midpoint,
49 // is_inexact_gt_midpoint, is_midpoint_lt_even, and is_midpoint_gt_even
50 // have to be considered as if determined for the absolute value of the
51 // result (so they seem to be reversed)
52
53 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
54 is_midpoint_lt_even || is_midpoint_gt_even) {
55 *ptrfpsf |= INEXACT_EXCEPTION;
56 }
57 // apply correction to result calculated with unbounded exponent
58 sign = res.w[1] & MASK_SIGN;
59 exp = (UINT64) (unbexp + 6176) << 49; // valid only if expmin<=unbexp<=expmax
60 C_hi = res.w[1] & MASK_COEFF;
61 C_lo = res.w[0];
62 if ((!sign && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) ||
63 ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_UP) &&
64 is_midpoint_gt_even))) ||
65 (sign && ((rnd_mode == ROUNDING_DOWN && is_inexact_lt_midpoint) ||
66 ((rnd_mode == ROUNDING_TIES_AWAY || rnd_mode == ROUNDING_DOWN) &&
67 is_midpoint_gt_even)))) {
68 // C = C + 1
69 C_lo = C_lo + 1;
70 if (C_lo == 0)
71 C_hi = C_hi + 1;
72 if (C_hi == 0x0001ed09bead87c0ull && C_lo == 0x378d8e6400000000ull) {
73 // C = 10^34 => rounding overflow
74 C_hi = 0x0000314dc6448d93ull;
75 C_lo = 0x38c15b0a00000000ull; // 10^33
76 // exp = exp + EXP_P1;
77 unbexp = unbexp + 1;
78 exp = (UINT64) (unbexp + 6176) << 49;
79 }
80 } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) &&
81 ((sign && (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TO_ZERO)) ||
82 (!sign && (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TO_ZERO)))) {
83 // C = C - 1
84 C_lo = C_lo - 1;
85 if (C_lo == 0xffffffffffffffffull)
86 C_hi--;
87 // check if we crossed into the lower decade
88 if (C_hi == 0x0000314dc6448d93ull && C_lo == 0x38c15b09ffffffffull) {
89 // C = 10^33 - 1
90 if (exp > 0) {
91 C_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
92 C_lo = 0x378d8e63ffffffffull;
93 // exp = exp - EXP_P1;
94 unbexp = unbexp - 1;
95 exp = (UINT64) (unbexp + 6176) << 49;
96 } else { // if exp = 0
97 if (is_midpoint_lt_even || is_midpoint_lt_even ||
98 is_inexact_gt_midpoint || is_inexact_gt_midpoint) // tiny & inexact
99 *ptrfpsf |= UNDERFLOW_EXCEPTION;
100 }
101 }
102 } else {
103 ; // the result is already correct
104 }
105 if (unbexp > expmax) { // 6111
106 *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
107 exp = 0;
108 if (!sign) { // result is positive
109 if (rnd_mode == ROUNDING_UP || rnd_mode == ROUNDING_TIES_AWAY) { // +inf
110 C_hi = 0x7800000000000000ull;
111 C_lo = 0x0000000000000000ull;
112 } else { // res = +MAXFP = (10^34-1) * 10^emax
113 C_hi = 0x5fffed09bead87c0ull;
114 C_lo = 0x378d8e63ffffffffull;
115 }
116 } else { // result is negative
117 if (rnd_mode == ROUNDING_DOWN || rnd_mode == ROUNDING_TIES_AWAY) { // -inf
118 C_hi = 0xf800000000000000ull;
119 C_lo = 0x0000000000000000ull;
120 } else { // res = -MAXFP = -(10^34-1) * 10^emax
121 C_hi = 0xdfffed09bead87c0ull;
122 C_lo = 0x378d8e63ffffffffull;
123 }
124 }
125 }
126 // assemble the result
127 res.w[1] = sign | exp | C_hi;
128 res.w[0] = C_lo;
129 *ptrres = res;
130 }
131
132 static void
133 add256 (UINT256 x, UINT256 y, UINT256 * pz) {
134 // *z = x + yl assume the sum fits in 256 bits
135 UINT256 z;
136 z.w[0] = x.w[0] + y.w[0];
137 if (z.w[0] < x.w[0]) {
138 x.w[1]++;
139 if (x.w[1] == 0x0000000000000000ull) {
140 x.w[2]++;
141 if (x.w[2] == 0x0000000000000000ull) {
142 x.w[3]++;
143 }
144 }
145 }
146 z.w[1] = x.w[1] + y.w[1];
147 if (z.w[1] < x.w[1]) {
148 x.w[2]++;
149 if (x.w[2] == 0x0000000000000000ull) {
150 x.w[3]++;
151 }
152 }
153 z.w[2] = x.w[2] + y.w[2];
154 if (z.w[2] < x.w[2]) {
155 x.w[3]++;
156 }
157 z.w[3] = x.w[3] + y.w[3]; // it was assumed that no carry is possible
158 *pz = z;
159 }
160
161 static void
162 sub256 (UINT256 x, UINT256 y, UINT256 * pz) {
163 // *z = x - y; assume x >= y
164 UINT256 z;
165 z.w[0] = x.w[0] - y.w[0];
166 if (z.w[0] > x.w[0]) {
167 x.w[1]--;
168 if (x.w[1] == 0xffffffffffffffffull) {
169 x.w[2]--;
170 if (x.w[2] == 0xffffffffffffffffull) {
171 x.w[3]--;
172 }
173 }
174 }
175 z.w[1] = x.w[1] - y.w[1];
176 if (z.w[1] > x.w[1]) {
177 x.w[2]--;
178 if (x.w[2] == 0xffffffffffffffffull) {
179 x.w[3]--;
180 }
181 }
182 z.w[2] = x.w[2] - y.w[2];
183 if (z.w[2] > x.w[2]) {
184 x.w[3]--;
185 }
186 z.w[3] = x.w[3] - y.w[3]; // no borrow possible, because x >= y
187 *pz = z;
188 }
189
190
191 static int
192 nr_digits256 (UINT256 R256) {
193 int ind;
194 // determine the number of decimal digits in R256
195 if (R256.w[3] == 0x0 && R256.w[2] == 0x0 && R256.w[1] == 0x0) {
196 // between 1 and 19 digits
197 for (ind = 1; ind <= 19; ind++) {
198 if (R256.w[0] < ten2k64[ind]) {
199 break;
200 }
201 }
202 // ind digits
203 } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0 &&
204 (R256.w[1] < ten2k128[0].w[1] ||
205 (R256.w[1] == ten2k128[0].w[1]
206 && R256.w[0] < ten2k128[0].w[0]))) {
207 // 20 digits
208 ind = 20;
209 } else if (R256.w[3] == 0x0 && R256.w[2] == 0x0) {
210 // between 21 and 38 digits
211 for (ind = 1; ind <= 18; ind++) {
212 if (R256.w[1] < ten2k128[ind].w[1] ||
213 (R256.w[1] == ten2k128[ind].w[1] &&
214 R256.w[0] < ten2k128[ind].w[0])) {
215 break;
216 }
217 }
218 // ind + 20 digits
219 ind = ind + 20;
220 } else if (R256.w[3] == 0x0 &&
221 (R256.w[2] < ten2k256[0].w[2] ||
222 (R256.w[2] == ten2k256[0].w[2] &&
223 R256.w[1] < ten2k256[0].w[1]) ||
224 (R256.w[2] == ten2k256[0].w[2] &&
225 R256.w[1] == ten2k256[0].w[1] &&
226 R256.w[0] < ten2k256[0].w[0]))) {
227 // 39 digits
228 ind = 39;
229 } else {
230 // between 40 and 68 digits
231 for (ind = 1; ind <= 29; ind++) {
232 if (R256.w[3] < ten2k256[ind].w[3] ||
233 (R256.w[3] == ten2k256[ind].w[3] &&
234 R256.w[2] < ten2k256[ind].w[2]) ||
235 (R256.w[3] == ten2k256[ind].w[3] &&
236 R256.w[2] == ten2k256[ind].w[2] &&
237 R256.w[1] < ten2k256[ind].w[1]) ||
238 (R256.w[3] == ten2k256[ind].w[3] &&
239 R256.w[2] == ten2k256[ind].w[2] &&
240 R256.w[1] == ten2k256[ind].w[1] &&
241 R256.w[0] < ten2k256[ind].w[0])) {
242 break;
243 }
244 }
245 // ind + 39 digits
246 ind = ind + 39;
247 }
248 return (ind);
249 }
250
251 // add/subtract C4 and C3 * 10^scale; this may follow a previous rounding, so
252 // use the rounding information from ptr_is_* to avoid a double rounding error
253 static void
254 add_and_round (int q3,
255 int q4,
256 int e4,
257 int delta,
258 int p34,
259 UINT64 z_sign,
260 UINT64 p_sign,
261 UINT128 C3,
262 UINT256 C4,
263 int rnd_mode,
264 int *ptr_is_midpoint_lt_even,
265 int *ptr_is_midpoint_gt_even,
266 int *ptr_is_inexact_lt_midpoint,
267 int *ptr_is_inexact_gt_midpoint,
268 _IDEC_flags * ptrfpsf, UINT128 * ptrres) {
269
270 int scale;
271 int x0;
272 int ind;
273 UINT64 R64;
274 UINT128 P128, R128;
275 UINT192 P192, R192;
276 UINT256 R256;
277 int is_midpoint_lt_even = 0;
278 int is_midpoint_gt_even = 0;
279 int is_inexact_lt_midpoint = 0;
280 int is_inexact_gt_midpoint = 0;
281 int is_midpoint_lt_even0 = 0;
282 int is_midpoint_gt_even0 = 0;
283 int is_inexact_lt_midpoint0 = 0;
284 int is_inexact_gt_midpoint0 = 0;
285 int incr_exp = 0;
286 int is_tiny = 0;
287 int lt_half_ulp = 0;
288 int eq_half_ulp = 0;
289 // int gt_half_ulp = 0;
290 UINT128 res = *ptrres;
291
292 // scale C3 up by 10^(q4-delta-q3), 0 <= q4-delta-q3 <= 2*P34-2 = 66
293 scale = q4 - delta - q3; // 0 <= scale <= 66 (or 0 <= scale <= 68 if this
294 // comes from Cases (2), (3), (4), (5), (6), with 0 <= |delta| <= 1
295
296 // calculate C3 * 10^scale in R256 (it has at most 67 decimal digits for
297 // Cases (15),(16),(17) and at most 69 for Cases (2),(3),(4),(5),(6))
298 if (scale == 0) {
299 R256.w[3] = 0x0ull;
300 R256.w[2] = 0x0ull;
301 R256.w[1] = C3.w[1];
302 R256.w[0] = C3.w[0];
303 } else if (scale <= 19) { // 10^scale fits in 64 bits
304 P128.w[1] = 0;
305 P128.w[0] = ten2k64[scale];
306 __mul_128x128_to_256 (R256, P128, C3);
307 } else if (scale <= 38) { // 10^scale fits in 128 bits
308 __mul_128x128_to_256 (R256, ten2k128[scale - 20], C3);
309 } else if (scale <= 57) { // 39 <= scale <= 57
310 // 10^scale fits in 192 bits but C3 * 10^scale fits in 223 or 230 bits
311 // (10^67 has 223 bits; 10^69 has 230 bits);
312 // must split the computation:
313 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
314 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
315 // Note that 1 <= scale - 38 <= 19 => 10^(scale-38) fits in 64 bits
316 __mul_64x128_to_128 (R128, ten2k64[scale - 38], C3);
317 // now multiply R128 by 10^38
318 __mul_128x128_to_256 (R256, R128, ten2k128[18]);
319 } else { // 58 <= scale <= 66
320 // 10^scale takes between 193 and 220 bits,
321 // and C3 * 10^scale fits in 223 bits (10^67/10^69 has 223/230 bits)
322 // must split the computation:
323 // 10^scale * C3 = 10*38 * 10^(scale-38) * C3 where 10^38 takes 127
324 // bits and so 10^(scale-38) * C3 fits in 128 bits with certainty
325 // Note that 20 <= scale - 38 <= 30 => 10^(scale-38) fits in 128 bits
326 // Calculate first 10^(scale-38) * C3, which fits in 128 bits; because
327 // 10^(scale-38) takes more than 64 bits, C3 will take less than 64
328 __mul_64x128_to_128 (R128, C3.w[0], ten2k128[scale - 58]);
329 // now calculate 10*38 * 10^(scale-38) * C3
330 __mul_128x128_to_256 (R256, R128, ten2k128[18]);
331 }
332 // C3 * 10^scale is now in R256
333
334 // for Cases (15), (16), (17) C4 > C3 * 10^scale because C4 has at least
335 // one extra digit; for Cases (2), (3), (4), (5), or (6) any order is
336 // possible
337 // add/subtract C4 and C3 * 10^scale; the exponent is e4
338 if (p_sign == z_sign) { // R256 = C4 + R256
339 // calculate R256 = C4 + C3 * 10^scale = C4 + R256 which is exact,
340 // but may require rounding
341 add256 (C4, R256, &R256);
342 } else { // if (p_sign != z_sign) { // R256 = C4 - R256
343 // calculate R256 = C4 - C3 * 10^scale = C4 - R256 or
344 // R256 = C3 * 10^scale - C4 = R256 - C4 which is exact,
345 // but may require rounding
346
347 // compare first R256 = C3 * 10^scale and C4
348 if (R256.w[3] > C4.w[3] || (R256.w[3] == C4.w[3] && R256.w[2] > C4.w[2]) ||
349 (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] > C4.w[1]) ||
350 (R256.w[3] == C4.w[3] && R256.w[2] == C4.w[2] && R256.w[1] == C4.w[1] &&
351 R256.w[0] >= C4.w[0])) { // C3 * 10^scale >= C4
352 // calculate R256 = C3 * 10^scale - C4 = R256 - C4, which is exact,
353 // but may require rounding
354 sub256 (R256, C4, &R256);
355 // flip p_sign too, because the result has the sign of z
356 p_sign = z_sign;
357 } else { // if C4 > C3 * 10^scale
358 // calculate R256 = C4 - C3 * 10^scale = C4 - R256, which is exact,
359 // but may require rounding
360 sub256 (C4, R256, &R256);
361 }
362 // if the result is pure zero, the sign depends on the rounding mode
363 // (x*y and z had opposite signs)
364 if (R256.w[3] == 0x0ull && R256.w[2] == 0x0ull &&
365 R256.w[1] == 0x0ull && R256.w[0] == 0x0ull) {
366 if (rnd_mode != ROUNDING_DOWN)
367 p_sign = 0x0000000000000000ull;
368 else
369 p_sign = 0x8000000000000000ull;
370 // the exponent is max (e4, expmin)
371 if (e4 < -6176)
372 e4 = expmin;
373 // assemble result
374 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49);
375 res.w[0] = 0x0;
376 *ptrres = res;
377 return;
378 }
379 }
380
381 // determine the number of decimal digits in R256
382 ind = nr_digits256 (R256);
383
384 // the exact result is (-1)^p_sign * R256 * 10^e4 where q (R256) = ind;
385 // round to the destination precision, with unbounded exponent
386
387 if (ind <= p34) {
388 // result rounded to the destination precision with unbounded exponent
389 // is exact
390 if (ind + e4 < p34 + expmin) {
391 is_tiny = 1; // applies to all rounding modes
392 }
393 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R256.w[1];
394 res.w[0] = R256.w[0];
395 // Note: res is correct only if expmin <= e4 <= expmax
396 } else { // if (ind > p34)
397 // if more than P digits, round to nearest to P digits
398 // round R256 to p34 digits
399 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
400 if (ind <= 38) {
401 P128.w[1] = R256.w[1];
402 P128.w[0] = R256.w[0];
403 round128_19_38 (ind, x0, P128, &R128, &incr_exp,
404 &is_midpoint_lt_even, &is_midpoint_gt_even,
405 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
406 } else if (ind <= 57) {
407 P192.w[2] = R256.w[2];
408 P192.w[1] = R256.w[1];
409 P192.w[0] = R256.w[0];
410 round192_39_57 (ind, x0, P192, &R192, &incr_exp,
411 &is_midpoint_lt_even, &is_midpoint_gt_even,
412 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
413 R128.w[1] = R192.w[1];
414 R128.w[0] = R192.w[0];
415 } else { // if (ind <= 68)
416 round256_58_76 (ind, x0, R256, &R256, &incr_exp,
417 &is_midpoint_lt_even, &is_midpoint_gt_even,
418 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
419 R128.w[1] = R256.w[1];
420 R128.w[0] = R256.w[0];
421 }
422 // the rounded result has p34 = 34 digits
423 e4 = e4 + x0 + incr_exp;
424 if (rnd_mode == ROUNDING_TO_NEAREST) {
425 if (e4 < expmin) {
426 is_tiny = 1; // for other rounding modes apply correction
427 }
428 } else {
429 // for RM, RP, RZ, RA apply correction in order to determine tininess
430 // but do not save the result; apply the correction to
431 // (-1)^p_sign * significand * 10^0
432 P128.w[1] = p_sign | 0x3040000000000000ull | R128.w[1];
433 P128.w[0] = R128.w[0];
434 rounding_correction (rnd_mode,
435 is_inexact_lt_midpoint,
436 is_inexact_gt_midpoint, is_midpoint_lt_even,
437 is_midpoint_gt_even, 0, &P128, ptrfpsf);
438 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
439 // the number of digits in the significand is p34 = 34
440 if (e4 + scale < expmin) {
441 is_tiny = 1;
442 }
443 }
444 ind = p34; // the number of decimal digits in the signifcand of res
445 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | R128.w[1]; // RN
446 res.w[0] = R128.w[0];
447 // Note: res is correct only if expmin <= e4 <= expmax
448 // set the inexact flag after rounding with bounded exponent, if any
449 }
450 // at this point we have the result rounded with unbounded exponent in
451 // res and we know its tininess:
452 // res = (-1)^p_sign * significand * 10^e4,
453 // where q (significand) = ind <= p34
454 // Note: res is correct only if expmin <= e4 <= expmax
455
456 // check for overflow if RN
457 if (rnd_mode == ROUNDING_TO_NEAREST && (ind + e4) > (p34 + expmax)) {
458 res.w[1] = p_sign | 0x7800000000000000ull;
459 res.w[0] = 0x0000000000000000ull;
460 *ptrres = res;
461 *ptrfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
462 return; // BID_RETURN (res)
463 } // else not overflow or not RN, so continue
464
465 // if (e4 >= expmin) we have the result rounded with bounded exponent
466 if (e4 < expmin) {
467 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
468 // where the result rounded [at most] once is
469 // (-1)^p_sign * significand_res * 10^e4
470
471 // avoid double rounding error
472 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
473 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
474 is_midpoint_lt_even0 = is_midpoint_lt_even;
475 is_midpoint_gt_even0 = is_midpoint_gt_even;
476 is_inexact_lt_midpoint = 0;
477 is_inexact_gt_midpoint = 0;
478 is_midpoint_lt_even = 0;
479 is_midpoint_gt_even = 0;
480
481 if (x0 > ind) {
482 // nothing is left of res when moving the decimal point left x0 digits
483 is_inexact_lt_midpoint = 1;
484 res.w[1] = p_sign | 0x0000000000000000ull;
485 res.w[0] = 0x0000000000000000ull;
486 e4 = expmin;
487 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
488 // this is <, =, or > 1/2 ulp
489 // compare the ind-digit value in the significand of res with
490 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
491 // less than, equal to, or greater than 1/2 ulp (significand of res)
492 R128.w[1] = res.w[1] & MASK_COEFF;
493 R128.w[0] = res.w[0];
494 if (ind <= 19) {
495 if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
496 lt_half_ulp = 1;
497 is_inexact_lt_midpoint = 1;
498 } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
499 eq_half_ulp = 1;
500 is_midpoint_gt_even = 1;
501 } else { // > 1/2 ulp
502 // gt_half_ulp = 1;
503 is_inexact_gt_midpoint = 1;
504 }
505 } else { // if (ind <= 38) {
506 if (R128.w[1] < midpoint128[ind - 20].w[1] ||
507 (R128.w[1] == midpoint128[ind - 20].w[1] &&
508 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
509 lt_half_ulp = 1;
510 is_inexact_lt_midpoint = 1;
511 } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
512 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
513 eq_half_ulp = 1;
514 is_midpoint_gt_even = 1;
515 } else { // > 1/2 ulp
516 // gt_half_ulp = 1;
517 is_inexact_gt_midpoint = 1;
518 }
519 }
520 if (lt_half_ulp || eq_half_ulp) {
521 // res = +0.0 * 10^expmin
522 res.w[1] = 0x0000000000000000ull;
523 res.w[0] = 0x0000000000000000ull;
524 } else { // if (gt_half_ulp)
525 // res = +1 * 10^expmin
526 res.w[1] = 0x0000000000000000ull;
527 res.w[0] = 0x0000000000000001ull;
528 }
529 res.w[1] = p_sign | res.w[1];
530 e4 = expmin;
531 } else { // if (1 <= x0 <= ind - 1 <= 33)
532 // round the ind-digit result to ind - x0 digits
533
534 if (ind <= 18) { // 2 <= ind <= 18
535 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
536 &is_midpoint_lt_even, &is_midpoint_gt_even,
537 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
538 res.w[1] = 0x0;
539 res.w[0] = R64;
540 } else if (ind <= 38) {
541 P128.w[1] = res.w[1] & MASK_COEFF;
542 P128.w[0] = res.w[0];
543 round128_19_38 (ind, x0, P128, &res, &incr_exp,
544 &is_midpoint_lt_even, &is_midpoint_gt_even,
545 &is_inexact_lt_midpoint,
546 &is_inexact_gt_midpoint);
547 }
548 e4 = e4 + x0; // expmin
549 // we want the exponent to be expmin, so if incr_exp = 1 then
550 // multiply the rounded result by 10 - it will still fit in 113 bits
551 if (incr_exp) {
552 // 64 x 128 -> 128
553 P128.w[1] = res.w[1] & MASK_COEFF;
554 P128.w[0] = res.w[0];
555 __mul_64x128_to_128 (res, ten2k64[1], P128);
556 }
557 res.w[1] =
558 p_sign | ((UINT64) (e4 + 6176) << 49) | (res.w[1] & MASK_COEFF);
559 // avoid a double rounding error
560 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
561 is_midpoint_lt_even) { // double rounding error upward
562 // res = res - 1
563 res.w[0]--;
564 if (res.w[0] == 0xffffffffffffffffull)
565 res.w[1]--;
566 // Note: a double rounding error upward is not possible; for this
567 // the result after the first rounding would have to be 99...95
568 // (35 digits in all), possibly followed by a number of zeros; this
569 // is not possible in Cases (2)-(6) or (15)-(17) which may get here
570 is_midpoint_lt_even = 0;
571 is_inexact_lt_midpoint = 1;
572 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
573 is_midpoint_gt_even) { // double rounding error downward
574 // res = res + 1
575 res.w[0]++;
576 if (res.w[0] == 0)
577 res.w[1]++;
578 is_midpoint_gt_even = 0;
579 is_inexact_gt_midpoint = 1;
580 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
581 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
582 // if this second rounding was exact the result may still be
583 // inexact because of the first rounding
584 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
585 is_inexact_gt_midpoint = 1;
586 }
587 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
588 is_inexact_lt_midpoint = 1;
589 }
590 } else if (is_midpoint_gt_even &&
591 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
592 // pulled up to a midpoint
593 is_inexact_lt_midpoint = 1;
594 is_inexact_gt_midpoint = 0;
595 is_midpoint_lt_even = 0;
596 is_midpoint_gt_even = 0;
597 } else if (is_midpoint_lt_even &&
598 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
599 // pulled down to a midpoint
600 is_inexact_lt_midpoint = 0;
601 is_inexact_gt_midpoint = 1;
602 is_midpoint_lt_even = 0;
603 is_midpoint_gt_even = 0;
604 } else {
605 ;
606 }
607 }
608 }
609 // res contains the correct result
610 // apply correction if not rounding to nearest
611 if (rnd_mode != ROUNDING_TO_NEAREST) {
612 rounding_correction (rnd_mode,
613 is_inexact_lt_midpoint, is_inexact_gt_midpoint,
614 is_midpoint_lt_even, is_midpoint_gt_even,
615 e4, &res, ptrfpsf);
616 }
617 if (is_midpoint_lt_even || is_midpoint_gt_even ||
618 is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
619 // set the inexact flag
620 *ptrfpsf |= INEXACT_EXCEPTION;
621 if (is_tiny)
622 *ptrfpsf |= UNDERFLOW_EXCEPTION;
623 }
624
625 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
626 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
627 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
628 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
629 *ptrres = res;
630 return;
631 }
632
633
634 #if DECIMAL_CALL_BY_REFERENCE
635 static void
636 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
637 int *ptr_is_midpoint_gt_even,
638 int *ptr_is_inexact_lt_midpoint,
639 int *ptr_is_inexact_gt_midpoint, UINT128 * pres,
640 UINT128 * px, UINT128 * py,
641 UINT128 *
642 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
643 _EXC_INFO_PARAM) {
644 UINT128 x = *px, y = *py, z = *pz;
645 #if !DECIMAL_GLOBAL_ROUNDING
646 unsigned int rnd_mode = *prnd_mode;
647 #endif
648 #else
649 static UINT128
650 bid128_ext_fma (int *ptr_is_midpoint_lt_even,
651 int *ptr_is_midpoint_gt_even,
652 int *ptr_is_inexact_lt_midpoint,
653 int *ptr_is_inexact_gt_midpoint, UINT128 x, UINT128 y,
654 UINT128 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
655 _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
656 #endif
657
658 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
659 UINT64 x_sign, y_sign, z_sign, p_sign, tmp_sign;
660 UINT64 x_exp = 0, y_exp = 0, z_exp = 0, p_exp;
661 int true_p_exp;
662 UINT128 C1, C2, C3;
663 UINT256 C4;
664 int q1 = 0, q2 = 0, q3 = 0, q4;
665 int e1, e2, e3, e4;
666 int scale, ind, delta, x0;
667 int p34 = P34; // used to modify the limit on the number of digits
668 BID_UI64DOUBLE tmp;
669 int x_nr_bits, y_nr_bits, z_nr_bits;
670 unsigned int save_fpsf;
671 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
672 int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
673 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0;
674 int is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
675 int incr_exp = 0;
676 int lsb;
677 int lt_half_ulp = 0;
678 int eq_half_ulp = 0;
679 int gt_half_ulp = 0;
680 int is_tiny = 0;
681 UINT64 R64, tmp64;
682 UINT128 P128, R128;
683 UINT192 P192, R192;
684 UINT256 R256;
685
686 // the following are based on the table of special cases for fma; the NaN
687 // behavior is similar to that of the IA-64 Architecture fma
688
689 // identify cases where at least one operand is NaN
690
691 BID_SWAP128 (x);
692 BID_SWAP128 (y);
693 BID_SWAP128 (z);
694 if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
695 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
696 // check first for non-canonical NaN payload
697 if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
698 (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
699 (y.w[0] > 0x38c15b09ffffffffull))) {
700 y.w[1] = y.w[1] & 0xffffc00000000000ull;
701 y.w[0] = 0x0ull;
702 }
703 if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
704 // set invalid flag
705 *pfpsf |= INVALID_EXCEPTION;
706 // return quiet (y)
707 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
708 res.w[0] = y.w[0];
709 } else { // y is QNaN
710 // return y
711 res.w[1] = y.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
712 res.w[0] = y.w[0];
713 // if z = SNaN or x = SNaN signal invalid exception
714 if ((z.w[1] & MASK_SNAN) == MASK_SNAN ||
715 (x.w[1] & MASK_SNAN) == MASK_SNAN) {
716 // set invalid flag
717 *pfpsf |= INVALID_EXCEPTION;
718 }
719 }
720 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
721 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
722 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
723 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
724 BID_SWAP128 (res);
725 BID_RETURN (res)
726 } else if ((z.w[1] & MASK_NAN) == MASK_NAN) { // z is NAN
727 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
728 // check first for non-canonical NaN payload
729 if (((z.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
730 (((z.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
731 (z.w[0] > 0x38c15b09ffffffffull))) {
732 z.w[1] = z.w[1] & 0xffffc00000000000ull;
733 z.w[0] = 0x0ull;
734 }
735 if ((z.w[1] & MASK_SNAN) == MASK_SNAN) { // z is SNAN
736 // set invalid flag
737 *pfpsf |= INVALID_EXCEPTION;
738 // return quiet (z)
739 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
740 res.w[0] = z.w[0];
741 } else { // z is QNaN
742 // return z
743 res.w[1] = z.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
744 res.w[0] = z.w[0];
745 // if x = SNaN signal invalid exception
746 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) {
747 // set invalid flag
748 *pfpsf |= INVALID_EXCEPTION;
749 }
750 }
751 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
752 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
753 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
754 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
755 BID_SWAP128 (res);
756 BID_RETURN (res)
757 } else if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
758 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
759 // check first for non-canonical NaN payload
760 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) ||
761 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) &&
762 (x.w[0] > 0x38c15b09ffffffffull))) {
763 x.w[1] = x.w[1] & 0xffffc00000000000ull;
764 x.w[0] = 0x0ull;
765 }
766 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
767 // set invalid flag
768 *pfpsf |= INVALID_EXCEPTION;
769 // return quiet (x)
770 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out also G[6]-G[16]
771 res.w[0] = x.w[0];
772 } else { // x is QNaN
773 // return x
774 res.w[1] = x.w[1] & 0xfc003fffffffffffull; // clear out G[6]-G[16]
775 res.w[0] = x.w[0];
776 }
777 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
778 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
779 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
780 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
781 BID_SWAP128 (res);
782 BID_RETURN (res)
783 }
784 // x, y, z are 0, f, or inf but not NaN => unpack the arguments and check
785 // for non-canonical values
786
787 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
788 C1.w[1] = x.w[1] & MASK_COEFF;
789 C1.w[0] = x.w[0];
790 if ((x.w[1] & MASK_ANY_INF) != MASK_INF) { // x != inf
791 // if x is not infinity check for non-canonical values - treated as zero
792 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
793 // non-canonical
794 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
795 C1.w[1] = 0; // significand high
796 C1.w[0] = 0; // significand low
797 } else { // G0_G1 != 11
798 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
799 if (C1.w[1] > 0x0001ed09bead87c0ull ||
800 (C1.w[1] == 0x0001ed09bead87c0ull &&
801 C1.w[0] > 0x378d8e63ffffffffull)) {
802 // x is non-canonical if coefficient is larger than 10^34 -1
803 C1.w[1] = 0;
804 C1.w[0] = 0;
805 } else { // canonical
806 ;
807 }
808 }
809 }
810 y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
811 C2.w[1] = y.w[1] & MASK_COEFF;
812 C2.w[0] = y.w[0];
813 if ((y.w[1] & MASK_ANY_INF) != MASK_INF) { // y != inf
814 // if y is not infinity check for non-canonical values - treated as zero
815 if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
816 // non-canonical
817 y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
818 C2.w[1] = 0; // significand high
819 C2.w[0] = 0; // significand low
820 } else { // G0_G1 != 11
821 y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
822 if (C2.w[1] > 0x0001ed09bead87c0ull ||
823 (C2.w[1] == 0x0001ed09bead87c0ull &&
824 C2.w[0] > 0x378d8e63ffffffffull)) {
825 // y is non-canonical if coefficient is larger than 10^34 -1
826 C2.w[1] = 0;
827 C2.w[0] = 0;
828 } else { // canonical
829 ;
830 }
831 }
832 }
833 z_sign = z.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
834 C3.w[1] = z.w[1] & MASK_COEFF;
835 C3.w[0] = z.w[0];
836 if ((z.w[1] & MASK_ANY_INF) != MASK_INF) { // z != inf
837 // if z is not infinity check for non-canonical values - treated as zero
838 if ((z.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { // G0_G1=11
839 // non-canonical
840 z_exp = (z.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
841 C3.w[1] = 0; // significand high
842 C3.w[0] = 0; // significand low
843 } else { // G0_G1 != 11
844 z_exp = z.w[1] & MASK_EXP; // biased and shifted left 49 bits
845 if (C3.w[1] > 0x0001ed09bead87c0ull ||
846 (C3.w[1] == 0x0001ed09bead87c0ull &&
847 C3.w[0] > 0x378d8e63ffffffffull)) {
848 // z is non-canonical if coefficient is larger than 10^34 -1
849 C3.w[1] = 0;
850 C3.w[0] = 0;
851 } else { // canonical
852 ;
853 }
854 }
855 }
856
857 p_sign = x_sign ^ y_sign; // sign of the product
858
859 // identify cases where at least one operand is infinity
860
861 if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x = inf
862 if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
863 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
864 if (p_sign == z_sign) {
865 res.w[1] = z_sign | MASK_INF;
866 res.w[0] = 0x0;
867 } else {
868 // return QNaN Indefinite
869 res.w[1] = 0x7c00000000000000ull;
870 res.w[0] = 0x0000000000000000ull;
871 // set invalid flag
872 *pfpsf |= INVALID_EXCEPTION;
873 }
874 } else { // z = 0 or z = f
875 res.w[1] = p_sign | MASK_INF;
876 res.w[0] = 0x0;
877 }
878 } else if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f
879 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
880 if (p_sign == z_sign) {
881 res.w[1] = z_sign | MASK_INF;
882 res.w[0] = 0x0;
883 } else {
884 // return QNaN Indefinite
885 res.w[1] = 0x7c00000000000000ull;
886 res.w[0] = 0x0000000000000000ull;
887 // set invalid flag
888 *pfpsf |= INVALID_EXCEPTION;
889 }
890 } else { // z = 0 or z = f
891 res.w[1] = p_sign | MASK_INF;
892 res.w[0] = 0x0;
893 }
894 } else { // y = 0
895 // return QNaN Indefinite
896 res.w[1] = 0x7c00000000000000ull;
897 res.w[0] = 0x0000000000000000ull;
898 // set invalid flag
899 *pfpsf |= INVALID_EXCEPTION;
900 }
901 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
902 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
903 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
904 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
905 BID_SWAP128 (res);
906 BID_RETURN (res)
907 } else if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y = inf
908 if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
909 // x = f, necessarily
910 if ((p_sign != z_sign)
911 || (C1.w[1] == 0x0ull && C1.w[0] == 0x0ull)) {
912 // return QNaN Indefinite
913 res.w[1] = 0x7c00000000000000ull;
914 res.w[0] = 0x0000000000000000ull;
915 // set invalid flag
916 *pfpsf |= INVALID_EXCEPTION;
917 } else {
918 res.w[1] = z_sign | MASK_INF;
919 res.w[0] = 0x0;
920 }
921 } else if (C1.w[1] == 0x0 && C1.w[0] == 0x0) { // x = 0
922 // z = 0, f, inf
923 // return QNaN Indefinite
924 res.w[1] = 0x7c00000000000000ull;
925 res.w[0] = 0x0000000000000000ull;
926 // set invalid flag
927 *pfpsf |= INVALID_EXCEPTION;
928 } else {
929 // x = f and z = 0, f, necessarily
930 res.w[1] = p_sign | MASK_INF;
931 res.w[0] = 0x0;
932 }
933 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
934 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
935 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
936 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
937 BID_SWAP128 (res);
938 BID_RETURN (res)
939 } else if ((z.w[1] & MASK_ANY_INF) == MASK_INF) { // z = inf
940 // x = 0, f and y = 0, f, necessarily
941 res.w[1] = z_sign | MASK_INF;
942 res.w[0] = 0x0;
943 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
944 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
945 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
946 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
947 BID_SWAP128 (res);
948 BID_RETURN (res)
949 }
950
951 true_p_exp = (x_exp >> 49) - 6176 + (y_exp >> 49) - 6176;
952 if (true_p_exp < -6176)
953 p_exp = 0; // cannot be less than EXP_MIN
954 else
955 p_exp = (UINT64) (true_p_exp + 6176) << 49;
956
957 if (((C1.w[1] == 0x0 && C1.w[0] == 0x0) || (C2.w[1] == 0x0 && C2.w[0] == 0x0)) && C3.w[1] == 0x0 && C3.w[0] == 0x0) { // (x = 0 or y = 0) and z = 0
958 // the result is 0
959 if (p_exp < z_exp)
960 res.w[1] = p_exp; // preferred exponent
961 else
962 res.w[1] = z_exp; // preferred exponent
963 if (p_sign == z_sign) {
964 res.w[1] |= z_sign;
965 res.w[0] = 0x0;
966 } else { // x * y and z have opposite signs
967 if (rnd_mode == ROUNDING_DOWN) {
968 // res = -0.0
969 res.w[1] |= MASK_SIGN;
970 res.w[0] = 0x0;
971 } else {
972 // res = +0.0
973 // res.w[1] |= 0x0;
974 res.w[0] = 0x0;
975 }
976 }
977 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
978 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
979 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
980 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
981 BID_SWAP128 (res);
982 BID_RETURN (res)
983 }
984 // from this point on, we may need to know the number of decimal digits
985 // in the significands of x, y, z when x, y, z != 0
986
987 if (C1.w[1] != 0 || C1.w[0] != 0) { // x = f (non-zero finite)
988 // q1 = nr. of decimal digits in x
989 // determine first the nr. of bits in x
990 if (C1.w[1] == 0) {
991 if (C1.w[0] >= 0x0020000000000000ull) { // x >= 2^53
992 // split the 64-bit value in two 32-bit halves to avoid rounding errors
993 if (C1.w[0] >= 0x0000000100000000ull) { // x >= 2^32
994 tmp.d = (double) (C1.w[0] >> 32); // exact conversion
995 x_nr_bits =
996 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
997 } else { // x < 2^32
998 tmp.d = (double) (C1.w[0]); // exact conversion
999 x_nr_bits =
1000 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1001 }
1002 } else { // if x < 2^53
1003 tmp.d = (double) C1.w[0]; // exact conversion
1004 x_nr_bits =
1005 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1006 }
1007 } else { // C1.w[1] != 0 => nr. bits = 64 + nr_bits (C1.w[1])
1008 tmp.d = (double) C1.w[1]; // exact conversion
1009 x_nr_bits =
1010 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1011 }
1012 q1 = nr_digits[x_nr_bits - 1].digits;
1013 if (q1 == 0) {
1014 q1 = nr_digits[x_nr_bits - 1].digits1;
1015 if (C1.w[1] > nr_digits[x_nr_bits - 1].threshold_hi ||
1016 (C1.w[1] == nr_digits[x_nr_bits - 1].threshold_hi &&
1017 C1.w[0] >= nr_digits[x_nr_bits - 1].threshold_lo))
1018 q1++;
1019 }
1020 }
1021
1022 if (C2.w[1] != 0 || C2.w[0] != 0) { // y = f (non-zero finite)
1023 if (C2.w[1] == 0) {
1024 if (C2.w[0] >= 0x0020000000000000ull) { // y >= 2^53
1025 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1026 if (C2.w[0] >= 0x0000000100000000ull) { // y >= 2^32
1027 tmp.d = (double) (C2.w[0] >> 32); // exact conversion
1028 y_nr_bits =
1029 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1030 } else { // y < 2^32
1031 tmp.d = (double) C2.w[0]; // exact conversion
1032 y_nr_bits =
1033 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1034 }
1035 } else { // if y < 2^53
1036 tmp.d = (double) C2.w[0]; // exact conversion
1037 y_nr_bits =
1038 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1039 }
1040 } else { // C2.w[1] != 0 => nr. bits = 64 + nr_bits (C2.w[1])
1041 tmp.d = (double) C2.w[1]; // exact conversion
1042 y_nr_bits =
1043 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1044 }
1045
1046 q2 = nr_digits[y_nr_bits].digits;
1047 if (q2 == 0) {
1048 q2 = nr_digits[y_nr_bits].digits1;
1049 if (C2.w[1] > nr_digits[y_nr_bits].threshold_hi ||
1050 (C2.w[1] == nr_digits[y_nr_bits].threshold_hi &&
1051 C2.w[0] >= nr_digits[y_nr_bits].threshold_lo))
1052 q2++;
1053 }
1054 }
1055
1056 if (C3.w[1] != 0 || C3.w[0] != 0) { // z = f (non-zero finite)
1057 if (C3.w[1] == 0) {
1058 if (C3.w[0] >= 0x0020000000000000ull) { // z >= 2^53
1059 // split the 64-bit value in two 32-bit halves to avoid rounding errors
1060 if (C3.w[0] >= 0x0000000100000000ull) { // z >= 2^32
1061 tmp.d = (double) (C3.w[0] >> 32); // exact conversion
1062 z_nr_bits =
1063 32 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1064 } else { // z < 2^32
1065 tmp.d = (double) C3.w[0]; // exact conversion
1066 z_nr_bits =
1067 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1068 }
1069 } else { // if z < 2^53
1070 tmp.d = (double) C3.w[0]; // exact conversion
1071 z_nr_bits =
1072 ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1073 }
1074 } else { // C3.w[1] != 0 => nr. bits = 64 + nr_bits (C3.w[1])
1075 tmp.d = (double) C3.w[1]; // exact conversion
1076 z_nr_bits =
1077 64 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
1078 }
1079
1080 q3 = nr_digits[z_nr_bits].digits;
1081 if (q3 == 0) {
1082 q3 = nr_digits[z_nr_bits].digits1;
1083 if (C3.w[1] > nr_digits[z_nr_bits].threshold_hi ||
1084 (C3.w[1] == nr_digits[z_nr_bits].threshold_hi &&
1085 C3.w[0] >= nr_digits[z_nr_bits].threshold_lo))
1086 q3++;
1087 }
1088 }
1089
1090 if ((C1.w[1] == 0x0 && C1.w[0] == 0x0) ||
1091 (C2.w[1] == 0x0 && C2.w[0] == 0x0)) {
1092 // x = 0 or y = 0
1093 // z = f, necessarily; for 0 + z return z, with the preferred exponent
1094 // the result is z, but need to get the preferred exponent
1095 if (z_exp <= p_exp) { // the preferred exponent is z_exp
1096 res.w[1] = z_sign | (z_exp & MASK_EXP) | C3.w[1];
1097 res.w[0] = C3.w[0];
1098 } else { // if (p_exp < z_exp) the preferred exponent is p_exp
1099 // return (C3 * 10^scale) * 10^(z_exp - scale)
1100 // where scale = min (p34-q3, (z_exp-p_exp) >> 49)
1101 scale = p34 - q3;
1102 ind = (z_exp - p_exp) >> 49;
1103 if (ind < scale)
1104 scale = ind;
1105 if (scale == 0) {
1106 res.w[1] = z.w[1]; // & MASK_COEFF, which is redundant
1107 res.w[0] = z.w[0];
1108 } else if (q3 <= 19) { // z fits in 64 bits
1109 if (scale <= 19) { // 10^scale fits in 64 bits
1110 // 64 x 64 C3.w[0] * ten2k64[scale]
1111 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1112 } else { // 10^scale fits in 128 bits
1113 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1114 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1115 }
1116 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1117 // 64 x 128 ten2k64[scale] * C3
1118 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1119 }
1120 // subtract scale from the exponent
1121 z_exp = z_exp - ((UINT64) scale << 49);
1122 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1123 }
1124 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1125 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1126 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1127 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1128 BID_SWAP128 (res);
1129 BID_RETURN (res)
1130 } else {
1131 ; // continue with x = f, y = f, z = 0 or x = f, y = f, z = f
1132 }
1133
1134 e1 = (x_exp >> 49) - 6176; // unbiased exponent of x
1135 e2 = (y_exp >> 49) - 6176; // unbiased exponent of y
1136 e3 = (z_exp >> 49) - 6176; // unbiased exponent of z
1137 e4 = e1 + e2; // unbiased exponent of the exact x * y
1138
1139 // calculate C1 * C2 and its number of decimal digits, q4
1140
1141 // the exact product has either q1 + q2 - 1 or q1 + q2 decimal digits
1142 // where 2 <= q1 + q2 <= 68
1143 // calculate C4 = C1 * C2 and determine q
1144 C4.w[3] = C4.w[2] = C4.w[1] = C4.w[0] = 0;
1145 if (q1 + q2 <= 19) { // if 2 <= q1 + q2 <= 19, C4 = C1 * C2 fits in 64 bits
1146 C4.w[0] = C1.w[0] * C2.w[0];
1147 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1148 if (C4.w[0] < ten2k64[q1 + q2 - 1])
1149 q4 = q1 + q2 - 1; // q4 in [1, 18]
1150 else
1151 q4 = q1 + q2; // q4 in [2, 19]
1152 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1153 } else if (q1 + q2 == 20) { // C4 = C1 * C2 fits in 64 or 128 bits
1154 // q1 <= 19 and q2 <= 19 so both C1 and C2 fit in 64 bits
1155 __mul_64x64_to_128MACH (C4, C1.w[0], C2.w[0]);
1156 // if C4 < 10^(q1+q2-1) = 10^19 then q4 = q1+q2-1 = 19 else q4 = q1+q2 = 20
1157 if (C4.w[1] == 0 && C4.w[0] < ten2k64[19]) { // 19 = q1+q2-1
1158 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1159 q4 = 19; // 19 = q1 + q2 - 1
1160 } else {
1161 // if (C4.w[1] == 0)
1162 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1163 // else
1164 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1165 q4 = 20; // 20 = q1 + q2
1166 }
1167 } else if (q1 + q2 <= 38) { // 21 <= q1 + q2 <= 38
1168 // C4 = C1 * C2 fits in 64 or 128 bits
1169 // (64 bits possibly, but only when q1 + q2 = 21 and C4 has 20 digits)
1170 // at least one of C1, C2 has at most 19 decimal digits & fits in 64 bits
1171 if (q1 <= 19) {
1172 __mul_128x64_to_128 (C4, C1.w[0], C2);
1173 } else { // q2 <= 19
1174 __mul_128x64_to_128 (C4, C2.w[0], C1);
1175 }
1176 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1177 if (C4.w[1] < ten2k128[q1 + q2 - 21].w[1] ||
1178 (C4.w[1] == ten2k128[q1 + q2 - 21].w[1] &&
1179 C4.w[0] < ten2k128[q1 + q2 - 21].w[0])) {
1180 // if (C4.w[1] == 0) // q4 = 20, necessarily
1181 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 64;
1182 // else
1183 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1184 q4 = q1 + q2 - 1; // q4 in [20, 37]
1185 } else {
1186 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1187 q4 = q1 + q2; // q4 in [21, 38]
1188 }
1189 } else if (q1 + q2 == 39) { // C4 = C1 * C2 fits in 128 or 192 bits
1190 // both C1 and C2 fit in 128 bits (actually in 113 bits)
1191 // may replace this by 128x128_to192
1192 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] is 0
1193 // if C4 < 10^(q1+q2-1) = 10^38 then q4 = q1+q2-1 = 38 else q4 = q1+q2 = 39
1194 if (C4.w[2] == 0 && (C4.w[1] < ten2k128[18].w[1] ||
1195 (C4.w[1] == ten2k128[18].w[1]
1196 && C4.w[0] < ten2k128[18].w[0]))) {
1197 // 18 = 38 - 20 = q1+q2-1 - 20
1198 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1199 q4 = 38; // 38 = q1 + q2 - 1
1200 } else {
1201 // if (C4.w[2] == 0)
1202 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1203 // else
1204 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1205 q4 = 39; // 39 = q1 + q2
1206 }
1207 } else if (q1 + q2 <= 57) { // 40 <= q1 + q2 <= 57
1208 // C4 = C1 * C2 fits in 128 or 192 bits
1209 // (128 bits possibly, but only when q1 + q2 = 40 and C4 has 39 digits)
1210 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1211 // may fit in 64 bits
1212 if (C1.w[1] == 0) { // C1 fits in 64 bits
1213 // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1214 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2);
1215 } else if (C2.w[1] == 0) { // C2 fits in 64 bits
1216 // __mul_64x128_full (REShi64, RESlo128, A64, B128)
1217 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1);
1218 } else { // both C1 and C2 require 128 bits
1219 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1220 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1221 }
1222 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1223 if (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1224 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1225 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1226 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1227 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))) {
1228 // if (C4.w[2] == 0) // q4 = 39, necessarily
1229 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 128;
1230 // else
1231 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1232 q4 = q1 + q2 - 1; // q4 in [39, 56]
1233 } else {
1234 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1235 q4 = q1 + q2; // q4 in [40, 57]
1236 }
1237 } else if (q1 + q2 == 58) { // C4 = C1 * C2 fits in 192 or 256 bits
1238 // both C1 and C2 fit in 128 bits (actually in 113 bits); at most one
1239 // may fit in 64 bits
1240 if (C1.w[1] == 0) { // C1 * C2 will fit in 192 bits
1241 __mul_64x128_full (C4.w[2], C4, C1.w[0], C2); // may use 64x128_to_192
1242 } else if (C2.w[1] == 0) { // C1 * C2 will fit in 192 bits
1243 __mul_64x128_full (C4.w[2], C4, C2.w[0], C1); // may use 64x128_to_192
1244 } else { // C1 * C2 will fit in 192 bits or in 256 bits
1245 __mul_128x128_to_256 (C4, C1, C2);
1246 }
1247 // if C4 < 10^(q1+q2-1) = 10^57 then q4 = q1+q2-1 = 57 else q4 = q1+q2 = 58
1248 if (C4.w[3] == 0 && (C4.w[2] < ten2k256[18].w[2] ||
1249 (C4.w[2] == ten2k256[18].w[2]
1250 && (C4.w[1] < ten2k256[18].w[1]
1251 || (C4.w[1] == ten2k256[18].w[1]
1252 && C4.w[0] < ten2k256[18].w[0]))))) {
1253 // 18 = 57 - 39 = q1+q2-1 - 39
1254 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1255 q4 = 57; // 57 = q1 + q2 - 1
1256 } else {
1257 // if (C4.w[3] == 0)
1258 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1259 // else
1260 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1261 q4 = 58; // 58 = q1 + q2
1262 }
1263 } else { // if 59 <= q1 + q2 <= 68
1264 // C4 = C1 * C2 fits in 192 or 256 bits
1265 // (192 bits possibly, but only when q1 + q2 = 59 and C4 has 58 digits)
1266 // both C1 and C2 fit in 128 bits (actually in 113 bits); none fits in
1267 // 64 bits
1268 // may use __mul_128x128_to_192 (C4.w[2], C4.w[0], C2.w[0], C1);
1269 __mul_128x128_to_256 (C4, C1, C2); // C4.w[3] = 0
1270 // if C4 < 10^(q1+q2-1) then q4 = q1 + q2 - 1 else q4 = q1 + q2
1271 if (C4.w[3] < ten2k256[q1 + q2 - 40].w[3] ||
1272 (C4.w[3] == ten2k256[q1 + q2 - 40].w[3] &&
1273 (C4.w[2] < ten2k256[q1 + q2 - 40].w[2] ||
1274 (C4.w[2] == ten2k256[q1 + q2 - 40].w[2] &&
1275 (C4.w[1] < ten2k256[q1 + q2 - 40].w[1] ||
1276 (C4.w[1] == ten2k256[q1 + q2 - 40].w[1] &&
1277 C4.w[0] < ten2k256[q1 + q2 - 40].w[0])))))) {
1278 // if (C4.w[3] == 0) // q4 = 58, necessarily
1279 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 192;
1280 // else
1281 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1282 q4 = q1 + q2 - 1; // q4 in [58, 67]
1283 } else {
1284 // length of C1 * C2 rounded up to a multiple of 64 bits is len = 256;
1285 q4 = q1 + q2; // q4 in [59, 68]
1286 }
1287 }
1288
1289 if (C3.w[1] == 0x0 && C3.w[0] == 0x0) { // x = f, y = f, z = 0
1290 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
1291 *pfpsf = 0;
1292
1293 if (q4 > p34) {
1294
1295 // truncate C4 to p34 digits into res
1296 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
1297 x0 = q4 - p34;
1298 if (q4 <= 38) {
1299 P128.w[1] = C4.w[1];
1300 P128.w[0] = C4.w[0];
1301 round128_19_38 (q4, x0, P128, &res, &incr_exp,
1302 &is_midpoint_lt_even, &is_midpoint_gt_even,
1303 &is_inexact_lt_midpoint,
1304 &is_inexact_gt_midpoint);
1305 } else if (q4 <= 57) { // 35 <= q4 <= 57
1306 P192.w[2] = C4.w[2];
1307 P192.w[1] = C4.w[1];
1308 P192.w[0] = C4.w[0];
1309 round192_39_57 (q4, x0, P192, &R192, &incr_exp,
1310 &is_midpoint_lt_even, &is_midpoint_gt_even,
1311 &is_inexact_lt_midpoint,
1312 &is_inexact_gt_midpoint);
1313 res.w[0] = R192.w[0];
1314 res.w[1] = R192.w[1];
1315 } else { // if (q4 <= 68)
1316 round256_58_76 (q4, x0, C4, &R256, &incr_exp,
1317 &is_midpoint_lt_even, &is_midpoint_gt_even,
1318 &is_inexact_lt_midpoint,
1319 &is_inexact_gt_midpoint);
1320 res.w[0] = R256.w[0];
1321 res.w[1] = R256.w[1];
1322 }
1323 e4 = e4 + x0;
1324 if (incr_exp) {
1325 e4 = e4 + 1;
1326 }
1327 q4 = p34;
1328 // res is now the coefficient of the result rounded to the destination
1329 // precision, with unbounded exponent; the exponent is e4; q4=digits(res)
1330 } else { // if (q4 <= p34)
1331 // C4 * 10^e4 is the result rounded to the destination precision, with
1332 // unbounded exponent (which is exact)
1333
1334 if ((q4 + e4 <= p34 + expmax) && (e4 > expmax)) {
1335 // e4 is too large, but can be brought within range by scaling up C4
1336 scale = e4 - expmax; // 1 <= scale < P-q4 <= P-1 => 1 <= scale <= P-2
1337 // res = (C4 * 10^scale) * 10^expmax
1338 if (q4 <= 19) { // C4 fits in 64 bits
1339 if (scale <= 19) { // 10^scale fits in 64 bits
1340 // 64 x 64 C4.w[0] * ten2k64[scale]
1341 __mul_64x64_to_128MACH (res, C4.w[0], ten2k64[scale]);
1342 } else { // 10^scale fits in 128 bits
1343 // 64 x 128 C4.w[0] * ten2k128[scale - 20]
1344 __mul_128x64_to_128 (res, C4.w[0], ten2k128[scale - 20]);
1345 }
1346 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
1347 // 64 x 128 ten2k64[scale] * CC43
1348 __mul_128x64_to_128 (res, ten2k64[scale], C4);
1349 }
1350 e4 = e4 - scale; // expmax
1351 q4 = q4 + scale;
1352 } else {
1353 res.w[1] = C4.w[1];
1354 res.w[0] = C4.w[0];
1355 }
1356 // res is the coefficient of the result rounded to the destination
1357 // precision, with unbounded exponent (it has q4 digits); the exponent
1358 // is e4 (exact result)
1359 }
1360
1361 // check for overflow
1362 if (q4 + e4 > p34 + expmax) {
1363 if (rnd_mode == ROUNDING_TO_NEAREST) {
1364 res.w[1] = p_sign | 0x7800000000000000ull; // +/-inf
1365 res.w[0] = 0x0000000000000000ull;
1366 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1367 } else {
1368 res.w[1] = p_sign | res.w[1];
1369 rounding_correction (rnd_mode,
1370 is_inexact_lt_midpoint,
1371 is_inexact_gt_midpoint,
1372 is_midpoint_lt_even, is_midpoint_gt_even,
1373 e4, &res, pfpsf);
1374 }
1375 *pfpsf |= save_fpsf;
1376 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1377 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1378 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1379 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1380 BID_SWAP128 (res);
1381 BID_RETURN (res)
1382 }
1383 // check for underflow
1384 if (q4 + e4 < expmin + P34) {
1385 is_tiny = 1; // the result is tiny
1386 if (e4 < expmin) {
1387 // if e4 < expmin, we must truncate more of res
1388 x0 = expmin - e4; // x0 >= 1
1389 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
1390 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
1391 is_midpoint_lt_even0 = is_midpoint_lt_even;
1392 is_midpoint_gt_even0 = is_midpoint_gt_even;
1393 is_inexact_lt_midpoint = 0;
1394 is_inexact_gt_midpoint = 0;
1395 is_midpoint_lt_even = 0;
1396 is_midpoint_gt_even = 0;
1397 // the number of decimal digits in res is q4
1398 if (x0 < q4) { // 1 <= x0 <= q4-1 => round res to q4 - x0 digits
1399 if (q4 <= 18) { // 2 <= q4 <= 18, 1 <= x0 <= 17
1400 round64_2_18 (q4, x0, res.w[0], &R64, &incr_exp,
1401 &is_midpoint_lt_even, &is_midpoint_gt_even,
1402 &is_inexact_lt_midpoint,
1403 &is_inexact_gt_midpoint);
1404 if (incr_exp) {
1405 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
1406 R64 = ten2k64[q4 - x0];
1407 }
1408 // res.w[1] = 0; (from above)
1409 res.w[0] = R64;
1410 } else { // if (q4 <= 34)
1411 // 19 <= q4 <= 38
1412 P128.w[1] = res.w[1];
1413 P128.w[0] = res.w[0];
1414 round128_19_38 (q4, x0, P128, &res, &incr_exp,
1415 &is_midpoint_lt_even, &is_midpoint_gt_even,
1416 &is_inexact_lt_midpoint,
1417 &is_inexact_gt_midpoint);
1418 if (incr_exp) {
1419 // increase coefficient by a factor of 10; this will be <= 10^33
1420 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
1421 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
1422 // res.w[1] = 0;
1423 res.w[0] = ten2k64[q4 - x0];
1424 } else { // 20 <= q4 - x0 <= 37
1425 res.w[0] = ten2k128[q4 - x0 - 20].w[0];
1426 res.w[1] = ten2k128[q4 - x0 - 20].w[1];
1427 }
1428 }
1429 }
1430 e4 = e4 + x0; // expmin
1431 } else if (x0 == q4) {
1432 // the second rounding is for 0.d(0)d(1)...d(q4-1) * 10^emin
1433 // determine relationship with 1/2 ulp
1434 if (q4 <= 19) {
1435 if (res.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1436 lt_half_ulp = 1;
1437 is_inexact_lt_midpoint = 1;
1438 } else if (res.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1439 eq_half_ulp = 1;
1440 is_midpoint_gt_even = 1;
1441 } else { // > 1/2 ulp
1442 // gt_half_ulp = 1;
1443 is_inexact_gt_midpoint = 1;
1444 }
1445 } else { // if (q4 <= 34)
1446 if (res.w[1] < midpoint128[q4 - 20].w[1] ||
1447 (res.w[1] == midpoint128[q4 - 20].w[1] &&
1448 res.w[0] < midpoint128[q4 - 20].w[0])) { // < 1/2 ulp
1449 lt_half_ulp = 1;
1450 is_inexact_lt_midpoint = 1;
1451 } else if (res.w[1] == midpoint128[q4 - 20].w[1] &&
1452 res.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1453 eq_half_ulp = 1;
1454 is_midpoint_gt_even = 1;
1455 } else { // > 1/2 ulp
1456 // gt_half_ulp = 1;
1457 is_inexact_gt_midpoint = 1;
1458 }
1459 }
1460 if (lt_half_ulp || eq_half_ulp) {
1461 // res = +0.0 * 10^expmin
1462 res.w[1] = 0x0000000000000000ull;
1463 res.w[0] = 0x0000000000000000ull;
1464 } else { // if (gt_half_ulp)
1465 // res = +1 * 10^expmin
1466 res.w[1] = 0x0000000000000000ull;
1467 res.w[0] = 0x0000000000000001ull;
1468 }
1469 e4 = expmin;
1470 } else { // if (x0 > q4)
1471 // the second rounding is for 0.0...d(0)d(1)...d(q4-1) * 10^emin
1472 res.w[1] = 0;
1473 res.w[0] = 0;
1474 e4 = expmin;
1475 is_inexact_lt_midpoint = 1;
1476 }
1477 // avoid a double rounding error
1478 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
1479 is_midpoint_lt_even) { // double rounding error upward
1480 // res = res - 1
1481 res.w[0]--;
1482 if (res.w[0] == 0xffffffffffffffffull)
1483 res.w[1]--;
1484 // Note: a double rounding error upward is not possible; for this
1485 // the result after the first rounding would have to be 99...95
1486 // (35 digits in all), possibly followed by a number of zeros; this
1487 // not possible for f * f + 0
1488 is_midpoint_lt_even = 0;
1489 is_inexact_lt_midpoint = 1;
1490 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
1491 is_midpoint_gt_even) { // double rounding error downward
1492 // res = res + 1
1493 res.w[0]++;
1494 if (res.w[0] == 0)
1495 res.w[1]++;
1496 is_midpoint_gt_even = 0;
1497 is_inexact_gt_midpoint = 1;
1498 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
1499 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
1500 // if this second rounding was exact the result may still be
1501 // inexact because of the first rounding
1502 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
1503 is_inexact_gt_midpoint = 1;
1504 }
1505 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
1506 is_inexact_lt_midpoint = 1;
1507 }
1508 } else if (is_midpoint_gt_even &&
1509 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
1510 // pulled up to a midpoint
1511 is_inexact_lt_midpoint = 1;
1512 is_inexact_gt_midpoint = 0;
1513 is_midpoint_lt_even = 0;
1514 is_midpoint_gt_even = 0;
1515 } else if (is_midpoint_lt_even &&
1516 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
1517 // pulled down to a midpoint
1518 is_inexact_lt_midpoint = 0;
1519 is_inexact_gt_midpoint = 1;
1520 is_midpoint_lt_even = 0;
1521 is_midpoint_gt_even = 0;
1522 } else {
1523 ;
1524 }
1525 } else { // if e4 >= emin then q4 < P and the result is tiny and exact
1526 if (e3 < e4) {
1527 // if (e3 < e4) the preferred exponent is e3
1528 // return (C4 * 10^scale) * 10^(e4 - scale)
1529 // where scale = min (p34-q4, (e4 - e3))
1530 scale = p34 - q4;
1531 ind = e4 - e3;
1532 if (ind < scale)
1533 scale = ind;
1534 if (scale == 0) {
1535 ; // res and e4 are unchanged
1536 } else if (q4 <= 19) { // C4 fits in 64 bits
1537 if (scale <= 19) { // 10^scale fits in 64 bits
1538 // 64 x 64 res.w[0] * ten2k64[scale]
1539 __mul_64x64_to_128MACH (res, res.w[0], ten2k64[scale]);
1540 } else { // 10^scale fits in 128 bits
1541 // 64 x 128 res.w[0] * ten2k128[scale - 20]
1542 __mul_128x64_to_128 (res, res.w[0], ten2k128[scale - 20]);
1543 }
1544 } else { // res fits in 128 bits, but 10^scale must fit in 64 bits
1545 // 64 x 128 ten2k64[scale] * C3
1546 __mul_128x64_to_128 (res, ten2k64[scale], res);
1547 }
1548 // subtract scale from the exponent
1549 e4 = e4 - scale;
1550 }
1551 }
1552
1553 // check for inexact result
1554 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1555 is_midpoint_lt_even || is_midpoint_gt_even) {
1556 // set the inexact flag and the underflow flag
1557 *pfpsf |= INEXACT_EXCEPTION;
1558 *pfpsf |= UNDERFLOW_EXCEPTION;
1559 }
1560 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1561 if (rnd_mode != ROUNDING_TO_NEAREST) {
1562 rounding_correction (rnd_mode,
1563 is_inexact_lt_midpoint,
1564 is_inexact_gt_midpoint,
1565 is_midpoint_lt_even, is_midpoint_gt_even,
1566 e4, &res, pfpsf);
1567 }
1568 *pfpsf |= save_fpsf;
1569 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1570 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1571 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1572 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1573 BID_SWAP128 (res);
1574 BID_RETURN (res)
1575 }
1576 // no overflow, and no underflow for rounding to nearest
1577 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1];
1578
1579 if (rnd_mode != ROUNDING_TO_NEAREST) {
1580 rounding_correction (rnd_mode,
1581 is_inexact_lt_midpoint,
1582 is_inexact_gt_midpoint,
1583 is_midpoint_lt_even, is_midpoint_gt_even,
1584 e4, &res, pfpsf);
1585 // if e4 = expmin && significand < 10^33 => result is tiny (for RD, RZ)
1586 if (e4 == expmin) {
1587 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
1588 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
1589 res.w[0] < 0x38c15b0a00000000ull)) {
1590 is_tiny = 1;
1591 }
1592 }
1593 }
1594
1595 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
1596 is_midpoint_lt_even || is_midpoint_gt_even) {
1597 // set the inexact flag
1598 *pfpsf |= INEXACT_EXCEPTION;
1599 if (is_tiny)
1600 *pfpsf |= UNDERFLOW_EXCEPTION;
1601 }
1602
1603 if ((*pfpsf & INEXACT_EXCEPTION) == 0) { // x * y is exact
1604 // need to ensure that the result has the preferred exponent
1605 p_exp = res.w[1] & MASK_EXP;
1606 if (z_exp < p_exp) { // the preferred exponent is z_exp
1607 // signficand of res in C3
1608 C3.w[1] = res.w[1] & MASK_COEFF;
1609 C3.w[0] = res.w[0];
1610 // the number of decimal digits of x * y is q4 <= 34
1611 // Note: the coefficient fits in 128 bits
1612
1613 // return (C3 * 10^scale) * 10^(p_exp - scale)
1614 // where scale = min (p34-q4, (p_exp-z_exp) >> 49)
1615 scale = p34 - q4;
1616 ind = (p_exp - z_exp) >> 49;
1617 if (ind < scale)
1618 scale = ind;
1619 // subtract scale from the exponent
1620 p_exp = p_exp - ((UINT64) scale << 49);
1621 if (scale == 0) {
1622 ; // leave res unchanged
1623 } else if (q4 <= 19) { // x * y fits in 64 bits
1624 if (scale <= 19) { // 10^scale fits in 64 bits
1625 // 64 x 64 C3.w[0] * ten2k64[scale]
1626 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1627 } else { // 10^scale fits in 128 bits
1628 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1629 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1630 }
1631 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1632 } else { // x * y fits in 128 bits, but 10^scale must fit in 64 bits
1633 // 64 x 128 ten2k64[scale] * C3
1634 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1635 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
1636 }
1637 } // else leave the result as it is, because p_exp <= z_exp
1638 }
1639 *pfpsf |= save_fpsf;
1640 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1641 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1642 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1643 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1644 BID_SWAP128 (res);
1645 BID_RETURN (res)
1646 } // else we have f * f + f
1647
1648 // continue with x = f, y = f, z = f
1649
1650 delta = q3 + e3 - q4 - e4;
1651 delta_ge_zero:
1652 if (delta >= 0) {
1653
1654 if (p34 <= delta - 1 || // Case (1')
1655 (p34 == delta && e3 + 6176 < p34 - q3)) { // Case (1''A)
1656 // check for overflow, which can occur only in Case (1')
1657 if ((q3 + e3) > (p34 + expmax) && p34 <= delta - 1) {
1658 // e3 > expmax implies p34 <= delta-1 and e3 > expmax is a necessary
1659 // condition for (q3 + e3) > (p34 + expmax)
1660 if (rnd_mode == ROUNDING_TO_NEAREST) {
1661 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
1662 res.w[0] = 0x0000000000000000ull;
1663 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
1664 } else {
1665 if (p_sign == z_sign) {
1666 is_inexact_lt_midpoint = 1;
1667 } else {
1668 is_inexact_gt_midpoint = 1;
1669 }
1670 // q3 <= p34; if (q3 < p34) scale C3 up by 10^(p34-q3)
1671 scale = p34 - q3;
1672 if (scale == 0) {
1673 res.w[1] = z_sign | C3.w[1];
1674 res.w[0] = C3.w[0];
1675 } else {
1676 if (q3 <= 19) { // C3 fits in 64 bits
1677 if (scale <= 19) { // 10^scale fits in 64 bits
1678 // 64 x 64 C3.w[0] * ten2k64[scale]
1679 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1680 } else { // 10^scale fits in 128 bits
1681 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1682 __mul_128x64_to_128 (res, C3.w[0],
1683 ten2k128[scale - 20]);
1684 }
1685 } else { // C3 fits in 128 bits, but 10^scale must fit in 64 bits
1686 // 64 x 128 ten2k64[scale] * C3
1687 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1688 }
1689 // the coefficient in res has q3 + scale = p34 digits
1690 }
1691 e3 = e3 - scale;
1692 res.w[1] = z_sign | res.w[1];
1693 rounding_correction (rnd_mode,
1694 is_inexact_lt_midpoint,
1695 is_inexact_gt_midpoint,
1696 is_midpoint_lt_even, is_midpoint_gt_even,
1697 e3, &res, pfpsf);
1698 }
1699 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1700 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1701 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1702 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1703 BID_SWAP128 (res);
1704 BID_RETURN (res)
1705 }
1706 // res = z
1707 if (q3 < p34) { // the preferred exponent is z_exp - (p34 - q3)
1708 // return (C3 * 10^scale) * 10^(z_exp - scale)
1709 // where scale = min (p34-q3, z_exp-EMIN)
1710 scale = p34 - q3;
1711 ind = e3 + 6176;
1712 if (ind < scale)
1713 scale = ind;
1714 if (scale == 0) {
1715 res.w[1] = C3.w[1];
1716 res.w[0] = C3.w[0];
1717 } else if (q3 <= 19) { // z fits in 64 bits
1718 if (scale <= 19) { // 10^scale fits in 64 bits
1719 // 64 x 64 C3.w[0] * ten2k64[scale]
1720 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1721 } else { // 10^scale fits in 128 bits
1722 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1723 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1724 }
1725 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1726 // 64 x 128 ten2k64[scale] * C3
1727 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1728 }
1729 // the coefficient in res has q3 + scale digits
1730 // subtract scale from the exponent
1731 z_exp = z_exp - ((UINT64) scale << 49);
1732 e3 = e3 - scale;
1733 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1734 if (scale + q3 < p34)
1735 *pfpsf |= UNDERFLOW_EXCEPTION;
1736 } else {
1737 scale = 0;
1738 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | C3.w[1];
1739 res.w[0] = C3.w[0];
1740 }
1741
1742 // use the following to avoid double rounding errors when operating on
1743 // mixed formats in rounding to nearest, and for correcting the result
1744 // if not rounding to nearest
1745 if ((p_sign != z_sign) && (delta == (q3 + scale + 1))) {
1746 // there is a gap of exactly one digit between the scaled C3 and C4
1747 // C3 * 10^ scale = 10^(q3+scale-1) <=> C3 = 10^(q3-1) is special case
1748 if ((q3 <= 19 && C3.w[0] != ten2k64[q3 - 1]) ||
1749 (q3 == 20 && (C3.w[1] != 0 || C3.w[0] != ten2k64[19])) ||
1750 (q3 >= 21 && (C3.w[1] != ten2k128[q3 - 21].w[1] ||
1751 C3.w[0] != ten2k128[q3 - 21].w[0]))) {
1752 // C3 * 10^ scale != 10^(q3-1)
1753 // if ((res.w[1] & MASK_COEFF) != 0x0000314dc6448d93ull ||
1754 // res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
1755 is_inexact_gt_midpoint = 1; // if (z_sign), set as if for abs. value
1756 } else { // if C3 * 10^scale = 10^(q3+scale-1)
1757 // ok from above e3 = (z_exp >> 49) - 6176;
1758 // the result is always inexact
1759 if (q4 == 1) {
1760 R64 = C4.w[0];
1761 } else {
1762 // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
1763 // x = q4-1, 1 <= x <= 67 and check if this operation is exact
1764 if (q4 <= 18) { // 2 <= q4 <= 18
1765 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
1766 &is_midpoint_lt_even, &is_midpoint_gt_even,
1767 &is_inexact_lt_midpoint,
1768 &is_inexact_gt_midpoint);
1769 } else if (q4 <= 38) {
1770 P128.w[1] = C4.w[1];
1771 P128.w[0] = C4.w[0];
1772 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
1773 &is_midpoint_lt_even,
1774 &is_midpoint_gt_even,
1775 &is_inexact_lt_midpoint,
1776 &is_inexact_gt_midpoint);
1777 R64 = R128.w[0]; // one decimal digit
1778 } else if (q4 <= 57) {
1779 P192.w[2] = C4.w[2];
1780 P192.w[1] = C4.w[1];
1781 P192.w[0] = C4.w[0];
1782 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
1783 &is_midpoint_lt_even,
1784 &is_midpoint_gt_even,
1785 &is_inexact_lt_midpoint,
1786 &is_inexact_gt_midpoint);
1787 R64 = R192.w[0]; // one decimal digit
1788 } else { // if (q4 <= 68)
1789 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
1790 &is_midpoint_lt_even,
1791 &is_midpoint_gt_even,
1792 &is_inexact_lt_midpoint,
1793 &is_inexact_gt_midpoint);
1794 R64 = R256.w[0]; // one decimal digit
1795 }
1796 if (incr_exp) {
1797 R64 = 10;
1798 }
1799 }
1800 if (q4 == 1 && C4.w[0] == 5) {
1801 is_inexact_lt_midpoint = 0;
1802 is_inexact_gt_midpoint = 0;
1803 is_midpoint_lt_even = 1;
1804 is_midpoint_gt_even = 0;
1805 } else if ((e3 == expmin) ||
1806 R64 < 5 || (R64 == 5 && is_inexact_gt_midpoint)) {
1807 // result does not change
1808 is_inexact_lt_midpoint = 0;
1809 is_inexact_gt_midpoint = 1;
1810 is_midpoint_lt_even = 0;
1811 is_midpoint_gt_even = 0;
1812 } else {
1813 is_inexact_lt_midpoint = 1;
1814 is_inexact_gt_midpoint = 0;
1815 is_midpoint_lt_even = 0;
1816 is_midpoint_gt_even = 0;
1817 // result decremented is 10^(q3+scale) - 1
1818 if ((q3 + scale) <= 19) {
1819 res.w[1] = 0;
1820 res.w[0] = ten2k64[q3 + scale];
1821 } else { // if ((q3 + scale + 1) <= 35)
1822 res.w[1] = ten2k128[q3 + scale - 20].w[1];
1823 res.w[0] = ten2k128[q3 + scale - 20].w[0];
1824 }
1825 res.w[0] = res.w[0] - 1; // borrow never occurs
1826 z_exp = z_exp - EXP_P1;
1827 e3 = e3 - 1;
1828 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
1829 }
1830 if (e3 == expmin) {
1831 if (R64 < 5 || (R64 == 5 && !is_inexact_lt_midpoint)) {
1832 ; // result not tiny (in round-to-nearest mode)
1833 } else {
1834 *pfpsf |= UNDERFLOW_EXCEPTION;
1835 }
1836 }
1837 } // end 10^(q3+scale-1)
1838 // set the inexact flag
1839 *pfpsf |= INEXACT_EXCEPTION;
1840 } else {
1841 if (p_sign == z_sign) {
1842 // if (z_sign), set as if for absolute value
1843 is_inexact_lt_midpoint = 1;
1844 } else { // if (p_sign != z_sign)
1845 // if (z_sign), set as if for absolute value
1846 is_inexact_gt_midpoint = 1;
1847 }
1848 *pfpsf |= INEXACT_EXCEPTION;
1849 }
1850 // the result is always inexact => set the inexact flag
1851 // Determine tininess:
1852 // if (exp > expmin)
1853 // the result is not tiny
1854 // else // if exp = emin
1855 // if (q3 + scale < p34)
1856 // the result is tiny
1857 // else // if (q3 + scale = p34)
1858 // if (C3 * 10^scale > 10^33)
1859 // the result is not tiny
1860 // else // if C3 * 10^scale = 10^33
1861 // if (xy * z > 0)
1862 // the result is not tiny
1863 // else // if (xy * z < 0)
1864 // if (z > 0)
1865 // if rnd_mode != RP
1866 // the result is tiny
1867 // else // if RP
1868 // the result is not tiny
1869 // else // if (z < 0)
1870 // if rnd_mode != RM
1871 // the result is tiny
1872 // else // if RM
1873 // the result is not tiny
1874 // endif
1875 // endif
1876 // endif
1877 // endif
1878 // endif
1879 // endif
1880 if ((e3 == expmin && (q3 + scale) < p34) ||
1881 (e3 == expmin && (q3 + scale) == p34 &&
1882 (res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull && // 10^33_high
1883 res.w[0] == 0x38c15b0a00000000ull && // 10^33_low
1884 z_sign != p_sign && ((!z_sign && rnd_mode != ROUNDING_UP) ||
1885 (z_sign && rnd_mode != ROUNDING_DOWN)))) {
1886 *pfpsf |= UNDERFLOW_EXCEPTION;
1887 }
1888 if (rnd_mode != ROUNDING_TO_NEAREST) {
1889 rounding_correction (rnd_mode,
1890 is_inexact_lt_midpoint,
1891 is_inexact_gt_midpoint,
1892 is_midpoint_lt_even, is_midpoint_gt_even,
1893 e3, &res, pfpsf);
1894 }
1895 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
1896 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
1897 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
1898 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
1899 BID_SWAP128 (res);
1900 BID_RETURN (res)
1901
1902 } else if (p34 == delta) { // Case (1''B)
1903
1904 // because Case (1''A) was treated above, e3 + 6176 >= p34 - q3
1905 // and C3 can be scaled up to p34 digits if needed
1906
1907 // scale C3 to p34 digits if needed
1908 scale = p34 - q3; // 0 <= scale <= p34 - 1
1909 if (scale == 0) {
1910 res.w[1] = C3.w[1];
1911 res.w[0] = C3.w[0];
1912 } else if (q3 <= 19) { // z fits in 64 bits
1913 if (scale <= 19) { // 10^scale fits in 64 bits
1914 // 64 x 64 C3.w[0] * ten2k64[scale]
1915 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
1916 } else { // 10^scale fits in 128 bits
1917 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
1918 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
1919 }
1920 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
1921 // 64 x 128 ten2k64[scale] * C3
1922 __mul_128x64_to_128 (res, ten2k64[scale], C3);
1923 }
1924 // subtract scale from the exponent
1925 z_exp = z_exp - ((UINT64) scale << 49);
1926 e3 = e3 - scale;
1927 // now z_sign, z_exp, and res correspond to a z scaled to p34 = 34 digits
1928
1929 // determine whether x * y is less than, equal to, or greater than
1930 // 1/2 ulp (z)
1931 if (q4 <= 19) {
1932 if (C4.w[0] < midpoint64[q4 - 1]) { // < 1/2 ulp
1933 lt_half_ulp = 1;
1934 } else if (C4.w[0] == midpoint64[q4 - 1]) { // = 1/2 ulp
1935 eq_half_ulp = 1;
1936 } else { // > 1/2 ulp
1937 gt_half_ulp = 1;
1938 }
1939 } else if (q4 <= 38) {
1940 if (C4.w[2] == 0 && (C4.w[1] < midpoint128[q4 - 20].w[1] ||
1941 (C4.w[1] == midpoint128[q4 - 20].w[1] &&
1942 C4.w[0] < midpoint128[q4 - 20].w[0]))) { // < 1/2 ulp
1943 lt_half_ulp = 1;
1944 } else if (C4.w[2] == 0 && C4.w[1] == midpoint128[q4 - 20].w[1] &&
1945 C4.w[0] == midpoint128[q4 - 20].w[0]) { // = 1/2 ulp
1946 eq_half_ulp = 1;
1947 } else { // > 1/2 ulp
1948 gt_half_ulp = 1;
1949 }
1950 } else if (q4 <= 58) {
1951 if (C4.w[3] == 0 && (C4.w[2] < midpoint192[q4 - 39].w[2] ||
1952 (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1953 C4.w[1] < midpoint192[q4 - 39].w[1]) ||
1954 (C4.w[2] == midpoint192[q4 - 39].w[2] &&
1955 C4.w[1] == midpoint192[q4 - 39].w[1] &&
1956 C4.w[0] < midpoint192[q4 - 39].w[0]))) { // < 1/2 ulp
1957 lt_half_ulp = 1;
1958 } else if (C4.w[3] == 0 && C4.w[2] == midpoint192[q4 - 39].w[2] &&
1959 C4.w[1] == midpoint192[q4 - 39].w[1] &&
1960 C4.w[0] == midpoint192[q4 - 39].w[0]) { // = 1/2 ulp
1961 eq_half_ulp = 1;
1962 } else { // > 1/2 ulp
1963 gt_half_ulp = 1;
1964 }
1965 } else {
1966 if (C4.w[3] < midpoint256[q4 - 59].w[3] ||
1967 (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1968 C4.w[2] < midpoint256[q4 - 59].w[2]) ||
1969 (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1970 C4.w[2] == midpoint256[q4 - 59].w[2] &&
1971 C4.w[1] < midpoint256[q4 - 59].w[1]) ||
1972 (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1973 C4.w[2] == midpoint256[q4 - 59].w[2] &&
1974 C4.w[1] == midpoint256[q4 - 59].w[1] &&
1975 C4.w[0] < midpoint256[q4 - 59].w[0])) { // < 1/2 ulp
1976 lt_half_ulp = 1;
1977 } else if (C4.w[3] == midpoint256[q4 - 59].w[3] &&
1978 C4.w[2] == midpoint256[q4 - 59].w[2] &&
1979 C4.w[1] == midpoint256[q4 - 59].w[1] &&
1980 C4.w[0] == midpoint256[q4 - 59].w[0]) { // = 1/2 ulp
1981 eq_half_ulp = 1;
1982 } else { // > 1/2 ulp
1983 gt_half_ulp = 1;
1984 }
1985 }
1986
1987 if (p_sign == z_sign) {
1988 if (lt_half_ulp) {
1989 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
1990 // use the following to avoid double rounding errors when operating on
1991 // mixed formats in rounding to nearest
1992 is_inexact_lt_midpoint = 1; // if (z_sign), as if for absolute value
1993 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
1994 // add 1 ulp to the significand
1995 res.w[0]++;
1996 if (res.w[0] == 0x0ull)
1997 res.w[1]++;
1998 // check for rounding overflow, when coeff == 10^34
1999 if ((res.w[1] & MASK_COEFF) == 0x0001ed09bead87c0ull &&
2000 res.w[0] == 0x378d8e6400000000ull) { // coefficient = 10^34
2001 e3 = e3 + 1;
2002 // coeff = 10^33
2003 z_exp = ((UINT64) (e3 + 6176) << 49) & MASK_EXP;
2004 res.w[1] = 0x0000314dc6448d93ull;
2005 res.w[0] = 0x38c15b0a00000000ull;
2006 }
2007 // end add 1 ulp
2008 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2009 if (eq_half_ulp) {
2010 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2011 } else {
2012 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2013 }
2014 } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2015 // leave unchanged
2016 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2017 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2018 }
2019 // the result is always inexact, and never tiny
2020 // set the inexact flag
2021 *pfpsf |= INEXACT_EXCEPTION;
2022 // check for overflow
2023 if (e3 > expmax && rnd_mode == ROUNDING_TO_NEAREST) {
2024 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2025 res.w[0] = 0x0000000000000000ull;
2026 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2027 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2028 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2029 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2030 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2031 BID_SWAP128 (res);
2032 BID_RETURN (res)
2033 }
2034 if (rnd_mode != ROUNDING_TO_NEAREST) {
2035 rounding_correction (rnd_mode,
2036 is_inexact_lt_midpoint,
2037 is_inexact_gt_midpoint,
2038 is_midpoint_lt_even, is_midpoint_gt_even,
2039 e3, &res, pfpsf);
2040 z_exp = res.w[1] & MASK_EXP;
2041 }
2042 } else { // if (p_sign != z_sign)
2043 // consider two cases, because C3 * 10^scale = 10^33 is a special case
2044 if (res.w[1] != 0x0000314dc6448d93ull ||
2045 res.w[0] != 0x38c15b0a00000000ull) { // C3 * 10^scale != 10^33
2046 if (lt_half_ulp) {
2047 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2048 // use the following to avoid double rounding errors when operating
2049 // on mixed formats in rounding to nearest
2050 is_inexact_gt_midpoint = 1; // if (z_sign), as if for absolute value
2051 } else if ((eq_half_ulp && (res.w[0] & 0x01)) || gt_half_ulp) {
2052 // subtract 1 ulp from the significand
2053 res.w[0]--;
2054 if (res.w[0] == 0xffffffffffffffffull)
2055 res.w[1]--;
2056 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2057 if (eq_half_ulp) {
2058 is_midpoint_gt_even = 1; // if (z_sign), as if for absolute value
2059 } else {
2060 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2061 }
2062 } else { // if (eq_half_ulp && !(res.w[0] & 0x01))
2063 // leave unchanged
2064 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2065 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2066 }
2067 // the result is always inexact, and never tiny
2068 // check for overflow for RN
2069 if (e3 > expmax) {
2070 if (rnd_mode == ROUNDING_TO_NEAREST) {
2071 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2072 res.w[0] = 0x0000000000000000ull;
2073 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2074 } else {
2075 rounding_correction (rnd_mode,
2076 is_inexact_lt_midpoint,
2077 is_inexact_gt_midpoint,
2078 is_midpoint_lt_even,
2079 is_midpoint_gt_even, e3, &res,
2080 pfpsf);
2081 }
2082 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2083 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2084 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2085 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2086 BID_SWAP128 (res);
2087 BID_RETURN (res)
2088 }
2089 // set the inexact flag
2090 *pfpsf |= INEXACT_EXCEPTION;
2091 if (rnd_mode != ROUNDING_TO_NEAREST) {
2092 rounding_correction (rnd_mode,
2093 is_inexact_lt_midpoint,
2094 is_inexact_gt_midpoint,
2095 is_midpoint_lt_even,
2096 is_midpoint_gt_even, e3, &res, pfpsf);
2097 }
2098 z_exp = res.w[1] & MASK_EXP;
2099 } else { // if C3 * 10^scale = 10^33
2100 e3 = (z_exp >> 49) - 6176;
2101 if (e3 > expmin) {
2102 // the result is exact if exp > expmin and C4 = d*10^(q4-1),
2103 // where d = 1, 2, 3, ..., 9; it could be tiny too, but exact
2104 if (q4 == 1) {
2105 // if q4 = 1 the result is exact
2106 // result coefficient = 10^34 - C4
2107 res.w[1] = 0x0001ed09bead87c0ull;
2108 res.w[0] = 0x378d8e6400000000ull - C4.w[0];
2109 z_exp = z_exp - EXP_P1;
2110 e3 = e3 - 1;
2111 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2112 } else {
2113 // if q4 > 1 then truncate C4 from q4 digits to 1 digit;
2114 // x = q4-1, 1 <= x <= 67 and check if this operation is exact
2115 if (q4 <= 18) { // 2 <= q4 <= 18
2116 round64_2_18 (q4, q4 - 1, C4.w[0], &R64, &incr_exp,
2117 &is_midpoint_lt_even,
2118 &is_midpoint_gt_even,
2119 &is_inexact_lt_midpoint,
2120 &is_inexact_gt_midpoint);
2121 } else if (q4 <= 38) {
2122 P128.w[1] = C4.w[1];
2123 P128.w[0] = C4.w[0];
2124 round128_19_38 (q4, q4 - 1, P128, &R128, &incr_exp,
2125 &is_midpoint_lt_even,
2126 &is_midpoint_gt_even,
2127 &is_inexact_lt_midpoint,
2128 &is_inexact_gt_midpoint);
2129 R64 = R128.w[0]; // one decimal digit
2130 } else if (q4 <= 57) {
2131 P192.w[2] = C4.w[2];
2132 P192.w[1] = C4.w[1];
2133 P192.w[0] = C4.w[0];
2134 round192_39_57 (q4, q4 - 1, P192, &R192, &incr_exp,
2135 &is_midpoint_lt_even,
2136 &is_midpoint_gt_even,
2137 &is_inexact_lt_midpoint,
2138 &is_inexact_gt_midpoint);
2139 R64 = R192.w[0]; // one decimal digit
2140 } else { // if (q4 <= 68)
2141 round256_58_76 (q4, q4 - 1, C4, &R256, &incr_exp,
2142 &is_midpoint_lt_even,
2143 &is_midpoint_gt_even,
2144 &is_inexact_lt_midpoint,
2145 &is_inexact_gt_midpoint);
2146 R64 = R256.w[0]; // one decimal digit
2147 }
2148 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2149 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2150 // the result is exact: 10^34 - R64
2151 // incr_exp = 0 with certainty
2152 z_exp = z_exp - EXP_P1;
2153 e3 = e3 - 1;
2154 res.w[1] =
2155 z_sign | (z_exp & MASK_EXP) | 0x0001ed09bead87c0ull;
2156 res.w[0] = 0x378d8e6400000000ull - R64;
2157 } else {
2158 // We want R64 to be the top digit of C4, but we actually
2159 // obtained (C4 * 10^(-q4+1))RN; a correction may be needed,
2160 // because the top digit is (C4 * 10^(-q4+1))RZ
2161 // however, if incr_exp = 1 then R64 = 10 with certainty
2162 if (incr_exp) {
2163 R64 = 10;
2164 }
2165 // the result is inexact as C4 has more than 1 significant digit
2166 // and C3 * 10^scale = 10^33
2167 // example of case that is treated here:
2168 // 100...0 * 10^e3 - 0.41 * 10^e3 =
2169 // 0999...9.59 * 10^e3 -> rounds to 99...96*10^(e3-1)
2170 // note that (e3 > expmin}
2171 // in order to round, subtract R64 from 10^34 and then compare
2172 // C4 - R64 * 10^(q4-1) with 1/2 ulp
2173 // calculate 10^34 - R64
2174 res.w[1] = 0x0001ed09bead87c0ull;
2175 res.w[0] = 0x378d8e6400000000ull - R64;
2176 z_exp = z_exp - EXP_P1; // will be OR-ed with sign & significand
2177 // calculate C4 - R64 * 10^(q4-1); this is a rare case and
2178 // R64 is small, 1 <= R64 <= 9
2179 e3 = e3 - 1;
2180 if (is_inexact_lt_midpoint) {
2181 is_inexact_lt_midpoint = 0;
2182 is_inexact_gt_midpoint = 1;
2183 } else if (is_inexact_gt_midpoint) {
2184 is_inexact_gt_midpoint = 0;
2185 is_inexact_lt_midpoint = 1;
2186 } else if (is_midpoint_lt_even) {
2187 is_midpoint_lt_even = 0;
2188 is_midpoint_gt_even = 1;
2189 } else if (is_midpoint_gt_even) {
2190 is_midpoint_gt_even = 0;
2191 is_midpoint_lt_even = 1;
2192 } else {
2193 ;
2194 }
2195 // the result is always inexact, and never tiny
2196 // check for overflow for RN
2197 if (e3 > expmax) {
2198 if (rnd_mode == ROUNDING_TO_NEAREST) {
2199 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2200 res.w[0] = 0x0000000000000000ull;
2201 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2202 } else {
2203 rounding_correction (rnd_mode,
2204 is_inexact_lt_midpoint,
2205 is_inexact_gt_midpoint,
2206 is_midpoint_lt_even,
2207 is_midpoint_gt_even, e3, &res,
2208 pfpsf);
2209 }
2210 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2211 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2212 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2213 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2214 BID_SWAP128 (res);
2215 BID_RETURN (res)
2216 }
2217 // set the inexact flag
2218 *pfpsf |= INEXACT_EXCEPTION;
2219 res.w[1] =
2220 z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2221 if (rnd_mode != ROUNDING_TO_NEAREST) {
2222 rounding_correction (rnd_mode,
2223 is_inexact_lt_midpoint,
2224 is_inexact_gt_midpoint,
2225 is_midpoint_lt_even,
2226 is_midpoint_gt_even, e3, &res,
2227 pfpsf);
2228 }
2229 z_exp = res.w[1] & MASK_EXP;
2230 } // end result is inexact
2231 } // end q4 > 1
2232 } else { // if (e3 = emin)
2233 // if e3 = expmin the result is also tiny (the condition for
2234 // tininess is C4 > 050...0 [q4 digits] which is met because
2235 // the msd of C4 is not zero)
2236 // the result is tiny and inexact in all rounding modes;
2237 // it is either 100...0 or 0999...9 (use lt_half_ulp, eq_half_ulp,
2238 // gt_half_ulp to calculate)
2239 // if (lt_half_ulp || eq_half_ulp) res = 10^33 stays unchanged
2240
2241 // p_sign != z_sign so swap gt_half_ulp and lt_half_ulp
2242 if (gt_half_ulp) { // res = 10^33 - 1
2243 res.w[1] = 0x0000314dc6448d93ull;
2244 res.w[0] = 0x38c15b09ffffffffull;
2245 } else {
2246 res.w[1] = 0x0000314dc6448d93ull;
2247 res.w[0] = 0x38c15b0a00000000ull;
2248 }
2249 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2250 *pfpsf |= UNDERFLOW_EXCEPTION; // inexact is set later
2251
2252 if (eq_half_ulp) {
2253 is_midpoint_lt_even = 1; // if (z_sign), as if for absolute value
2254 } else if (lt_half_ulp) {
2255 is_inexact_gt_midpoint = 1; //if(z_sign), as if for absolute value
2256 } else { // if (gt_half_ulp)
2257 is_inexact_lt_midpoint = 1; //if(z_sign), as if for absolute value
2258 }
2259
2260 if (rnd_mode != ROUNDING_TO_NEAREST) {
2261 rounding_correction (rnd_mode,
2262 is_inexact_lt_midpoint,
2263 is_inexact_gt_midpoint,
2264 is_midpoint_lt_even,
2265 is_midpoint_gt_even, e3, &res,
2266 pfpsf);
2267 z_exp = res.w[1] & MASK_EXP;
2268 }
2269 } // end e3 = emin
2270 // set the inexact flag (if the result was not exact)
2271 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2272 is_midpoint_lt_even || is_midpoint_gt_even)
2273 *pfpsf |= INEXACT_EXCEPTION;
2274 } // end 10^33
2275 } // end if (p_sign != z_sign)
2276 res.w[1] = z_sign | (z_exp & MASK_EXP) | res.w[1];
2277 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2278 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2279 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2280 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2281 BID_SWAP128 (res);
2282 BID_RETURN (res)
2283
2284 } else if (((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2285 (q3 <= delta && delta + q4 <= p34) || // Case (3)
2286 (delta < q3 && p34 < delta + q4) || // Case (4)
2287 (delta < q3 && q3 <= delta + q4 && delta + q4 <= p34) || // Case (5)
2288 (delta + q4 < q3)) && // Case (6)
2289 !(delta <= 1 && p_sign != z_sign)) { // Case (2), (3), (4), (5) or (6)
2290
2291 // the result has the sign of z
2292
2293 if ((q3 <= delta && delta < p34 && p34 < delta + q4) || // Case (2)
2294 (delta < q3 && p34 < delta + q4)) { // Case (4)
2295 // round first the sum x * y + z with unbounded exponent
2296 // scale C3 up by scale = p34 - q3, 1 <= scale <= p34-1,
2297 // 1 <= scale <= 33
2298 // calculate res = C3 * 10^scale
2299 scale = p34 - q3;
2300 x0 = delta + q4 - p34;
2301 } else if (delta + q4 < q3) { // Case (6)
2302 // make Case (6) look like Case (3) or Case (5) with scale = 0
2303 // by scaling up C4 by 10^(q3 - delta - q4)
2304 scale = q3 - delta - q4; // 1 <= scale <= 33
2305 if (q4 <= 19) { // 1 <= scale <= 19; C4 fits in 64 bits
2306 if (scale <= 19) { // 10^scale fits in 64 bits
2307 // 64 x 64 C4.w[0] * ten2k64[scale]
2308 __mul_64x64_to_128MACH (P128, C4.w[0], ten2k64[scale]);
2309 } else { // 10^scale fits in 128 bits
2310 // 64 x 128 C4.w[0] * ten2k128[scale - 20]
2311 __mul_128x64_to_128 (P128, C4.w[0], ten2k128[scale - 20]);
2312 }
2313 } else { // C4 fits in 128 bits, but 10^scale must fit in 64 bits
2314 // 64 x 128 ten2k64[scale] * C4
2315 __mul_128x64_to_128 (P128, ten2k64[scale], C4);
2316 }
2317 C4.w[0] = P128.w[0];
2318 C4.w[1] = P128.w[1];
2319 // e4 does not need adjustment, as it is not used from this point on
2320 scale = 0;
2321 x0 = 0;
2322 // now Case (6) looks like Case (3) or Case (5) with scale = 0
2323 } else { // if Case (3) or Case (5)
2324 // Note: Case (3) is similar to Case (2), but scale differs and the
2325 // result is exact, unless it is tiny (so x0 = 0 when calculating the
2326 // result with unbounded exponent)
2327
2328 // calculate first the sum x * y + z with unbounded exponent (exact)
2329 // scale C3 up by scale = delta + q4 - q3, 1 <= scale <= p34-1,
2330 // 1 <= scale <= 33
2331 // calculate res = C3 * 10^scale
2332 scale = delta + q4 - q3;
2333 x0 = 0;
2334 // Note: the comments which follow refer [mainly] to Case (2)]
2335 }
2336
2337 case2_repeat:
2338 if (scale == 0) { // this could happen e.g. if we return to case2_repeat
2339 // or in Case (4)
2340 res.w[1] = C3.w[1];
2341 res.w[0] = C3.w[0];
2342 } else if (q3 <= 19) { // 1 <= scale <= 19; z fits in 64 bits
2343 if (scale <= 19) { // 10^scale fits in 64 bits
2344 // 64 x 64 C3.w[0] * ten2k64[scale]
2345 __mul_64x64_to_128MACH (res, C3.w[0], ten2k64[scale]);
2346 } else { // 10^scale fits in 128 bits
2347 // 64 x 128 C3.w[0] * ten2k128[scale - 20]
2348 __mul_128x64_to_128 (res, C3.w[0], ten2k128[scale - 20]);
2349 }
2350 } else { // z fits in 128 bits, but 10^scale must fit in 64 bits
2351 // 64 x 128 ten2k64[scale] * C3
2352 __mul_128x64_to_128 (res, ten2k64[scale], C3);
2353 }
2354 // e3 is already calculated
2355 e3 = e3 - scale;
2356 // now res = C3 * 10^scale and e3 = e3 - scale
2357 // Note: C3 * 10^scale could be 10^34 if we returned to case2_repeat
2358 // because the result was too small
2359
2360 // round C4 to nearest to q4 - x0 digits, where x0 = delta + q4 - p34,
2361 // 1 <= x0 <= min (q4 - 1, 2 * p34 - 1) <=> 1 <= x0 <= min (q4 - 1, 67)
2362 // Also: 1 <= q4 - x0 <= p34 -1 => 1 <= q4 - x0 <= 33 (so the result of
2363 // the rounding fits in 128 bits!)
2364 // x0 = delta + q4 - p34 (calculated before reaching case2_repeat)
2365 // because q3 + q4 - x0 <= P => x0 >= q3 + q4 - p34
2366 if (x0 == 0) { // this could happen only if we return to case2_repeat, or
2367 // for Case (3) or Case (6)
2368 R128.w[1] = C4.w[1];
2369 R128.w[0] = C4.w[0];
2370 } else if (q4 <= 18) {
2371 // 2 <= q4 <= 18, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 17
2372 round64_2_18 (q4, x0, C4.w[0], &R64, &incr_exp,
2373 &is_midpoint_lt_even, &is_midpoint_gt_even,
2374 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
2375 if (incr_exp) {
2376 // R64 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 17
2377 R64 = ten2k64[q4 - x0];
2378 }
2379 R128.w[1] = 0;
2380 R128.w[0] = R64;
2381 } else if (q4 <= 38) {
2382 // 19 <= q4 <= 38, max(1, q3+q4-p34) <= x0 <= q4 - 1, 1 <= x0 <= 37
2383 P128.w[1] = C4.w[1];
2384 P128.w[0] = C4.w[0];
2385 round128_19_38 (q4, x0, P128, &R128, &incr_exp,
2386 &is_midpoint_lt_even, &is_midpoint_gt_even,
2387 &is_inexact_lt_midpoint,
2388 &is_inexact_gt_midpoint);
2389 if (incr_exp) {
2390 // R128 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 1, 1 <= q4 - x0 <= 37
2391 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2392 R128.w[0] = ten2k64[q4 - x0];
2393 // R128.w[1] stays 0
2394 } else { // 20 <= q4 - x0 <= 37
2395 R128.w[0] = ten2k128[q4 - x0 - 20].w[0];
2396 R128.w[1] = ten2k128[q4 - x0 - 20].w[1];
2397 }
2398 }
2399 } else if (q4 <= 57) {
2400 // 38 <= q4 <= 57, max(1, q3+q4-p34) <= x0 <= q4 - 1, 5 <= x0 <= 56
2401 P192.w[2] = C4.w[2];
2402 P192.w[1] = C4.w[1];
2403 P192.w[0] = C4.w[0];
2404 round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2405 &is_midpoint_lt_even, &is_midpoint_gt_even,
2406 &is_inexact_lt_midpoint,
2407 &is_inexact_gt_midpoint);
2408 // R192.w[2] is always 0
2409 if (incr_exp) {
2410 // R192 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 5, 1 <= q4 - x0 <= 52
2411 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2412 R192.w[0] = ten2k64[q4 - x0];
2413 // R192.w[1] stays 0
2414 // R192.w[2] stays 0
2415 } else { // 20 <= q4 - x0 <= 33
2416 R192.w[0] = ten2k128[q4 - x0 - 20].w[0];
2417 R192.w[1] = ten2k128[q4 - x0 - 20].w[1];
2418 // R192.w[2] stays 0
2419 }
2420 }
2421 R128.w[1] = R192.w[1];
2422 R128.w[0] = R192.w[0];
2423 } else {
2424 // 58 <= q4 <= 68, max(1, q3+q4-p34) <= x0 <= q4 - 1, 25 <= x0 <= 67
2425 round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2426 &is_midpoint_lt_even, &is_midpoint_gt_even,
2427 &is_inexact_lt_midpoint,
2428 &is_inexact_gt_midpoint);
2429 // R256.w[3] and R256.w[2] are always 0
2430 if (incr_exp) {
2431 // R256 = 10^(q4-x0), 1 <= q4 - x0 <= q4 - 25, 1 <= q4 - x0 <= 43
2432 if (q4 - x0 <= 19) { // 1 <= q4 - x0 <= 19
2433 R256.w[0] = ten2k64[q4 - x0];
2434 // R256.w[1] stays 0
2435 // R256.w[2] stays 0
2436 // R256.w[3] stays 0
2437 } else { // 20 <= q4 - x0 <= 33
2438 R256.w[0] = ten2k128[q4 - x0 - 20].w[0];
2439 R256.w[1] = ten2k128[q4 - x0 - 20].w[1];
2440 // R256.w[2] stays 0
2441 // R256.w[3] stays 0
2442 }
2443 }
2444 R128.w[1] = R256.w[1];
2445 R128.w[0] = R256.w[0];
2446 }
2447 // now add C3 * 10^scale in res and the signed top (q4-x0) digits of C4,
2448 // rounded to nearest, which were copied into R128
2449 if (z_sign == p_sign) {
2450 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale
2451 // the sum can result in [up to] p34 or p34 + 1 digits
2452 res.w[0] = res.w[0] + R128.w[0];
2453 res.w[1] = res.w[1] + R128.w[1];
2454 if (res.w[0] < R128.w[0])
2455 res.w[1]++; // carry
2456 // if res > 10^34 - 1 need to increase x0 and decrease scale by 1
2457 if (res.w[1] > 0x0001ed09bead87c0ull ||
2458 (res.w[1] == 0x0001ed09bead87c0ull &&
2459 res.w[0] > 0x378d8e63ffffffffull)) {
2460 // avoid double rounding error
2461 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2462 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2463 is_midpoint_lt_even0 = is_midpoint_lt_even;
2464 is_midpoint_gt_even0 = is_midpoint_gt_even;
2465 is_inexact_lt_midpoint = 0;
2466 is_inexact_gt_midpoint = 0;
2467 is_midpoint_lt_even = 0;
2468 is_midpoint_gt_even = 0;
2469 P128.w[1] = res.w[1];
2470 P128.w[0] = res.w[0];
2471 round128_19_38 (35, 1, P128, &res, &incr_exp,
2472 &is_midpoint_lt_even, &is_midpoint_gt_even,
2473 &is_inexact_lt_midpoint,
2474 &is_inexact_gt_midpoint);
2475 // incr_exp is 0 with certainty in this case
2476 // avoid a double rounding error
2477 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2478 is_midpoint_lt_even) { // double rounding error upward
2479 // res = res - 1
2480 res.w[0]--;
2481 if (res.w[0] == 0xffffffffffffffffull)
2482 res.w[1]--;
2483 // Note: a double rounding error upward is not possible; for this
2484 // the result after the first rounding would have to be 99...95
2485 // (35 digits in all), possibly followed by a number of zeros; this
2486 // not possible in Cases (2)-(6) or (15)-(17) which may get here
2487 is_midpoint_lt_even = 0;
2488 is_inexact_lt_midpoint = 1;
2489 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2490 is_midpoint_gt_even) { // double rounding error downward
2491 // res = res + 1
2492 res.w[0]++;
2493 if (res.w[0] == 0)
2494 res.w[1]++;
2495 is_midpoint_gt_even = 0;
2496 is_inexact_gt_midpoint = 1;
2497 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2498 !is_inexact_lt_midpoint
2499 && !is_inexact_gt_midpoint) {
2500 // if this second rounding was exact the result may still be
2501 // inexact because of the first rounding
2502 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2503 is_inexact_gt_midpoint = 1;
2504 }
2505 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2506 is_inexact_lt_midpoint = 1;
2507 }
2508 } else if (is_midpoint_gt_even &&
2509 (is_inexact_gt_midpoint0
2510 || is_midpoint_lt_even0)) {
2511 // pulled up to a midpoint
2512 is_inexact_lt_midpoint = 1;
2513 is_inexact_gt_midpoint = 0;
2514 is_midpoint_lt_even = 0;
2515 is_midpoint_gt_even = 0;
2516 } else if (is_midpoint_lt_even &&
2517 (is_inexact_lt_midpoint0
2518 || is_midpoint_gt_even0)) {
2519 // pulled down to a midpoint
2520 is_inexact_lt_midpoint = 0;
2521 is_inexact_gt_midpoint = 1;
2522 is_midpoint_lt_even = 0;
2523 is_midpoint_gt_even = 0;
2524 } else {
2525 ;
2526 }
2527 // adjust exponent
2528 e3 = e3 + 1;
2529 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2530 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2531 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2532 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2533 is_inexact_lt_midpoint = 1;
2534 }
2535 }
2536 } else {
2537 // this is the result rounded with unbounded exponent, unless a
2538 // correction is needed
2539 res.w[1] = res.w[1] & MASK_COEFF;
2540 if (lsb == 1) {
2541 if (is_midpoint_gt_even) {
2542 // res = res + 1
2543 is_midpoint_gt_even = 0;
2544 is_midpoint_lt_even = 1;
2545 res.w[0]++;
2546 if (res.w[0] == 0x0)
2547 res.w[1]++;
2548 // check for rounding overflow
2549 if (res.w[1] == 0x0001ed09bead87c0ull &&
2550 res.w[0] == 0x378d8e6400000000ull) {
2551 // res = 10^34 => rounding overflow
2552 res.w[1] = 0x0000314dc6448d93ull;
2553 res.w[0] = 0x38c15b0a00000000ull; // 10^33
2554 e3++;
2555 }
2556 } else if (is_midpoint_lt_even) {
2557 // res = res - 1
2558 is_midpoint_lt_even = 0;
2559 is_midpoint_gt_even = 1;
2560 res.w[0]--;
2561 if (res.w[0] == 0xffffffffffffffffull)
2562 res.w[1]--;
2563 // if the result is pure zero, the sign depends on the rounding
2564 // mode (x*y and z had opposite signs)
2565 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2566 if (rnd_mode != ROUNDING_DOWN)
2567 z_sign = 0x0000000000000000ull;
2568 else
2569 z_sign = 0x8000000000000000ull;
2570 // the exponent is max (e3, expmin)
2571 res.w[1] = 0x0;
2572 res.w[0] = 0x0;
2573 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2574 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2575 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2576 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2577 BID_SWAP128 (res);
2578 BID_RETURN (res)
2579 }
2580 } else {
2581 ;
2582 }
2583 }
2584 }
2585 } else { // if (z_sign != p_sign)
2586 lsb = res.w[0] & 0x01; // lsb of C3 * 10^scale; R128 contains rounded C4
2587 // used to swap rounding indicators if p_sign != z_sign
2588 // the sum can result in [up to] p34 or p34 - 1 digits
2589 tmp64 = res.w[0];
2590 res.w[0] = res.w[0] - R128.w[0];
2591 res.w[1] = res.w[1] - R128.w[1];
2592 if (res.w[0] > tmp64)
2593 res.w[1]--; // borrow
2594 // if res < 10^33 and exp > expmin need to decrease x0 and
2595 // increase scale by 1
2596 if (e3 > expmin && ((res.w[1] < 0x0000314dc6448d93ull ||
2597 (res.w[1] == 0x0000314dc6448d93ull &&
2598 res.w[0] < 0x38c15b0a00000000ull)) ||
2599 (is_inexact_lt_midpoint
2600 && res.w[1] == 0x0000314dc6448d93ull
2601 && res.w[0] == 0x38c15b0a00000000ull))
2602 && x0 >= 1) {
2603 x0 = x0 - 1;
2604 // first restore e3, otherwise it will be too small
2605 e3 = e3 + scale;
2606 scale = scale + 1;
2607 is_inexact_lt_midpoint = 0;
2608 is_inexact_gt_midpoint = 0;
2609 is_midpoint_lt_even = 0;
2610 is_midpoint_gt_even = 0;
2611 incr_exp = 0;
2612 goto case2_repeat;
2613 }
2614 // else this is the result rounded with unbounded exponent;
2615 // because the result has opposite sign to that of C4 which was
2616 // rounded, need to change the rounding indicators
2617 if (is_inexact_lt_midpoint) {
2618 is_inexact_lt_midpoint = 0;
2619 is_inexact_gt_midpoint = 1;
2620 } else if (is_inexact_gt_midpoint) {
2621 is_inexact_gt_midpoint = 0;
2622 is_inexact_lt_midpoint = 1;
2623 } else if (lsb == 0) {
2624 if (is_midpoint_lt_even) {
2625 is_midpoint_lt_even = 0;
2626 is_midpoint_gt_even = 1;
2627 } else if (is_midpoint_gt_even) {
2628 is_midpoint_gt_even = 0;
2629 is_midpoint_lt_even = 1;
2630 } else {
2631 ;
2632 }
2633 } else if (lsb == 1) {
2634 if (is_midpoint_lt_even) {
2635 // res = res + 1
2636 res.w[0]++;
2637 if (res.w[0] == 0x0)
2638 res.w[1]++;
2639 // check for rounding overflow
2640 if (res.w[1] == 0x0001ed09bead87c0ull &&
2641 res.w[0] == 0x378d8e6400000000ull) {
2642 // res = 10^34 => rounding overflow
2643 res.w[1] = 0x0000314dc6448d93ull;
2644 res.w[0] = 0x38c15b0a00000000ull; // 10^33
2645 e3++;
2646 }
2647 } else if (is_midpoint_gt_even) {
2648 // res = res - 1
2649 res.w[0]--;
2650 if (res.w[0] == 0xffffffffffffffffull)
2651 res.w[1]--;
2652 // if the result is pure zero, the sign depends on the rounding
2653 // mode (x*y and z had opposite signs)
2654 if (res.w[1] == 0x0ull && res.w[0] == 0x0ull) {
2655 if (rnd_mode != ROUNDING_DOWN)
2656 z_sign = 0x0000000000000000ull;
2657 else
2658 z_sign = 0x8000000000000000ull;
2659 // the exponent is max (e3, expmin)
2660 res.w[1] = 0x0;
2661 res.w[0] = 0x0;
2662 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2663 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2664 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2665 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2666 BID_SWAP128 (res);
2667 BID_RETURN (res)
2668 }
2669 } else {
2670 ;
2671 }
2672 } else {
2673 ;
2674 }
2675 }
2676 // check for underflow
2677 if (e3 == expmin) { // and if significand < 10^33 => result is tiny
2678 if ((res.w[1] & MASK_COEFF) < 0x0000314dc6448d93ull ||
2679 ((res.w[1] & MASK_COEFF) == 0x0000314dc6448d93ull &&
2680 res.w[0] < 0x38c15b0a00000000ull)) {
2681 is_tiny = 1;
2682 }
2683 } else if (e3 < expmin) {
2684 // the result is tiny, so we must truncate more of res
2685 is_tiny = 1;
2686 x0 = expmin - e3;
2687 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
2688 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
2689 is_midpoint_lt_even0 = is_midpoint_lt_even;
2690 is_midpoint_gt_even0 = is_midpoint_gt_even;
2691 is_inexact_lt_midpoint = 0;
2692 is_inexact_gt_midpoint = 0;
2693 is_midpoint_lt_even = 0;
2694 is_midpoint_gt_even = 0;
2695 // determine the number of decimal digits in res
2696 if (res.w[1] == 0x0) {
2697 // between 1 and 19 digits
2698 for (ind = 1; ind <= 19; ind++) {
2699 if (res.w[0] < ten2k64[ind]) {
2700 break;
2701 }
2702 }
2703 // ind digits
2704 } else if (res.w[1] < ten2k128[0].w[1] ||
2705 (res.w[1] == ten2k128[0].w[1]
2706 && res.w[0] < ten2k128[0].w[0])) {
2707 // 20 digits
2708 ind = 20;
2709 } else { // between 21 and 38 digits
2710 for (ind = 1; ind <= 18; ind++) {
2711 if (res.w[1] < ten2k128[ind].w[1] ||
2712 (res.w[1] == ten2k128[ind].w[1] &&
2713 res.w[0] < ten2k128[ind].w[0])) {
2714 break;
2715 }
2716 }
2717 // ind + 20 digits
2718 ind = ind + 20;
2719 }
2720
2721 // at this point ind >= x0; because delta >= 2 on this path, the case
2722 // ind = x0 can occur only in Case (2) or case (3), when C3 has one
2723 // digit (q3 = 1) equal to 1 (C3 = 1), e3 is expmin (e3 = expmin),
2724 // the signs of x * y and z are opposite, and through cancellation
2725 // the most significant decimal digit in res has the weight
2726 // 10^(emin-1); however, it is clear that in this case the most
2727 // significant digit is 9, so the result before rounding is
2728 // 0.9... * 10^emin
2729 // Otherwise, ind > x0 because there are non-zero decimal digits in the
2730 // result with weight of at least 10^emin, and correction for underflow
2731 // can be carried out using the round*_*_2_* () routines
2732 if (x0 == ind) { // the result before rounding is 0.9... * 10^emin
2733 res.w[1] = 0x0;
2734 res.w[0] = 0x1;
2735 is_inexact_gt_midpoint = 1;
2736 } else if (ind <= 18) { // check that 2 <= ind
2737 // 2 <= ind <= 18, 1 <= x0 <= 17
2738 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
2739 &is_midpoint_lt_even, &is_midpoint_gt_even,
2740 &is_inexact_lt_midpoint,
2741 &is_inexact_gt_midpoint);
2742 if (incr_exp) {
2743 // R64 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 17
2744 R64 = ten2k64[ind - x0];
2745 }
2746 res.w[1] = 0;
2747 res.w[0] = R64;
2748 } else if (ind <= 38) {
2749 // 19 <= ind <= 38
2750 P128.w[1] = res.w[1];
2751 P128.w[0] = res.w[0];
2752 round128_19_38 (ind, x0, P128, &res, &incr_exp,
2753 &is_midpoint_lt_even, &is_midpoint_gt_even,
2754 &is_inexact_lt_midpoint,
2755 &is_inexact_gt_midpoint);
2756 if (incr_exp) {
2757 // R128 = 10^(ind-x0), 1 <= ind - x0 <= ind - 1, 1 <= ind - x0 <= 37
2758 if (ind - x0 <= 19) { // 1 <= ind - x0 <= 19
2759 res.w[0] = ten2k64[ind - x0];
2760 // res.w[1] stays 0
2761 } else { // 20 <= ind - x0 <= 37
2762 res.w[0] = ten2k128[ind - x0 - 20].w[0];
2763 res.w[1] = ten2k128[ind - x0 - 20].w[1];
2764 }
2765 }
2766 }
2767 // avoid a double rounding error
2768 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
2769 is_midpoint_lt_even) { // double rounding error upward
2770 // res = res - 1
2771 res.w[0]--;
2772 if (res.w[0] == 0xffffffffffffffffull)
2773 res.w[1]--;
2774 // Note: a double rounding error upward is not possible; for this
2775 // the result after the first rounding would have to be 99...95
2776 // (35 digits in all), possibly followed by a number of zeros; this
2777 // not possible in Cases (2)-(6) which may get here
2778 is_midpoint_lt_even = 0;
2779 is_inexact_lt_midpoint = 1;
2780 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
2781 is_midpoint_gt_even) { // double rounding error downward
2782 // res = res + 1
2783 res.w[0]++;
2784 if (res.w[0] == 0)
2785 res.w[1]++;
2786 is_midpoint_gt_even = 0;
2787 is_inexact_gt_midpoint = 1;
2788 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2789 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2790 // if this second rounding was exact the result may still be
2791 // inexact because of the first rounding
2792 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
2793 is_inexact_gt_midpoint = 1;
2794 }
2795 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
2796 is_inexact_lt_midpoint = 1;
2797 }
2798 } else if (is_midpoint_gt_even &&
2799 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
2800 // pulled up to a midpoint
2801 is_inexact_lt_midpoint = 1;
2802 is_inexact_gt_midpoint = 0;
2803 is_midpoint_lt_even = 0;
2804 is_midpoint_gt_even = 0;
2805 } else if (is_midpoint_lt_even &&
2806 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
2807 // pulled down to a midpoint
2808 is_inexact_lt_midpoint = 0;
2809 is_inexact_gt_midpoint = 1;
2810 is_midpoint_lt_even = 0;
2811 is_midpoint_gt_even = 0;
2812 } else {
2813 ;
2814 }
2815 // adjust exponent
2816 e3 = e3 + x0;
2817 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2818 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2819 if (is_midpoint_lt_even0 || is_midpoint_gt_even0 ||
2820 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0) {
2821 is_inexact_lt_midpoint = 1;
2822 }
2823 }
2824 } else {
2825 ; // not underflow
2826 }
2827 // check for inexact result
2828 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
2829 is_midpoint_lt_even || is_midpoint_gt_even) {
2830 // set the inexact flag
2831 *pfpsf |= INEXACT_EXCEPTION;
2832 if (is_tiny)
2833 *pfpsf |= UNDERFLOW_EXCEPTION;
2834 }
2835 // now check for significand = 10^34 (may have resulted from going
2836 // back to case2_repeat)
2837 if (res.w[1] == 0x0001ed09bead87c0ull &&
2838 res.w[0] == 0x378d8e6400000000ull) { // if res = 10^34
2839 res.w[1] = 0x0000314dc6448d93ull; // res = 10^33
2840 res.w[0] = 0x38c15b0a00000000ull;
2841 e3 = e3 + 1;
2842 }
2843 res.w[1] = z_sign | ((UINT64) (e3 + 6176) << 49) | res.w[1];
2844 // check for overflow
2845 if (rnd_mode == ROUNDING_TO_NEAREST && e3 > expmax) {
2846 res.w[1] = z_sign | 0x7800000000000000ull; // +/-inf
2847 res.w[0] = 0x0000000000000000ull;
2848 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
2849 }
2850 if (rnd_mode != ROUNDING_TO_NEAREST) {
2851 rounding_correction (rnd_mode,
2852 is_inexact_lt_midpoint,
2853 is_inexact_gt_midpoint,
2854 is_midpoint_lt_even, is_midpoint_gt_even,
2855 e3, &res, pfpsf);
2856 }
2857 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2858 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2859 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2860 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2861 BID_SWAP128 (res);
2862 BID_RETURN (res)
2863
2864 } else {
2865
2866 // we get here only if delta <= 1 in Cases (2), (3), (4), (5), or (6) and
2867 // the signs of x*y and z are opposite; in these cases massive
2868 // cancellation can occur, so it is better to scale either C3 or C4 and
2869 // to perform the subtraction before rounding; rounding is performed
2870 // next, depending on the number of decimal digits in the result and on
2871 // the exponent value
2872 // Note: overlow is not possible in this case
2873 // this is similar to Cases (15), (16), and (17)
2874
2875 if (delta + q4 < q3) { // from Case (6)
2876 // Case (6) with 0<= delta <= 1 is similar to Cases (15), (16), and
2877 // (17) if we swap (C3, C4), (q3, q4), (e3, e4), (z_sign, p_sign)
2878 // and call add_and_round; delta stays positive
2879 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
2880 P128.w[1] = C3.w[1];
2881 P128.w[0] = C3.w[0];
2882 C3.w[1] = C4.w[1];
2883 C3.w[0] = C4.w[0];
2884 C4.w[1] = P128.w[1];
2885 C4.w[0] = P128.w[0];
2886 ind = q3;
2887 q3 = q4;
2888 q4 = ind;
2889 ind = e3;
2890 e3 = e4;
2891 e4 = ind;
2892 tmp_sign = z_sign;
2893 z_sign = p_sign;
2894 p_sign = tmp_sign;
2895 } else { // from Cases (2), (3), (4), (5)
2896 // In Cases (2), (3), (4), (5) with 0 <= delta <= 1 C3 has to be
2897 // scaled up by q4 + delta - q3; this is the same as in Cases (15),
2898 // (16), and (17) if we just change the sign of delta
2899 delta = -delta;
2900 }
2901 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
2902 rnd_mode, &is_midpoint_lt_even,
2903 &is_midpoint_gt_even, &is_inexact_lt_midpoint,
2904 &is_inexact_gt_midpoint, pfpsf, &res);
2905 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
2906 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
2907 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
2908 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
2909 BID_SWAP128 (res);
2910 BID_RETURN (res)
2911
2912 }
2913
2914 } else { // if delta < 0
2915
2916 delta = -delta;
2917
2918 if (p34 < q4 && q4 <= delta) { // Case (7)
2919
2920 // truncate C4 to p34 digits into res
2921 // x = q4-p34, 1 <= x <= 34 because 35 <= q4 <= 68
2922 x0 = q4 - p34;
2923 if (q4 <= 38) {
2924 P128.w[1] = C4.w[1];
2925 P128.w[0] = C4.w[0];
2926 round128_19_38 (q4, x0, P128, &res, &incr_exp,
2927 &is_midpoint_lt_even, &is_midpoint_gt_even,
2928 &is_inexact_lt_midpoint,
2929 &is_inexact_gt_midpoint);
2930 } else if (q4 <= 57) { // 35 <= q4 <= 57
2931 P192.w[2] = C4.w[2];
2932 P192.w[1] = C4.w[1];
2933 P192.w[0] = C4.w[0];
2934 round192_39_57 (q4, x0, P192, &R192, &incr_exp,
2935 &is_midpoint_lt_even, &is_midpoint_gt_even,
2936 &is_inexact_lt_midpoint,
2937 &is_inexact_gt_midpoint);
2938 res.w[0] = R192.w[0];
2939 res.w[1] = R192.w[1];
2940 } else { // if (q4 <= 68)
2941 round256_58_76 (q4, x0, C4, &R256, &incr_exp,
2942 &is_midpoint_lt_even, &is_midpoint_gt_even,
2943 &is_inexact_lt_midpoint,
2944 &is_inexact_gt_midpoint);
2945 res.w[0] = R256.w[0];
2946 res.w[1] = R256.w[1];
2947 }
2948 e4 = e4 + x0;
2949 if (incr_exp) {
2950 e4 = e4 + 1;
2951 }
2952 if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
2953 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
2954 // if C4 rounded to p34 digits is exact then the result is inexact,
2955 // in a way that depends on the signs of x * y and z
2956 if (p_sign == z_sign) {
2957 is_inexact_lt_midpoint = 1;
2958 } else { // if (p_sign != z_sign)
2959 if (res.w[1] != 0x0000314dc6448d93ull ||
2960 res.w[0] != 0x38c15b0a00000000ull) { // res != 10^33
2961 is_inexact_gt_midpoint = 1;
2962 } else { // res = 10^33 and exact is a special case
2963 // if C3 < 1/2 ulp then res = 10^33 and is_inexact_gt_midpoint = 1
2964 // if C3 = 1/2 ulp then res = 10^33 and is_midpoint_lt_even = 1
2965 // if C3 > 1/2 ulp then res = 10^34-1 and is_inexact_lt_midpoint = 1
2966 // Note: ulp is really ulp/10 (after borrow which propagates to msd)
2967 if (delta > p34 + 1) { // C3 < 1/2
2968 // res = 10^33, unchanged
2969 is_inexact_gt_midpoint = 1;
2970 } else { // if (delta == p34 + 1)
2971 if (q3 <= 19) {
2972 if (C3.w[0] < midpoint64[q3 - 1]) { // C3 < 1/2 ulp
2973 // res = 10^33, unchanged
2974 is_inexact_gt_midpoint = 1;
2975 } else if (C3.w[0] == midpoint64[q3 - 1]) { // C3 = 1/2 ulp
2976 // res = 10^33, unchanged
2977 is_midpoint_lt_even = 1;
2978 } else { // if (C3.w[0] > midpoint64[q3-1]), C3 > 1/2 ulp
2979 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2980 res.w[0] = 0x378d8e63ffffffffull;
2981 e4 = e4 - 1;
2982 is_inexact_lt_midpoint = 1;
2983 }
2984 } else { // if (20 <= q3 <=34)
2985 if (C3.w[1] < midpoint128[q3 - 20].w[1] ||
2986 (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2987 C3.w[0] < midpoint128[q3 - 20].w[0])) { // C3 < 1/2 ulp
2988 // res = 10^33, unchanged
2989 is_inexact_gt_midpoint = 1;
2990 } else if (C3.w[1] == midpoint128[q3 - 20].w[1] &&
2991 C3.w[0] == midpoint128[q3 - 20].w[0]) { // C3 = 1/2 ulp
2992 // res = 10^33, unchanged
2993 is_midpoint_lt_even = 1;
2994 } else { // if (C3 > midpoint128[q3-20]), C3 > 1/2 ulp
2995 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
2996 res.w[0] = 0x378d8e63ffffffffull;
2997 e4 = e4 - 1;
2998 is_inexact_lt_midpoint = 1;
2999 }
3000 }
3001 }
3002 }
3003 }
3004 } else if (is_midpoint_lt_even) {
3005 if (z_sign != p_sign) {
3006 // needs correction: res = res - 1
3007 res.w[0] = res.w[0] - 1;
3008 if (res.w[0] == 0xffffffffffffffffull)
3009 res.w[1]--;
3010 // if it is (10^33-1)*10^e4 then the corect result is
3011 // (10^34-1)*10(e4-1)
3012 if (res.w[1] == 0x0000314dc6448d93ull &&
3013 res.w[0] == 0x38c15b09ffffffffull) {
3014 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3015 res.w[0] = 0x378d8e63ffffffffull;
3016 e4 = e4 - 1;
3017 }
3018 is_midpoint_lt_even = 0;
3019 is_inexact_lt_midpoint = 1;
3020 } else { // if (z_sign == p_sign)
3021 is_midpoint_lt_even = 0;
3022 is_inexact_gt_midpoint = 1;
3023 }
3024 } else if (is_midpoint_gt_even) {
3025 if (z_sign == p_sign) {
3026 // needs correction: res = res + 1 (cannot cross in the next binade)
3027 res.w[0] = res.w[0] + 1;
3028 if (res.w[0] == 0x0000000000000000ull)
3029 res.w[1]++;
3030 is_midpoint_gt_even = 0;
3031 is_inexact_gt_midpoint = 1;
3032 } else { // if (z_sign != p_sign)
3033 is_midpoint_gt_even = 0;
3034 is_inexact_lt_midpoint = 1;
3035 }
3036 } else {
3037 ; // the rounded result is already correct
3038 }
3039 // check for overflow
3040 if (rnd_mode == ROUNDING_TO_NEAREST && e4 > expmax) {
3041 res.w[1] = p_sign | 0x7800000000000000ull;
3042 res.w[0] = 0x0000000000000000ull;
3043 *pfpsf |= (OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
3044 } else { // no overflow or not RN
3045 p_exp = ((UINT64) (e4 + 6176) << 49);
3046 res.w[1] = p_sign | (p_exp & MASK_EXP) | res.w[1];
3047 }
3048 if (rnd_mode != ROUNDING_TO_NEAREST) {
3049 rounding_correction (rnd_mode,
3050 is_inexact_lt_midpoint,
3051 is_inexact_gt_midpoint,
3052 is_midpoint_lt_even, is_midpoint_gt_even,
3053 e4, &res, pfpsf);
3054 }
3055 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
3056 is_midpoint_lt_even || is_midpoint_gt_even) {
3057 // set the inexact flag
3058 *pfpsf |= INEXACT_EXCEPTION;
3059 }
3060 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3061 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3062 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3063 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3064 BID_SWAP128 (res);
3065 BID_RETURN (res)
3066
3067 } else if ((q4 <= p34 && p34 <= delta) || // Case (8)
3068 (q4 <= delta && delta < p34 && p34 < delta + q3) || // Case (9)
3069 (q4 <= delta && delta + q3 <= p34) || // Case (10)
3070 (delta < q4 && q4 <= p34 && p34 < delta + q3) || // Case (13)
3071 (delta < q4 && q4 <= delta + q3 && delta + q3 <= p34) || // Case (14)
3072 (delta + q3 < q4 && q4 <= p34)) { // Case (18)
3073
3074 // Case (8) is similar to Case (1), with C3 and C4 swapped
3075 // Case (9) is similar to Case (2), with C3 and C4 swapped
3076 // Case (10) is similar to Case (3), with C3 and C4 swapped
3077 // Case (13) is similar to Case (4), with C3 and C4 swapped
3078 // Case (14) is similar to Case (5), with C3 and C4 swapped
3079 // Case (18) is similar to Case (6), with C3 and C4 swapped
3080
3081 // swap (C3, C4), (q3, q4), (e3, 34), (z_sign, p_sign), (z_exp, p_exp)
3082 // and go back to delta_ge_zero
3083 // C4.w[3] = 0 and C4.w[2] = 0, so swap just the low part of C4 with C3
3084 P128.w[1] = C3.w[1];
3085 P128.w[0] = C3.w[0];
3086 C3.w[1] = C4.w[1];
3087 C3.w[0] = C4.w[0];
3088 C4.w[1] = P128.w[1];
3089 C4.w[0] = P128.w[0];
3090 ind = q3;
3091 q3 = q4;
3092 q4 = ind;
3093 ind = e3;
3094 e3 = e4;
3095 e4 = ind;
3096 tmp_sign = z_sign;
3097 z_sign = p_sign;
3098 p_sign = tmp_sign;
3099 tmp.ui64 = z_exp;
3100 z_exp = p_exp;
3101 p_exp = tmp.ui64;
3102 goto delta_ge_zero;
3103
3104 } else if ((p34 <= delta && delta < q4 && q4 < delta + q3) || // Case (11)
3105 (delta < p34 && p34 < q4 && q4 < delta + q3)) { // Case (12)
3106
3107 // round C3 to nearest to q3 - x0 digits, where x0 = e4 - e3,
3108 // 1 <= x0 <= q3 - 1 <= p34 - 1
3109 x0 = e4 - e3; // or x0 = delta + q3 - q4
3110 if (q3 <= 18) { // 2 <= q3 <= 18
3111 round64_2_18 (q3, x0, C3.w[0], &R64, &incr_exp,
3112 &is_midpoint_lt_even, &is_midpoint_gt_even,
3113 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
3114 // C3.w[1] = 0;
3115 C3.w[0] = R64;
3116 } else if (q3 <= 38) {
3117 round128_19_38 (q3, x0, C3, &R128, &incr_exp,
3118 &is_midpoint_lt_even, &is_midpoint_gt_even,
3119 &is_inexact_lt_midpoint,
3120 &is_inexact_gt_midpoint);
3121 C3.w[1] = R128.w[1];
3122 C3.w[0] = R128.w[0];
3123 }
3124 // the rounded result has q3 - x0 digits
3125 // we want the exponent to be e4, so if incr_exp = 1 then
3126 // multiply the rounded result by 10 - it will still fit in 113 bits
3127 if (incr_exp) {
3128 // 64 x 128 -> 128
3129 P128.w[1] = C3.w[1];
3130 P128.w[0] = C3.w[0];
3131 __mul_64x128_to_128 (C3, ten2k64[1], P128);
3132 }
3133 e3 = e3 + x0; // this is e4
3134 // now add/subtract the 256-bit C4 and the new (and shorter) 128-bit C3;
3135 // the result will have the sign of x * y; the exponent is e4
3136 R256.w[3] = 0;
3137 R256.w[2] = 0;
3138 R256.w[1] = C3.w[1];
3139 R256.w[0] = C3.w[0];
3140 if (p_sign == z_sign) { // R256 = C4 + R256
3141 add256 (C4, R256, &R256);
3142 } else { // if (p_sign != z_sign) { // R256 = C4 - R256
3143 sub256 (C4, R256, &R256); // the result cannot be pure zero
3144 // because the result has opposite sign to that of R256 which was
3145 // rounded, need to change the rounding indicators
3146 lsb = C4.w[0] & 0x01;
3147 if (is_inexact_lt_midpoint) {
3148 is_inexact_lt_midpoint = 0;
3149 is_inexact_gt_midpoint = 1;
3150 } else if (is_inexact_gt_midpoint) {
3151 is_inexact_gt_midpoint = 0;
3152 is_inexact_lt_midpoint = 1;
3153 } else if (lsb == 0) {
3154 if (is_midpoint_lt_even) {
3155 is_midpoint_lt_even = 0;
3156 is_midpoint_gt_even = 1;
3157 } else if (is_midpoint_gt_even) {
3158 is_midpoint_gt_even = 0;
3159 is_midpoint_lt_even = 1;
3160 } else {
3161 ;
3162 }
3163 } else if (lsb == 1) {
3164 if (is_midpoint_lt_even) {
3165 // res = res + 1
3166 R256.w[0]++;
3167 if (R256.w[0] == 0x0) {
3168 R256.w[1]++;
3169 if (R256.w[1] == 0x0) {
3170 R256.w[2]++;
3171 if (R256.w[2] == 0x0) {
3172 R256.w[3]++;
3173 }
3174 }
3175 }
3176 // no check for rounding overflow - R256 was a difference
3177 } else if (is_midpoint_gt_even) {
3178 // res = res - 1
3179 R256.w[0]--;
3180 if (R256.w[0] == 0xffffffffffffffffull) {
3181 R256.w[1]--;
3182 if (R256.w[1] == 0xffffffffffffffffull) {
3183 R256.w[2]--;
3184 if (R256.w[2] == 0xffffffffffffffffull) {
3185 R256.w[3]--;
3186 }
3187 }
3188 }
3189 } else {
3190 ;
3191 }
3192 } else {
3193 ;
3194 }
3195 }
3196 // determine the number of decimal digits in R256
3197 ind = nr_digits256 (R256); // ind >= p34
3198 // if R256 is sum, then ind > p34; if R256 is a difference, then
3199 // ind >= p34; this means that we can calculate the result rounded to
3200 // the destination precision, with unbounded exponent, starting from R256
3201 // and using the indicators from the rounding of C3 to avoid a double
3202 // rounding error
3203
3204 if (ind < p34) {
3205 ;
3206 } else if (ind == p34) {
3207 // the result rounded to the destination precision with
3208 // unbounded exponent
3209 // is (-1)^p_sign * R256 * 10^e4
3210 res.w[1] = R256.w[1];
3211 res.w[0] = R256.w[0];
3212 } else { // if (ind > p34)
3213 // if more than P digits, round to nearest to P digits
3214 // round R256 to p34 digits
3215 x0 = ind - p34; // 1 <= x0 <= 34 as 35 <= ind <= 68
3216 // save C3 rounding indicators to help avoid double rounding error
3217 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3218 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3219 is_midpoint_lt_even0 = is_midpoint_lt_even;
3220 is_midpoint_gt_even0 = is_midpoint_gt_even;
3221 // initialize rounding indicators
3222 is_inexact_lt_midpoint = 0;
3223 is_inexact_gt_midpoint = 0;
3224 is_midpoint_lt_even = 0;
3225 is_midpoint_gt_even = 0;
3226 // round to p34 digits; the result fits in 113 bits
3227 if (ind <= 38) {
3228 P128.w[1] = R256.w[1];
3229 P128.w[0] = R256.w[0];
3230 round128_19_38 (ind, x0, P128, &R128, &incr_exp,
3231 &is_midpoint_lt_even, &is_midpoint_gt_even,
3232 &is_inexact_lt_midpoint,
3233 &is_inexact_gt_midpoint);
3234 } else if (ind <= 57) {
3235 P192.w[2] = R256.w[2];
3236 P192.w[1] = R256.w[1];
3237 P192.w[0] = R256.w[0];
3238 round192_39_57 (ind, x0, P192, &R192, &incr_exp,
3239 &is_midpoint_lt_even, &is_midpoint_gt_even,
3240 &is_inexact_lt_midpoint,
3241 &is_inexact_gt_midpoint);
3242 R128.w[1] = R192.w[1];
3243 R128.w[0] = R192.w[0];
3244 } else { // if (ind <= 68)
3245 round256_58_76 (ind, x0, R256, &R256, &incr_exp,
3246 &is_midpoint_lt_even, &is_midpoint_gt_even,
3247 &is_inexact_lt_midpoint,
3248 &is_inexact_gt_midpoint);
3249 R128.w[1] = R256.w[1];
3250 R128.w[0] = R256.w[0];
3251 }
3252 // the rounded result has p34 = 34 digits
3253 e4 = e4 + x0 + incr_exp;
3254
3255 res.w[1] = R128.w[1];
3256 res.w[0] = R128.w[0];
3257
3258 // avoid a double rounding error
3259 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3260 is_midpoint_lt_even) { // double rounding error upward
3261 // res = res - 1
3262 res.w[0]--;
3263 if (res.w[0] == 0xffffffffffffffffull)
3264 res.w[1]--;
3265 is_midpoint_lt_even = 0;
3266 is_inexact_lt_midpoint = 1;
3267 // Note: a double rounding error upward is not possible; for this
3268 // the result after the first rounding would have to be 99...95
3269 // (35 digits in all), possibly followed by a number of zeros; this
3270 // not possible in Cases (2)-(6) or (15)-(17) which may get here
3271 // if this is 10^33 - 1 make it 10^34 - 1 and decrement exponent
3272 if (res.w[1] == 0x0000314dc6448d93ull &&
3273 res.w[0] == 0x38c15b09ffffffffull) { // 10^33 - 1
3274 res.w[1] = 0x0001ed09bead87c0ull; // 10^34 - 1
3275 res.w[0] = 0x378d8e63ffffffffull;
3276 e4--;
3277 }
3278 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3279 is_midpoint_gt_even) { // double rounding error downward
3280 // res = res + 1
3281 res.w[0]++;
3282 if (res.w[0] == 0)
3283 res.w[1]++;
3284 is_midpoint_gt_even = 0;
3285 is_inexact_gt_midpoint = 1;
3286 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3287 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
3288 // if this second rounding was exact the result may still be
3289 // inexact because of the first rounding
3290 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3291 is_inexact_gt_midpoint = 1;
3292 }
3293 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3294 is_inexact_lt_midpoint = 1;
3295 }
3296 } else if (is_midpoint_gt_even &&
3297 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
3298 // pulled up to a midpoint
3299 is_inexact_lt_midpoint = 1;
3300 is_inexact_gt_midpoint = 0;
3301 is_midpoint_lt_even = 0;
3302 is_midpoint_gt_even = 0;
3303 } else if (is_midpoint_lt_even &&
3304 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
3305 // pulled down to a midpoint
3306 is_inexact_lt_midpoint = 0;
3307 is_inexact_gt_midpoint = 1;
3308 is_midpoint_lt_even = 0;
3309 is_midpoint_gt_even = 0;
3310 } else {
3311 ;
3312 }
3313 }
3314
3315 // determine tininess
3316 if (rnd_mode == ROUNDING_TO_NEAREST) {
3317 if (e4 < expmin) {
3318 is_tiny = 1; // for other rounding modes apply correction
3319 }
3320 } else {
3321 // for RM, RP, RZ, RA apply correction in order to determine tininess
3322 // but do not save the result; apply the correction to
3323 // (-1)^p_sign * res * 10^0
3324 P128.w[1] = p_sign | 0x3040000000000000ull | res.w[1];
3325 P128.w[0] = res.w[0];
3326 rounding_correction (rnd_mode,
3327 is_inexact_lt_midpoint,
3328 is_inexact_gt_midpoint,
3329 is_midpoint_lt_even, is_midpoint_gt_even,
3330 0, &P128, pfpsf);
3331 scale = ((P128.w[1] & MASK_EXP) >> 49) - 6176; // -1, 0, or +1
3332 // the number of digits in the significand is p34 = 34
3333 if (e4 + scale < expmin) {
3334 is_tiny = 1;
3335 }
3336 }
3337
3338 // the result rounded to the destination precision with unbounded exponent
3339 // is (-1)^p_sign * res * 10^e4
3340 res.w[1] = p_sign | ((UINT64) (e4 + 6176) << 49) | res.w[1]; // RN
3341 // res.w[0] unchanged;
3342 // Note: res is correct only if expmin <= e4 <= expmax
3343 ind = p34; // the number of decimal digits in the signifcand of res
3344
3345 // at this point we have the result rounded with unbounded exponent in
3346 // res and we know its tininess:
3347 // res = (-1)^p_sign * significand * 10^e4,
3348 // where q (significand) = ind = p34
3349 // Note: res is correct only if expmin <= e4 <= expmax
3350
3351 // check for overflow if RN
3352 if (rnd_mode == ROUNDING_TO_NEAREST
3353 && (ind + e4) > (p34 + expmax)) {
3354 res.w[1] = p_sign | 0x7800000000000000ull;
3355 res.w[0] = 0x0000000000000000ull;
3356 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
3357 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3358 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3359 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3360 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3361 BID_SWAP128 (res);
3362 BID_RETURN (res)
3363 } // else not overflow or not RN, so continue
3364
3365 // from this point on this is similar to the last part of the computation
3366 // for Cases (15), (16), (17)
3367
3368 // if (e4 >= expmin) we have the result rounded with bounded exponent
3369 if (e4 < expmin) {
3370 x0 = expmin - e4; // x0 >= 1; the number of digits to chop off of res
3371 // where the result rounded [at most] once is
3372 // (-1)^p_sign * significand_res * 10^e4
3373
3374 // avoid double rounding error
3375 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
3376 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
3377 is_midpoint_lt_even0 = is_midpoint_lt_even;
3378 is_midpoint_gt_even0 = is_midpoint_gt_even;
3379 is_inexact_lt_midpoint = 0;
3380 is_inexact_gt_midpoint = 0;
3381 is_midpoint_lt_even = 0;
3382 is_midpoint_gt_even = 0;
3383
3384 if (x0 > ind) {
3385 // nothing is left of res when moving the decimal point left x0 digits
3386 is_inexact_lt_midpoint = 1;
3387 res.w[1] = p_sign | 0x0000000000000000ull;
3388 res.w[0] = 0x0000000000000000ull;
3389 e4 = expmin;
3390 } else if (x0 == ind) { // 1 <= x0 = ind <= p34 = 34
3391 // this is <, =, or > 1/2 ulp
3392 // compare the ind-digit value in the significand of res with
3393 // 1/2 ulp = 5*10^(ind-1), i.e. determine whether it is
3394 // less than, equal to, or greater than 1/2 ulp (significand of res)
3395 R128.w[1] = res.w[1] & MASK_COEFF;
3396 R128.w[0] = res.w[0];
3397 if (ind <= 19) {
3398 if (R128.w[0] < midpoint64[ind - 1]) { // < 1/2 ulp
3399 lt_half_ulp = 1;
3400 is_inexact_lt_midpoint = 1;
3401 } else if (R128.w[0] == midpoint64[ind - 1]) { // = 1/2 ulp
3402 eq_half_ulp = 1;
3403 is_midpoint_gt_even = 1;
3404 } else { // > 1/2 ulp
3405 gt_half_ulp = 1;
3406 is_inexact_gt_midpoint = 1;
3407 }
3408 } else { // if (ind <= 38)
3409 if (R128.w[1] < midpoint128[ind - 20].w[1] ||
3410 (R128.w[1] == midpoint128[ind - 20].w[1] &&
3411 R128.w[0] < midpoint128[ind - 20].w[0])) { // < 1/2 ulp
3412 lt_half_ulp = 1;
3413 is_inexact_lt_midpoint = 1;
3414 } else if (R128.w[1] == midpoint128[ind - 20].w[1] &&
3415 R128.w[0] == midpoint128[ind - 20].w[0]) { // = 1/2 ulp
3416 eq_half_ulp = 1;
3417 is_midpoint_gt_even = 1;
3418 } else { // > 1/2 ulp
3419 gt_half_ulp = 1;
3420 is_inexact_gt_midpoint = 1;
3421 }
3422 }
3423 if (lt_half_ulp || eq_half_ulp) {
3424 // res = +0.0 * 10^expmin
3425 res.w[1] = 0x0000000000000000ull;
3426 res.w[0] = 0x0000000000000000ull;
3427 } else { // if (gt_half_ulp)
3428 // res = +1 * 10^expmin
3429 res.w[1] = 0x0000000000000000ull;
3430 res.w[0] = 0x0000000000000001ull;
3431 }
3432 res.w[1] = p_sign | res.w[1];
3433 e4 = expmin;
3434 } else { // if (1 <= x0 <= ind - 1 <= 33)
3435 // round the ind-digit result to ind - x0 digits
3436
3437 if (ind <= 18) { // 2 <= ind <= 18
3438 round64_2_18 (ind, x0, res.w[0], &R64, &incr_exp,
3439 &is_midpoint_lt_even, &is_midpoint_gt_even,
3440 &is_inexact_lt_midpoint,
3441 &is_inexact_gt_midpoint);
3442 res.w[1] = 0x0;
3443 res.w[0] = R64;
3444 } else if (ind <= 38) {
3445 P128.w[1] = res.w[1] & MASK_COEFF;
3446 P128.w[0] = res.w[0];
3447 round128_19_38 (ind, x0, P128, &res, &incr_exp,
3448 &is_midpoint_lt_even, &is_midpoint_gt_even,
3449 &is_inexact_lt_midpoint,
3450 &is_inexact_gt_midpoint);
3451 }
3452 e4 = e4 + x0; // expmin
3453 // we want the exponent to be expmin, so if incr_exp = 1 then
3454 // multiply the rounded result by 10 - it will still fit in 113 bits
3455 if (incr_exp) {
3456 // 64 x 128 -> 128
3457 P128.w[1] = res.w[1] & MASK_COEFF;
3458 P128.w[0] = res.w[0];
3459 __mul_64x128_to_128 (res, ten2k64[1], P128);
3460 }
3461 res.w[1] =
3462 p_sign | ((UINT64) (e4 + 6176) << 49) | (res.
3463 w[1] & MASK_COEFF);
3464 // avoid a double rounding error
3465 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
3466 is_midpoint_lt_even) { // double rounding error upward
3467 // res = res - 1
3468 res.w[0]--;
3469 if (res.w[0] == 0xffffffffffffffffull)
3470 res.w[1]--;
3471 // Note: a double rounding error upward is not possible; for this
3472 // the result after the first rounding would have to be 99...95
3473 // (35 digits in all), possibly followed by a number of zeros; this
3474 // not possible in this underflow case
3475 is_midpoint_lt_even = 0;
3476 is_inexact_lt_midpoint = 1;
3477 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
3478 is_midpoint_gt_even) { // double rounding error downward
3479 // res = res + 1
3480 res.w[0]++;
3481 if (res.w[0] == 0)
3482 res.w[1]++;
3483 is_midpoint_gt_even = 0;
3484 is_inexact_gt_midpoint = 1;
3485 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
3486 !is_inexact_lt_midpoint
3487 && !is_inexact_gt_midpoint) {
3488 // if this second rounding was exact the result may still be
3489 // inexact because of the first rounding
3490 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
3491 is_inexact_gt_midpoint = 1;
3492 }
3493 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
3494 is_inexact_lt_midpoint = 1;
3495 }
3496 } else if (is_midpoint_gt_even &&
3497 (is_inexact_gt_midpoint0
3498 || is_midpoint_lt_even0)) {
3499 // pulled up to a midpoint
3500 is_inexact_lt_midpoint = 1;
3501 is_inexact_gt_midpoint = 0;
3502 is_midpoint_lt_even = 0;
3503 is_midpoint_gt_even = 0;
3504 } else if (is_midpoint_lt_even &&
3505 (is_inexact_lt_midpoint0
3506 || is_midpoint_gt_even0)) {
3507 // pulled down to a midpoint
3508 is_inexact_lt_midpoint = 0;
3509 is_inexact_gt_midpoint = 1;
3510 is_midpoint_lt_even = 0;
3511 is_midpoint_gt_even = 0;
3512 } else {
3513 ;
3514 }
3515 }
3516 }
3517 // res contains the correct result
3518 // apply correction if not rounding to nearest
3519 if (rnd_mode != ROUNDING_TO_NEAREST) {
3520 rounding_correction (rnd_mode,
3521 is_inexact_lt_midpoint,
3522 is_inexact_gt_midpoint,
3523 is_midpoint_lt_even, is_midpoint_gt_even,
3524 e4, &res, pfpsf);
3525 }
3526 if (is_midpoint_lt_even || is_midpoint_gt_even ||
3527 is_inexact_lt_midpoint || is_inexact_gt_midpoint) {
3528 // set the inexact flag
3529 *pfpsf |= INEXACT_EXCEPTION;
3530 if (is_tiny)
3531 *pfpsf |= UNDERFLOW_EXCEPTION;
3532 }
3533 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3534 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3535 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3536 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3537 BID_SWAP128 (res);
3538 BID_RETURN (res)
3539
3540 } else if ((p34 <= delta && delta + q3 <= q4) || // Case (15)
3541 (delta < p34 && p34 < delta + q3 && delta + q3 <= q4) || //Case (16)
3542 (delta + q3 <= p34 && p34 < q4)) { // Case (17)
3543
3544 // calculate first the result rounded to the destination precision, with
3545 // unbounded exponent
3546
3547 add_and_round (q3, q4, e4, delta, p34, z_sign, p_sign, C3, C4,
3548 rnd_mode, &is_midpoint_lt_even,
3549 &is_midpoint_gt_even, &is_inexact_lt_midpoint,
3550 &is_inexact_gt_midpoint, pfpsf, &res);
3551 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3552 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3553 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3554 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3555 BID_SWAP128 (res);
3556 BID_RETURN (res)
3557
3558 } else {
3559 ;
3560 }
3561
3562 } // end if delta < 0
3563
3564 *ptr_is_midpoint_lt_even = is_midpoint_lt_even;
3565 *ptr_is_midpoint_gt_even = is_midpoint_gt_even;
3566 *ptr_is_inexact_lt_midpoint = is_inexact_lt_midpoint;
3567 *ptr_is_inexact_gt_midpoint = is_inexact_gt_midpoint;
3568 BID_SWAP128 (res);
3569 BID_RETURN (res)
3570
3571 }
3572
3573
3574 #if DECIMAL_CALL_BY_REFERENCE
3575 void
3576 bid128_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
3577 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3578 _EXC_INFO_PARAM) {
3579 UINT128 x = *px, y = *py, z = *pz;
3580 #if !DECIMAL_GLOBAL_ROUNDING
3581 unsigned int rnd_mode = *prnd_mode;
3582 #endif
3583 #else
3584 UINT128
3585 bid128_fma (UINT128 x, UINT128 y, UINT128 z
3586 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3587 _EXC_INFO_PARAM) {
3588 #endif
3589 int is_midpoint_lt_even, is_midpoint_gt_even,
3590 is_inexact_lt_midpoint, is_inexact_gt_midpoint;
3591 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3592
3593 #if DECIMAL_CALL_BY_REFERENCE
3594 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3595 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3596 &res, &x, &y, &z
3597 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3598 _EXC_INFO_ARG);
3599 #else
3600 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3601 &is_inexact_lt_midpoint,
3602 &is_inexact_gt_midpoint, x, y,
3603 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3604 _EXC_INFO_ARG);
3605 #endif
3606 BID_RETURN (res);
3607 }
3608
3609
3610 #if DECIMAL_CALL_BY_REFERENCE
3611 void
3612 bid128ddd_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT64 * pz
3613 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3614 _EXC_INFO_PARAM) {
3615 UINT64 x = *px, y = *py, z = *pz;
3616 #if !DECIMAL_GLOBAL_ROUNDING
3617 unsigned int rnd_mode = *prnd_mode;
3618 #endif
3619 #else
3620 UINT128
3621 bid128ddd_fma (UINT64 x, UINT64 y, UINT64 z
3622 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3623 _EXC_INFO_PARAM) {
3624 #endif
3625 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3626 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3627 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3628 UINT128 x1, y1, z1;
3629
3630 #if DECIMAL_CALL_BY_REFERENCE
3631 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3632 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3633 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3634 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3635 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3636 &res, &x1, &y1, &z1
3637 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3638 _EXC_INFO_ARG);
3639 #else
3640 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3641 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3642 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3643 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3644 &is_inexact_lt_midpoint,
3645 &is_inexact_gt_midpoint, x1, y1,
3646 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3647 _EXC_INFO_ARG);
3648 #endif
3649 BID_RETURN (res);
3650 }
3651
3652
3653 #if DECIMAL_CALL_BY_REFERENCE
3654 void
3655 bid128ddq_fma (UINT128 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3656 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3657 _EXC_INFO_PARAM) {
3658 UINT64 x = *px, y = *py;
3659 UINT128 z = *pz;
3660 #if !DECIMAL_GLOBAL_ROUNDING
3661 unsigned int rnd_mode = *prnd_mode;
3662 #endif
3663 #else
3664 UINT128
3665 bid128ddq_fma (UINT64 x, UINT64 y, UINT128 z
3666 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3667 _EXC_INFO_PARAM) {
3668 #endif
3669 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3670 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3671 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3672 UINT128 x1, y1;
3673
3674 #if DECIMAL_CALL_BY_REFERENCE
3675 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3676 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3677 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3678 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3679 &res, &x1, &y1, &z
3680 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3681 _EXC_INFO_ARG);
3682 #else
3683 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3684 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3685 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3686 &is_inexact_lt_midpoint,
3687 &is_inexact_gt_midpoint, x1, y1,
3688 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3689 _EXC_INFO_ARG);
3690 #endif
3691 BID_RETURN (res);
3692 }
3693
3694
3695 #if DECIMAL_CALL_BY_REFERENCE
3696 void
3697 bid128dqd_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3698 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3699 _EXC_INFO_PARAM) {
3700 UINT64 x = *px, z = *pz;
3701 #if !DECIMAL_GLOBAL_ROUNDING
3702 unsigned int rnd_mode = *prnd_mode;
3703 #endif
3704 #else
3705 UINT128
3706 bid128dqd_fma (UINT64 x, UINT128 y, UINT64 z
3707 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3708 _EXC_INFO_PARAM) {
3709 #endif
3710 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3711 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3712 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3713 UINT128 x1, z1;
3714
3715 #if DECIMAL_CALL_BY_REFERENCE
3716 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3717 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3718 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3719 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3720 &res, &x1, py, &z1
3721 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3722 _EXC_INFO_ARG);
3723 #else
3724 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3725 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3726 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3727 &is_inexact_lt_midpoint,
3728 &is_inexact_gt_midpoint, x1, y,
3729 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3730 _EXC_INFO_ARG);
3731 #endif
3732 BID_RETURN (res);
3733 }
3734
3735
3736 #if DECIMAL_CALL_BY_REFERENCE
3737 void
3738 bid128dqq_fma (UINT128 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3739 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3740 _EXC_INFO_PARAM) {
3741 UINT64 x = *px;
3742 #if !DECIMAL_GLOBAL_ROUNDING
3743 unsigned int rnd_mode = *prnd_mode;
3744 #endif
3745 #else
3746 UINT128
3747 bid128dqq_fma (UINT64 x, UINT128 y, UINT128 z
3748 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3749 _EXC_INFO_PARAM) {
3750 #endif
3751 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3752 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3753 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3754 UINT128 x1;
3755
3756 #if DECIMAL_CALL_BY_REFERENCE
3757 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3758 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3759 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3760 &res, &x1, py, pz
3761 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3762 _EXC_INFO_ARG);
3763 #else
3764 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3765 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3766 &is_inexact_lt_midpoint,
3767 &is_inexact_gt_midpoint, x1, y,
3768 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3769 _EXC_INFO_ARG);
3770 #endif
3771 BID_RETURN (res);
3772 }
3773
3774
3775 #if DECIMAL_CALL_BY_REFERENCE
3776 void
3777 bid128qdd_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
3778 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3779 _EXC_INFO_PARAM) {
3780 UINT64 y = *py, z = *pz;
3781 #if !DECIMAL_GLOBAL_ROUNDING
3782 unsigned int rnd_mode = *prnd_mode;
3783 #endif
3784 #else
3785 UINT128
3786 bid128qdd_fma (UINT128 x, UINT64 y, UINT64 z
3787 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3788 _EXC_INFO_PARAM) {
3789 #endif
3790 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3791 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3792 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3793 UINT128 y1, z1;
3794
3795 #if DECIMAL_CALL_BY_REFERENCE
3796 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3797 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3798 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3799 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3800 &res, px, &y1, &z1
3801 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3802 _EXC_INFO_ARG);
3803 #else
3804 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3805 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3806 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3807 &is_inexact_lt_midpoint,
3808 &is_inexact_gt_midpoint, x, y1,
3809 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3810 _EXC_INFO_ARG);
3811 #endif
3812 BID_RETURN (res);
3813 }
3814
3815
3816 #if DECIMAL_CALL_BY_REFERENCE
3817 void
3818 bid128qdq_fma (UINT128 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
3819 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3820 _EXC_INFO_PARAM) {
3821 UINT64 y = *py;
3822 #if !DECIMAL_GLOBAL_ROUNDING
3823 unsigned int rnd_mode = *prnd_mode;
3824 #endif
3825 #else
3826 UINT128
3827 bid128qdq_fma (UINT128 x, UINT64 y, UINT128 z
3828 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3829 _EXC_INFO_PARAM) {
3830 #endif
3831 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3832 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3833 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3834 UINT128 y1;
3835
3836 #if DECIMAL_CALL_BY_REFERENCE
3837 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3838 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3839 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3840 &res, px, &y1, pz
3841 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3842 _EXC_INFO_ARG);
3843 #else
3844 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3845 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3846 &is_inexact_lt_midpoint,
3847 &is_inexact_gt_midpoint, x, y1,
3848 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3849 _EXC_INFO_ARG);
3850 #endif
3851 BID_RETURN (res);
3852 }
3853
3854
3855 #if DECIMAL_CALL_BY_REFERENCE
3856 void
3857 bid128qqd_fma (UINT128 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
3858 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3859 _EXC_INFO_PARAM) {
3860 UINT64 z = *pz;
3861 #if !DECIMAL_GLOBAL_ROUNDING
3862 unsigned int rnd_mode = *prnd_mode;
3863 #endif
3864 #else
3865 UINT128
3866 bid128qqd_fma (UINT128 x, UINT128 y, UINT64 z
3867 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3868 _EXC_INFO_PARAM) {
3869 #endif
3870 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
3871 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
3872 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
3873 UINT128 z1;
3874
3875 #if DECIMAL_CALL_BY_REFERENCE
3876 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3877 bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3878 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint,
3879 &res, px, py, &z1
3880 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3881 _EXC_INFO_ARG);
3882 #else
3883 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3884 res = bid128_ext_fma (&is_midpoint_lt_even, &is_midpoint_gt_even,
3885 &is_inexact_lt_midpoint,
3886 &is_inexact_gt_midpoint, x, y,
3887 z1 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3888 _EXC_INFO_ARG);
3889 #endif
3890 BID_RETURN (res);
3891 }
3892
3893 // Note: bid128qqq_fma is represented by bid128_fma
3894
3895 // Note: bid64ddd_fma is represented by bid64_fma
3896
3897 #if DECIMAL_CALL_BY_REFERENCE
3898 void
3899 bid64ddq_fma (UINT64 * pres, UINT64 * px, UINT64 * py, UINT128 * pz
3900 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3901 _EXC_INFO_PARAM) {
3902 UINT64 x = *px, y = *py;
3903 #if !DECIMAL_GLOBAL_ROUNDING
3904 unsigned int rnd_mode = *prnd_mode;
3905 #endif
3906 #else
3907 UINT64
3908 bid64ddq_fma (UINT64 x, UINT64 y, UINT128 z
3909 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3910 _EXC_INFO_PARAM) {
3911 #endif
3912 UINT64 res1 = 0xbaddbaddbaddbaddull;
3913 UINT128 x1, y1;
3914
3915 #if DECIMAL_CALL_BY_REFERENCE
3916 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3917 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3918 bid64qqq_fma (&res1, &x1, &y1, pz
3919 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3920 _EXC_INFO_ARG);
3921 #else
3922 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3923 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3924 res1 = bid64qqq_fma (x1, y1, z
3925 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3926 _EXC_INFO_ARG);
3927 #endif
3928 BID_RETURN (res1);
3929 }
3930
3931
3932 #if DECIMAL_CALL_BY_REFERENCE
3933 void
3934 bid64dqd_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT64 * pz
3935 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3936 _EXC_INFO_PARAM) {
3937 UINT64 x = *px, z = *pz;
3938 #if !DECIMAL_GLOBAL_ROUNDING
3939 unsigned int rnd_mode = *prnd_mode;
3940 #endif
3941 #else
3942 UINT64
3943 bid64dqd_fma (UINT64 x, UINT128 y, UINT64 z
3944 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3945 _EXC_INFO_PARAM) {
3946 #endif
3947 UINT64 res1 = 0xbaddbaddbaddbaddull;
3948 UINT128 x1, z1;
3949
3950 #if DECIMAL_CALL_BY_REFERENCE
3951 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3952 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3953 bid64qqq_fma (&res1, &x1, py, &z1
3954 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3955 _EXC_INFO_ARG);
3956 #else
3957 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3958 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3959 res1 = bid64qqq_fma (x1, y, z1
3960 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3961 _EXC_INFO_ARG);
3962 #endif
3963 BID_RETURN (res1);
3964 }
3965
3966
3967 #if DECIMAL_CALL_BY_REFERENCE
3968 void
3969 bid64dqq_fma (UINT64 * pres, UINT64 * px, UINT128 * py, UINT128 * pz
3970 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3971 _EXC_INFO_PARAM) {
3972 UINT64 x = *px;
3973 #if !DECIMAL_GLOBAL_ROUNDING
3974 unsigned int rnd_mode = *prnd_mode;
3975 #endif
3976 #else
3977 UINT64
3978 bid64dqq_fma (UINT64 x, UINT128 y, UINT128 z
3979 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
3980 _EXC_INFO_PARAM) {
3981 #endif
3982 UINT64 res1 = 0xbaddbaddbaddbaddull;
3983 UINT128 x1;
3984
3985 #if DECIMAL_CALL_BY_REFERENCE
3986 bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3987 bid64qqq_fma (&res1, &x1, py, pz
3988 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3989 _EXC_INFO_ARG);
3990 #else
3991 x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
3992 res1 = bid64qqq_fma (x1, y, z
3993 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
3994 _EXC_INFO_ARG);
3995 #endif
3996 BID_RETURN (res1);
3997 }
3998
3999
4000 #if DECIMAL_CALL_BY_REFERENCE
4001 void
4002 bid64qdd_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT64 * pz
4003 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4004 _EXC_INFO_PARAM) {
4005 UINT64 y = *py, z = *pz;
4006 #if !DECIMAL_GLOBAL_ROUNDING
4007 unsigned int rnd_mode = *prnd_mode;
4008 #endif
4009 #else
4010 UINT64
4011 bid64qdd_fma (UINT128 x, UINT64 y, UINT64 z
4012 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4013 _EXC_INFO_PARAM) {
4014 #endif
4015 UINT64 res1 = 0xbaddbaddbaddbaddull;
4016 UINT128 y1, z1;
4017
4018 #if DECIMAL_CALL_BY_REFERENCE
4019 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4020 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4021 bid64qqq_fma (&res1, px, &y1, &z1
4022 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4023 _EXC_INFO_ARG);
4024 #else
4025 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4026 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4027 res1 = bid64qqq_fma (x, y1, z1
4028 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4029 _EXC_INFO_ARG);
4030 #endif
4031 BID_RETURN (res1);
4032 }
4033
4034
4035 #if DECIMAL_CALL_BY_REFERENCE
4036 void
4037 bid64qdq_fma (UINT64 * pres, UINT128 * px, UINT64 * py, UINT128 * pz
4038 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4039 _EXC_INFO_PARAM) {
4040 UINT64 y = *py;
4041 #if !DECIMAL_GLOBAL_ROUNDING
4042 unsigned int rnd_mode = *prnd_mode;
4043 #endif
4044 #else
4045 UINT64
4046 bid64qdq_fma (UINT128 x, UINT64 y, UINT128 z
4047 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4048 _EXC_INFO_PARAM) {
4049 #endif
4050 UINT64 res1 = 0xbaddbaddbaddbaddull;
4051 UINT128 y1;
4052
4053 #if DECIMAL_CALL_BY_REFERENCE
4054 bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4055 bid64qqq_fma (&res1, px, &y1, pz
4056 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4057 _EXC_INFO_ARG);
4058 #else
4059 y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4060 res1 = bid64qqq_fma (x, y1, z
4061 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4062 _EXC_INFO_ARG);
4063 #endif
4064 BID_RETURN (res1);
4065 }
4066
4067
4068 #if DECIMAL_CALL_BY_REFERENCE
4069 void
4070 bid64qqd_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT64 * pz
4071 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4072 _EXC_INFO_PARAM) {
4073 UINT64 z = *pz;
4074 #if !DECIMAL_GLOBAL_ROUNDING
4075 unsigned int rnd_mode = *prnd_mode;
4076 #endif
4077 #else
4078 UINT64
4079 bid64qqd_fma (UINT128 x, UINT128 y, UINT64 z
4080 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4081 _EXC_INFO_PARAM) {
4082 #endif
4083 UINT64 res1 = 0xbaddbaddbaddbaddull;
4084 UINT128 z1;
4085
4086 #if DECIMAL_CALL_BY_REFERENCE
4087 bid64_to_bid128 (&z1, &z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4088 bid64qqq_fma (&res1, px, py, &z1
4089 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4090 _EXC_INFO_ARG);
4091 #else
4092 z1 = bid64_to_bid128 (z _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
4093 res1 = bid64qqq_fma (x, y, z1
4094 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4095 _EXC_INFO_ARG);
4096 #endif
4097 BID_RETURN (res1);
4098 }
4099
4100
4101 #if DECIMAL_CALL_BY_REFERENCE
4102 void
4103 bid64qqq_fma (UINT64 * pres, UINT128 * px, UINT128 * py, UINT128 * pz
4104 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4105 _EXC_INFO_PARAM) {
4106 UINT128 x = *px, y = *py, z = *pz;
4107 #if !DECIMAL_GLOBAL_ROUNDING
4108 unsigned int rnd_mode = *prnd_mode;
4109 #endif
4110 #else
4111 UINT64
4112 bid64qqq_fma (UINT128 x, UINT128 y, UINT128 z
4113 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
4114 _EXC_INFO_PARAM) {
4115 #endif
4116 int is_midpoint_lt_even0 = 0, is_midpoint_gt_even0 = 0,
4117 is_inexact_lt_midpoint0 = 0, is_inexact_gt_midpoint0 = 0;
4118 int is_midpoint_lt_even = 0, is_midpoint_gt_even = 0,
4119 is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
4120 int incr_exp;
4121 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4122 UINT128 res128 = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} };
4123 UINT64 res1 = 0xbaddbaddbaddbaddull;
4124 unsigned int save_fpsf; // needed because of the call to bid128_ext_fma
4125 UINT64 sign;
4126 UINT64 exp;
4127 int unbexp;
4128 UINT128 C;
4129 BID_UI64DOUBLE tmp;
4130 int nr_bits;
4131 int q, x0;
4132 int scale;
4133 int lt_half_ulp = 0, eq_half_ulp = 0;
4134
4135 // Note: for rounding modes other than RN or RA, the result can be obtained
4136 // by rounding first to BID128 and then to BID64
4137
4138 save_fpsf = *pfpsf; // sticky bits - caller value must be preserved
4139 *pfpsf = 0;
4140
4141 #if DECIMAL_CALL_BY_REFERENCE
4142 bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4143 &is_inexact_lt_midpoint0, &is_inexact_gt_midpoint0,
4144 &res, &x, &y, &z
4145 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4146 _EXC_INFO_ARG);
4147 #else
4148 res = bid128_ext_fma (&is_midpoint_lt_even0, &is_midpoint_gt_even0,
4149 &is_inexact_lt_midpoint0,
4150 &is_inexact_gt_midpoint0, x, y,
4151 z _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
4152 _EXC_INFO_ARG);
4153 #endif
4154
4155 if ((rnd_mode == ROUNDING_DOWN) || (rnd_mode == ROUNDING_UP) ||
4156 (rnd_mode == ROUNDING_TO_ZERO) || // no double rounding error is possible
4157 ((res.w[HIGH_128W] & MASK_NAN) == MASK_NAN) || //res=QNaN (cannot be SNaN)
4158 ((res.w[HIGH_128W] & MASK_ANY_INF) == MASK_INF)) { // result is infinity
4159 #if DECIMAL_CALL_BY_REFERENCE
4160 bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4161 #else
4162 res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4163 #endif
4164 // determine the unbiased exponent of the result
4165 unbexp = ((res1 >> 53) & 0x3ff) - 398;
4166
4167 // if subnormal, res1 must have exp = -398
4168 // if tiny and inexact set underflow and inexact status flags
4169 if (!((res1 & MASK_NAN) == MASK_NAN) && // res1 not NaN
4170 (unbexp == -398)
4171 && ((res1 & MASK_BINARY_SIG1) < 1000000000000000ull)
4172 && (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0
4173 || is_midpoint_lt_even0 || is_midpoint_gt_even0)) {
4174 // set the inexact flag and the underflow flag
4175 *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4176 } else if (is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4177 is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4178 // set the inexact flag and the underflow flag
4179 *pfpsf |= INEXACT_EXCEPTION;
4180 }
4181
4182 *pfpsf |= save_fpsf;
4183 BID_RETURN (res1);
4184 } // else continue, and use rounding to nearest to round to 16 digits
4185
4186 // at this point the result is rounded to nearest (even or away) to 34 digits
4187 // (or less if exact), and it is zero or finite non-zero canonical [sub]normal
4188 sign = res.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
4189 exp = res.w[HIGH_128W] & MASK_EXP; // biased and shifted left 49 bits
4190 unbexp = (exp >> 49) - 6176;
4191 C.w[1] = res.w[HIGH_128W] & MASK_COEFF;
4192 C.w[0] = res.w[LOW_128W];
4193
4194 if ((C.w[1] == 0x0 && C.w[0] == 0x0) || // result is zero
4195 (unbexp <= (-398 - 35)) || (unbexp >= (369 + 16))) {
4196 // clear under/overflow
4197 #if DECIMAL_CALL_BY_REFERENCE
4198 bid128_to_bid64 (&res1, &res _RND_MODE_ARG _EXC_FLAGS_ARG);
4199 #else
4200 res1 = bid128_to_bid64 (res _RND_MODE_ARG _EXC_FLAGS_ARG);
4201 #endif
4202 *pfpsf |= save_fpsf;
4203 BID_RETURN (res1);
4204 } // else continue
4205
4206 // -398 - 34 <= unbexp <= 369 + 15
4207 if (rnd_mode == ROUNDING_TIES_AWAY) {
4208 // apply correction, if needed, to make the result rounded to nearest-even
4209 if (is_midpoint_gt_even) {
4210 // res = res - 1
4211 res1--; // res1 is now even
4212 } // else the result is already correctly rounded to nearest-even
4213 }
4214 // at this point the result is finite, non-zero canonical normal or subnormal,
4215 // and in most cases overflow or underflow will not occur
4216
4217 // determine the number of digits q in the result
4218 // q = nr. of decimal digits in x
4219 // determine first the nr. of bits in x
4220 if (C.w[1] == 0) {
4221 if (C.w[0] >= 0x0020000000000000ull) { // x >= 2^53
4222 // split the 64-bit value in two 32-bit halves to avoid rounding errors
4223 if (C.w[0] >= 0x0000000100000000ull) { // x >= 2^32
4224 tmp.d = (double) (C.w[0] >> 32); // exact conversion
4225 nr_bits =
4226 33 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4227 } else { // x < 2^32
4228 tmp.d = (double) (C.w[0]); // exact conversion
4229 nr_bits =
4230 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4231 }
4232 } else { // if x < 2^53
4233 tmp.d = (double) C.w[0]; // exact conversion
4234 nr_bits =
4235 1 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4236 }
4237 } else { // C.w[1] != 0 => nr. bits = 64 + nr_bits (C.w[1])
4238 tmp.d = (double) C.w[1]; // exact conversion
4239 nr_bits =
4240 65 + ((((unsigned int) (tmp.ui64 >> 52)) & 0x7ff) - 0x3ff);
4241 }
4242 q = nr_digits[nr_bits - 1].digits;
4243 if (q == 0) {
4244 q = nr_digits[nr_bits - 1].digits1;
4245 if (C.w[1] > nr_digits[nr_bits - 1].threshold_hi ||
4246 (C.w[1] == nr_digits[nr_bits - 1].threshold_hi &&
4247 C.w[0] >= nr_digits[nr_bits - 1].threshold_lo))
4248 q++;
4249 }
4250 // if q > 16, round to nearest even to 16 digits (but for underflow it may
4251 // have to be truncated even more)
4252 if (q > 16) {
4253 x0 = q - 16;
4254 if (q <= 18) {
4255 round64_2_18 (q, x0, C.w[0], &res1, &incr_exp,
4256 &is_midpoint_lt_even, &is_midpoint_gt_even,
4257 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4258 } else { // 19 <= q <= 34
4259 round128_19_38 (q, x0, C, &res128, &incr_exp,
4260 &is_midpoint_lt_even, &is_midpoint_gt_even,
4261 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4262 res1 = res128.w[0]; // the result fits in 64 bits
4263 }
4264 unbexp = unbexp + x0;
4265 if (incr_exp)
4266 unbexp++;
4267 q = 16; // need to set in case denormalization is necessary
4268 } else {
4269 // the result does not require a second rounding (and it must have
4270 // been exact in the first rounding, since q <= 16)
4271 res1 = C.w[0];
4272 }
4273
4274 // avoid a double rounding error
4275 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4276 is_midpoint_lt_even) { // double rounding error upward
4277 // res = res - 1
4278 res1--; // res1 becomes odd
4279 is_midpoint_lt_even = 0;
4280 is_inexact_lt_midpoint = 1;
4281 if (res1 == 0x00038d7ea4c67fffull) { // 10^15 - 1
4282 res1 = 0x002386f26fc0ffffull; // 10^16 - 1
4283 unbexp--;
4284 }
4285 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4286 is_midpoint_gt_even) { // double rounding error downward
4287 // res = res + 1
4288 res1++; // res1 becomes odd (so it cannot be 10^16)
4289 is_midpoint_gt_even = 0;
4290 is_inexact_gt_midpoint = 1;
4291 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4292 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4293 // if this second rounding was exact the result may still be
4294 // inexact because of the first rounding
4295 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4296 is_inexact_gt_midpoint = 1;
4297 }
4298 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4299 is_inexact_lt_midpoint = 1;
4300 }
4301 } else if (is_midpoint_gt_even &&
4302 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4303 // pulled up to a midpoint
4304 is_inexact_lt_midpoint = 1;
4305 is_inexact_gt_midpoint = 0;
4306 is_midpoint_lt_even = 0;
4307 is_midpoint_gt_even = 0;
4308 } else if (is_midpoint_lt_even &&
4309 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4310 // pulled down to a midpoint
4311 is_inexact_lt_midpoint = 0;
4312 is_inexact_gt_midpoint = 1;
4313 is_midpoint_lt_even = 0;
4314 is_midpoint_gt_even = 0;
4315 } else {
4316 ;
4317 }
4318 // this is the result rounded correctly to nearest even, with unbounded exp.
4319
4320 // check for overflow
4321 if (q + unbexp > P16 + expmax16) {
4322 res1 = sign | 0x7800000000000000ull;
4323 *pfpsf |= (INEXACT_EXCEPTION | OVERFLOW_EXCEPTION);
4324 *pfpsf |= save_fpsf;
4325 BID_RETURN (res1)
4326 } else if (unbexp > expmax16) { // q + unbexp <= P16 + expmax16
4327 // not overflow; the result must be exact, and we can multiply res1 by
4328 // 10^(unbexp - expmax16) and the product will fit in 16 decimal digits
4329 scale = unbexp - expmax16;
4330 res1 = res1 * ten2k64[scale]; // res1 * 10^scale
4331 unbexp = expmax16; // unbexp - scale
4332 } else {
4333 ; // continue
4334 }
4335
4336 // check for underflow
4337 if (q + unbexp < P16 + expmin16) {
4338 if (unbexp < expmin16) {
4339 // we must truncate more of res
4340 x0 = expmin16 - unbexp; // x0 >= 1
4341 is_inexact_lt_midpoint0 = is_inexact_lt_midpoint;
4342 is_inexact_gt_midpoint0 = is_inexact_gt_midpoint;
4343 is_midpoint_lt_even0 = is_midpoint_lt_even;
4344 is_midpoint_gt_even0 = is_midpoint_gt_even;
4345 is_inexact_lt_midpoint = 0;
4346 is_inexact_gt_midpoint = 0;
4347 is_midpoint_lt_even = 0;
4348 is_midpoint_gt_even = 0;
4349 // the number of decimal digits in res1 is q
4350 if (x0 < q) { // 1 <= x0 <= q-1 => round res to q - x0 digits
4351 // 2 <= q <= 16, 1 <= x0 <= 15
4352 round64_2_18 (q, x0, res1, &res1, &incr_exp,
4353 &is_midpoint_lt_even, &is_midpoint_gt_even,
4354 &is_inexact_lt_midpoint, &is_inexact_gt_midpoint);
4355 if (incr_exp) {
4356 // res1 = 10^(q-x0), 1 <= q - x0 <= q - 1, 1 <= q - x0 <= 15
4357 res1 = ten2k64[q - x0];
4358 }
4359 unbexp = unbexp + x0; // expmin16
4360 } else if (x0 == q) {
4361 // the second rounding is for 0.d(0)d(1)...d(q-1) * 10^emin
4362 // determine relationship with 1/2 ulp
4363 // q <= 16
4364 if (res1 < midpoint64[q - 1]) { // < 1/2 ulp
4365 lt_half_ulp = 1;
4366 is_inexact_lt_midpoint = 1;
4367 } else if (res1 == midpoint64[q - 1]) { // = 1/2 ulp
4368 eq_half_ulp = 1;
4369 is_midpoint_gt_even = 1;
4370 } else { // > 1/2 ulp
4371 // gt_half_ulp = 1;
4372 is_inexact_gt_midpoint = 1;
4373 }
4374 if (lt_half_ulp || eq_half_ulp) {
4375 // res = +0.0 * 10^expmin16
4376 res1 = 0x0000000000000000ull;
4377 } else { // if (gt_half_ulp)
4378 // res = +1 * 10^expmin16
4379 res1 = 0x0000000000000001ull;
4380 }
4381 unbexp = expmin16;
4382 } else { // if (x0 > q)
4383 // the second rounding is for 0.0...d(0)d(1)...d(q-1) * 10^emin
4384 res1 = 0x0000000000000000ull;
4385 unbexp = expmin16;
4386 is_inexact_lt_midpoint = 1;
4387 }
4388 // avoid a double rounding error
4389 if ((is_inexact_gt_midpoint0 || is_midpoint_lt_even0) &&
4390 is_midpoint_lt_even) { // double rounding error upward
4391 // res = res - 1
4392 res1--; // res1 becomes odd
4393 is_midpoint_lt_even = 0;
4394 is_inexact_lt_midpoint = 1;
4395 } else if ((is_inexact_lt_midpoint0 || is_midpoint_gt_even0) &&
4396 is_midpoint_gt_even) { // double rounding error downward
4397 // res = res + 1
4398 res1++; // res1 becomes odd
4399 is_midpoint_gt_even = 0;
4400 is_inexact_gt_midpoint = 1;
4401 } else if (!is_midpoint_lt_even && !is_midpoint_gt_even &&
4402 !is_inexact_lt_midpoint && !is_inexact_gt_midpoint) {
4403 // if this rounding was exact the result may still be
4404 // inexact because of the previous roundings
4405 if (is_inexact_gt_midpoint0 || is_midpoint_lt_even0) {
4406 is_inexact_gt_midpoint = 1;
4407 }
4408 if (is_inexact_lt_midpoint0 || is_midpoint_gt_even0) {
4409 is_inexact_lt_midpoint = 1;
4410 }
4411 } else if (is_midpoint_gt_even &&
4412 (is_inexact_gt_midpoint0 || is_midpoint_lt_even0)) {
4413 // pulled up to a midpoint
4414 is_inexact_lt_midpoint = 1;
4415 is_inexact_gt_midpoint = 0;
4416 is_midpoint_lt_even = 0;
4417 is_midpoint_gt_even = 0;
4418 } else if (is_midpoint_lt_even &&
4419 (is_inexact_lt_midpoint0 || is_midpoint_gt_even0)) {
4420 // pulled down to a midpoint
4421 is_inexact_lt_midpoint = 0;
4422 is_inexact_gt_midpoint = 1;
4423 is_midpoint_lt_even = 0;
4424 is_midpoint_gt_even = 0;
4425 } else {
4426 ;
4427 }
4428 }
4429 // else if unbexp >= emin then q < P (because q + unbexp < P16 + expmin16)
4430 // and the result is tiny and exact
4431
4432 // check for inexact result
4433 if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4434 is_midpoint_lt_even || is_midpoint_gt_even ||
4435 is_inexact_lt_midpoint0 || is_inexact_gt_midpoint0 ||
4436 is_midpoint_lt_even0 || is_midpoint_gt_even0) {
4437 // set the inexact flag and the underflow flag
4438 *pfpsf |= (INEXACT_EXCEPTION | UNDERFLOW_EXCEPTION);
4439 }
4440 } else if (is_inexact_lt_midpoint || is_inexact_gt_midpoint ||
4441 is_midpoint_lt_even || is_midpoint_gt_even) {
4442 *pfpsf |= INEXACT_EXCEPTION;
4443 }
4444 // this is the result rounded correctly to nearest, with bounded exponent
4445
4446 if (rnd_mode == ROUNDING_TIES_AWAY && is_midpoint_gt_even) { // correction
4447 // res = res + 1
4448 res1++; // res1 is now odd
4449 } // else the result is already correct
4450
4451 // assemble the result
4452 if (res1 < 0x0020000000000000ull) { // res < 2^53
4453 res1 = sign | ((UINT64) (unbexp + 398) << 53) | res1;
4454 } else { // res1 >= 2^53
4455 res1 = sign | MASK_STEERING_BITS |
4456 ((UINT64) (unbexp + 398) << 51) | (res1 & MASK_BINARY_SIG2);
4457 }
4458 *pfpsf |= save_fpsf;
4459 BID_RETURN (res1);
4460 }