comparison libgcc/config/libbid/bid64_add.c @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 04ced10e8804
comparison
equal deleted inserted replaced
-1:000000000000 0:a06113de4d67
1 /* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
8 version.
9
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 /*****************************************************************************
25 * BID64 add
26 *****************************************************************************
27 *
28 * Algorithm description:
29 *
30 * if(exponent_a < exponent_b)
31 * switch a, b
32 * diff_expon = exponent_a - exponent_b
33 * if(diff_expon > 16)
34 * return normalize(a)
35 * if(coefficient_a*10^diff_expon guaranteed below 2^62)
36 * S = sign_a*coefficient_a*10^diff_expon + sign_b*coefficient_b
37 * if(|S|<10^16)
38 * return get_BID64(sign(S),exponent_b,|S|)
39 * else
40 * determine number of extra digits in S (1, 2, or 3)
41 * return rounded result
42 * else // large exponent difference
43 * if(number_digits(coefficient_a*10^diff_expon) +/- 10^16)
44 * guaranteed the same as
45 * number_digits(coefficient_a*10^diff_expon) )
46 * S = normalize(coefficient_a + (sign_a^sign_b)*10^(16-diff_expon))
47 * corr = 10^16 + (sign_a^sign_b)*coefficient_b
48 * corr*10^exponent_b is rounded so it aligns with S*10^exponent_S
49 * return get_BID64(sign_a,exponent(S),S+rounded(corr))
50 * else
51 * add sign_a*coefficient_a*10^diff_expon, sign_b*coefficient_b
52 * in 128-bit integer arithmetic, then round to 16 decimal digits
53 *
54 *
55 ****************************************************************************/
56
57 #include "bid_internal.h"
58
59 #if DECIMAL_CALL_BY_REFERENCE
60 void bid64_add (UINT64 * pres, UINT64 * px,
61 UINT64 *
62 py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
63 _EXC_INFO_PARAM);
64 #else
65 UINT64 bid64_add (UINT64 x,
66 UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM
67 _EXC_MASKS_PARAM _EXC_INFO_PARAM);
68 #endif
69
70 #if DECIMAL_CALL_BY_REFERENCE
71
72 void
73 bid64_sub (UINT64 * pres, UINT64 * px,
74 UINT64 *
75 py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
76 _EXC_INFO_PARAM) {
77 UINT64 y = *py;
78 #if !DECIMAL_GLOBAL_ROUNDING
79 _IDEC_round rnd_mode = *prnd_mode;
80 #endif
81 // check if y is not NaN
82 if (((y & NAN_MASK64) != NAN_MASK64))
83 y ^= 0x8000000000000000ull;
84 bid64_add (pres, px,
85 &y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
86 _EXC_INFO_ARG);
87 }
88 #else
89
90 UINT64
91 bid64_sub (UINT64 x,
92 UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM
93 _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
94 // check if y is not NaN
95 if (((y & NAN_MASK64) != NAN_MASK64))
96 y ^= 0x8000000000000000ull;
97
98 return bid64_add (x,
99 y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
100 _EXC_INFO_ARG);
101 }
102 #endif
103
104
105
106 #if DECIMAL_CALL_BY_REFERENCE
107
108 void
109 bid64_add (UINT64 * pres, UINT64 * px,
110 UINT64 *
111 py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
112 _EXC_INFO_PARAM) {
113 UINT64 x, y;
114 #else
115
116 UINT64
117 bid64_add (UINT64 x,
118 UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM
119 _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
120 #endif
121
122 UINT128 CA, CT, CT_new;
123 UINT64 sign_x, sign_y, coefficient_x, coefficient_y, C64_new;
124 UINT64 valid_x, valid_y;
125 UINT64 res;
126 UINT64 sign_a, sign_b, coefficient_a, coefficient_b, sign_s, sign_ab,
127 rem_a;
128 UINT64 saved_ca, saved_cb, C0_64, C64, remainder_h, T1, carry, tmp;
129 int_double tempx;
130 int exponent_x, exponent_y, exponent_a, exponent_b, diff_dec_expon;
131 int bin_expon_ca, extra_digits, amount, scale_k, scale_ca;
132 unsigned rmode, status;
133
134 #if DECIMAL_CALL_BY_REFERENCE
135 #if !DECIMAL_GLOBAL_ROUNDING
136 _IDEC_round rnd_mode = *prnd_mode;
137 #endif
138 x = *px;
139 y = *py;
140 #endif
141
142 valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x);
143 valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y);
144
145 // unpack arguments, check for NaN or Infinity
146 if (!valid_x) {
147 // x is Inf. or NaN
148
149 // test if x is NaN
150 if ((x & NAN_MASK64) == NAN_MASK64) {
151 #ifdef SET_STATUS_FLAGS
152 if (((x & SNAN_MASK64) == SNAN_MASK64) // sNaN
153 || ((y & SNAN_MASK64) == SNAN_MASK64))
154 __set_status_flags (pfpsf, INVALID_EXCEPTION);
155 #endif
156 res = coefficient_x & QUIET_MASK64;
157 BID_RETURN (res);
158 }
159 // x is Infinity?
160 if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
161 // check if y is Inf
162 if (((y & NAN_MASK64) == INFINITY_MASK64)) {
163 if (sign_x == (y & 0x8000000000000000ull)) {
164 res = coefficient_x;
165 BID_RETURN (res);
166 }
167 // return NaN
168 {
169 #ifdef SET_STATUS_FLAGS
170 __set_status_flags (pfpsf, INVALID_EXCEPTION);
171 #endif
172 res = NAN_MASK64;
173 BID_RETURN (res);
174 }
175 }
176 // check if y is NaN
177 if (((y & NAN_MASK64) == NAN_MASK64)) {
178 res = coefficient_y & QUIET_MASK64;
179 #ifdef SET_STATUS_FLAGS
180 if (((y & SNAN_MASK64) == SNAN_MASK64))
181 __set_status_flags (pfpsf, INVALID_EXCEPTION);
182 #endif
183 BID_RETURN (res);
184 }
185 // otherwise return +/-Inf
186 {
187 res = coefficient_x;
188 BID_RETURN (res);
189 }
190 }
191 // x is 0
192 {
193 if (((y & INFINITY_MASK64) != INFINITY_MASK64) && coefficient_y) {
194 if (exponent_y <= exponent_x) {
195 res = y;
196 BID_RETURN (res);
197 }
198 }
199 }
200
201 }
202 if (!valid_y) {
203 // y is Inf. or NaN?
204 if (((y & INFINITY_MASK64) == INFINITY_MASK64)) {
205 #ifdef SET_STATUS_FLAGS
206 if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN
207 __set_status_flags (pfpsf, INVALID_EXCEPTION);
208 #endif
209 res = coefficient_y & QUIET_MASK64;
210 BID_RETURN (res);
211 }
212 // y is 0
213 if (!coefficient_x) { // x==0
214 if (exponent_x <= exponent_y)
215 res = ((UINT64) exponent_x) << 53;
216 else
217 res = ((UINT64) exponent_y) << 53;
218 if (sign_x == sign_y)
219 res |= sign_x;
220 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
221 #ifndef IEEE_ROUND_NEAREST
222 if (rnd_mode == ROUNDING_DOWN && sign_x != sign_y)
223 res |= 0x8000000000000000ull;
224 #endif
225 #endif
226 BID_RETURN (res);
227 } else if (exponent_y >= exponent_x) {
228 res = x;
229 BID_RETURN (res);
230 }
231 }
232 // sort arguments by exponent
233 if (exponent_x < exponent_y) {
234 sign_a = sign_y;
235 exponent_a = exponent_y;
236 coefficient_a = coefficient_y;
237 sign_b = sign_x;
238 exponent_b = exponent_x;
239 coefficient_b = coefficient_x;
240 } else {
241 sign_a = sign_x;
242 exponent_a = exponent_x;
243 coefficient_a = coefficient_x;
244 sign_b = sign_y;
245 exponent_b = exponent_y;
246 coefficient_b = coefficient_y;
247 }
248
249 // exponent difference
250 diff_dec_expon = exponent_a - exponent_b;
251
252 /* get binary coefficients of x and y */
253
254 //--- get number of bits in the coefficients of x and y ---
255
256 // version 2 (original)
257 tempx.d = (double) coefficient_a;
258 bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
259
260 if (diff_dec_expon > MAX_FORMAT_DIGITS) {
261 // normalize a to a 16-digit coefficient
262
263 scale_ca = estimate_decimal_digits[bin_expon_ca];
264 if (coefficient_a >= power10_table_128[scale_ca].w[0])
265 scale_ca++;
266
267 scale_k = 16 - scale_ca;
268
269 coefficient_a *= power10_table_128[scale_k].w[0];
270
271 diff_dec_expon -= scale_k;
272 exponent_a -= scale_k;
273
274 /* get binary coefficients of x and y */
275
276 //--- get number of bits in the coefficients of x and y ---
277 tempx.d = (double) coefficient_a;
278 bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
279
280 if (diff_dec_expon > MAX_FORMAT_DIGITS) {
281 #ifdef SET_STATUS_FLAGS
282 if (coefficient_b) {
283 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
284 }
285 #endif
286
287 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
288 #ifndef IEEE_ROUND_NEAREST
289 if (((rnd_mode) & 3) && coefficient_b) // not ROUNDING_TO_NEAREST
290 {
291 switch (rnd_mode) {
292 case ROUNDING_DOWN:
293 if (sign_b) {
294 coefficient_a -= ((((SINT64) sign_a) >> 63) | 1);
295 if (coefficient_a < 1000000000000000ull) {
296 exponent_a--;
297 coefficient_a = 9999999999999999ull;
298 } else if (coefficient_a >= 10000000000000000ull) {
299 exponent_a++;
300 coefficient_a = 1000000000000000ull;
301 }
302 }
303 break;
304 case ROUNDING_UP:
305 if (!sign_b) {
306 coefficient_a += ((((SINT64) sign_a) >> 63) | 1);
307 if (coefficient_a < 1000000000000000ull) {
308 exponent_a--;
309 coefficient_a = 9999999999999999ull;
310 } else if (coefficient_a >= 10000000000000000ull) {
311 exponent_a++;
312 coefficient_a = 1000000000000000ull;
313 }
314 }
315 break;
316 default: // RZ
317 if (sign_a != sign_b) {
318 coefficient_a--;
319 if (coefficient_a < 1000000000000000ull) {
320 exponent_a--;
321 coefficient_a = 9999999999999999ull;
322 }
323 }
324 break;
325 }
326 } else
327 #endif
328 #endif
329 // check special case here
330 if ((coefficient_a == 1000000000000000ull)
331 && (diff_dec_expon == MAX_FORMAT_DIGITS + 1)
332 && (sign_a ^ sign_b)
333 && (coefficient_b > 5000000000000000ull)) {
334 coefficient_a = 9999999999999999ull;
335 exponent_a--;
336 }
337
338 res =
339 fast_get_BID64_check_OF (sign_a, exponent_a, coefficient_a,
340 rnd_mode, pfpsf);
341 BID_RETURN (res);
342 }
343 }
344 // test whether coefficient_a*10^(exponent_a-exponent_b) may exceed 2^62
345 if (bin_expon_ca + estimate_bin_expon[diff_dec_expon] < 60) {
346 // coefficient_a*10^(exponent_a-exponent_b)<2^63
347
348 // multiply by 10^(exponent_a-exponent_b)
349 coefficient_a *= power10_table_128[diff_dec_expon].w[0];
350
351 // sign mask
352 sign_b = ((SINT64) sign_b) >> 63;
353 // apply sign to coeff. of b
354 coefficient_b = (coefficient_b + sign_b) ^ sign_b;
355
356 // apply sign to coefficient a
357 sign_a = ((SINT64) sign_a) >> 63;
358 coefficient_a = (coefficient_a + sign_a) ^ sign_a;
359
360 coefficient_a += coefficient_b;
361 // get sign
362 sign_s = ((SINT64) coefficient_a) >> 63;
363 coefficient_a = (coefficient_a + sign_s) ^ sign_s;
364 sign_s &= 0x8000000000000000ull;
365
366 // coefficient_a < 10^16 ?
367 if (coefficient_a < power10_table_128[MAX_FORMAT_DIGITS].w[0]) {
368 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
369 #ifndef IEEE_ROUND_NEAREST
370 if (rnd_mode == ROUNDING_DOWN && (!coefficient_a)
371 && sign_a != sign_b)
372 sign_s = 0x8000000000000000ull;
373 #endif
374 #endif
375 res = very_fast_get_BID64 (sign_s, exponent_b, coefficient_a);
376 BID_RETURN (res);
377 }
378 // otherwise rounding is necessary
379
380 // already know coefficient_a<10^19
381 // coefficient_a < 10^17 ?
382 if (coefficient_a < power10_table_128[17].w[0])
383 extra_digits = 1;
384 else if (coefficient_a < power10_table_128[18].w[0])
385 extra_digits = 2;
386 else
387 extra_digits = 3;
388
389 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
390 #ifndef IEEE_ROUND_NEAREST
391 rmode = rnd_mode;
392 if (sign_s && (unsigned) (rmode - 1) < 2)
393 rmode = 3 - rmode;
394 #else
395 rmode = 0;
396 #endif
397 #else
398 rmode = 0;
399 #endif
400 coefficient_a += round_const_table[rmode][extra_digits];
401
402 // get P*(2^M[extra_digits])/10^extra_digits
403 __mul_64x64_to_128 (CT, coefficient_a,
404 reciprocals10_64[extra_digits]);
405
406 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
407 amount = short_recip_scale[extra_digits];
408 C64 = CT.w[1] >> amount;
409
410 } else {
411 // coefficient_a*10^(exponent_a-exponent_b) is large
412 sign_s = sign_a;
413
414 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
415 #ifndef IEEE_ROUND_NEAREST
416 rmode = rnd_mode;
417 if (sign_s && (unsigned) (rmode - 1) < 2)
418 rmode = 3 - rmode;
419 #else
420 rmode = 0;
421 #endif
422 #else
423 rmode = 0;
424 #endif
425
426 // check whether we can take faster path
427 scale_ca = estimate_decimal_digits[bin_expon_ca];
428
429 sign_ab = sign_a ^ sign_b;
430 sign_ab = ((SINT64) sign_ab) >> 63;
431
432 // T1 = 10^(16-diff_dec_expon)
433 T1 = power10_table_128[16 - diff_dec_expon].w[0];
434
435 // get number of digits in coefficient_a
436 if (coefficient_a >= power10_table_128[scale_ca].w[0]) {
437 scale_ca++;
438 }
439
440 scale_k = 16 - scale_ca;
441
442 // addition
443 saved_ca = coefficient_a - T1;
444 coefficient_a =
445 (SINT64) saved_ca *(SINT64) power10_table_128[scale_k].w[0];
446 extra_digits = diff_dec_expon - scale_k;
447
448 // apply sign
449 saved_cb = (coefficient_b + sign_ab) ^ sign_ab;
450 // add 10^16 and rounding constant
451 coefficient_b =
452 saved_cb + 10000000000000000ull +
453 round_const_table[rmode][extra_digits];
454
455 // get P*(2^M[extra_digits])/10^extra_digits
456 __mul_64x64_to_128 (CT, coefficient_b,
457 reciprocals10_64[extra_digits]);
458
459 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
460 amount = short_recip_scale[extra_digits];
461 C0_64 = CT.w[1] >> amount;
462
463 // result coefficient
464 C64 = C0_64 + coefficient_a;
465 // filter out difficult (corner) cases
466 // this test ensures the number of digits in coefficient_a does not change
467 // after adding (the appropriately scaled and rounded) coefficient_b
468 if ((UINT64) (C64 - 1000000000000000ull - 1) >
469 9000000000000000ull - 2) {
470 if (C64 >= 10000000000000000ull) {
471 // result has more than 16 digits
472 if (!scale_k) {
473 // must divide coeff_a by 10
474 saved_ca = saved_ca + T1;
475 __mul_64x64_to_128 (CA, saved_ca, 0x3333333333333334ull);
476 //reciprocals10_64[1]);
477 coefficient_a = CA.w[1] >> 1;
478 rem_a =
479 saved_ca - (coefficient_a << 3) - (coefficient_a << 1);
480 coefficient_a = coefficient_a - T1;
481
482 saved_cb += rem_a * power10_table_128[diff_dec_expon].w[0];
483 } else
484 coefficient_a =
485 (SINT64) (saved_ca - T1 -
486 (T1 << 3)) * (SINT64) power10_table_128[scale_k -
487 1].w[0];
488
489 extra_digits++;
490 coefficient_b =
491 saved_cb + 100000000000000000ull +
492 round_const_table[rmode][extra_digits];
493
494 // get P*(2^M[extra_digits])/10^extra_digits
495 __mul_64x64_to_128 (CT, coefficient_b,
496 reciprocals10_64[extra_digits]);
497
498 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
499 amount = short_recip_scale[extra_digits];
500 C0_64 = CT.w[1] >> amount;
501
502 // result coefficient
503 C64 = C0_64 + coefficient_a;
504 } else if (C64 <= 1000000000000000ull) {
505 // less than 16 digits in result
506 coefficient_a =
507 (SINT64) saved_ca *(SINT64) power10_table_128[scale_k +
508 1].w[0];
509 //extra_digits --;
510 exponent_b--;
511 coefficient_b =
512 (saved_cb << 3) + (saved_cb << 1) + 100000000000000000ull +
513 round_const_table[rmode][extra_digits];
514
515 // get P*(2^M[extra_digits])/10^extra_digits
516 __mul_64x64_to_128 (CT_new, coefficient_b,
517 reciprocals10_64[extra_digits]);
518
519 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
520 amount = short_recip_scale[extra_digits];
521 C0_64 = CT_new.w[1] >> amount;
522
523 // result coefficient
524 C64_new = C0_64 + coefficient_a;
525 if (C64_new < 10000000000000000ull) {
526 C64 = C64_new;
527 #ifdef SET_STATUS_FLAGS
528 CT = CT_new;
529 #endif
530 } else
531 exponent_b++;
532 }
533
534 }
535
536 }
537
538 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
539 #ifndef IEEE_ROUND_NEAREST
540 if (rmode == 0) //ROUNDING_TO_NEAREST
541 #endif
542 if (C64 & 1) {
543 // check whether fractional part of initial_P/10^extra_digits is
544 // exactly .5
545 // this is the same as fractional part of
546 // (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero
547
548 // get remainder
549 remainder_h = CT.w[1] << (64 - amount);
550
551 // test whether fractional part is 0
552 if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits])) {
553 C64--;
554 }
555 }
556 #endif
557
558 #ifdef SET_STATUS_FLAGS
559 status = INEXACT_EXCEPTION;
560
561 // get remainder
562 remainder_h = CT.w[1] << (64 - amount);
563
564 switch (rmode) {
565 case ROUNDING_TO_NEAREST:
566 case ROUNDING_TIES_AWAY:
567 // test whether fractional part is 0
568 if ((remainder_h == 0x8000000000000000ull)
569 && (CT.w[0] < reciprocals10_64[extra_digits]))
570 status = EXACT_STATUS;
571 break;
572 case ROUNDING_DOWN:
573 case ROUNDING_TO_ZERO:
574 if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits]))
575 status = EXACT_STATUS;
576 //if(!C64 && rmode==ROUNDING_DOWN) sign_s=sign_y;
577 break;
578 default:
579 // round up
580 __add_carry_out (tmp, carry, CT.w[0],
581 reciprocals10_64[extra_digits]);
582 if ((remainder_h >> (64 - amount)) + carry >=
583 (((UINT64) 1) << amount))
584 status = EXACT_STATUS;
585 break;
586 }
587 __set_status_flags (pfpsf, status);
588
589 #endif
590
591 res =
592 fast_get_BID64_check_OF (sign_s, exponent_b + extra_digits, C64,
593 rnd_mode, pfpsf);
594 BID_RETURN (res);
595 }