comparison libgcc/config/libbid/bid64_fma.c @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 04ced10e8804
comparison
equal deleted inserted replaced
-1:000000000000 0:a06113de4d67
1 /* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
8 version.
9
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 /*****************************************************************************
25 * BID64 fma
26 *****************************************************************************
27 *
28 * Algorithm description:
29 *
30 * if multiplication is guranteed exact (short coefficients)
31 * call the unpacked arg. equivalent of bid64_add(x*y, z)
32 * else
33 * get full coefficient_x*coefficient_y product
34 * call subroutine to perform addition of 64-bit argument
35 * to 128-bit product
36 *
37 ****************************************************************************/
38
39 #include "bid_inline_add.h"
40
41 #if DECIMAL_CALL_BY_REFERENCE
42 extern void bid64_mul (UINT64 * pres, UINT64 * px,
43 UINT64 *
44 py _RND_MODE_PARAM _EXC_FLAGS_PARAM
45 _EXC_MASKS_PARAM _EXC_INFO_PARAM);
46 #else
47
48 extern UINT64 bid64_mul (UINT64 x,
49 UINT64 y _RND_MODE_PARAM
50 _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
51 _EXC_INFO_PARAM);
52 #endif
53
54 #if DECIMAL_CALL_BY_REFERENCE
55
56 void
57 bid64_fma (UINT64 * pres, UINT64 * px, UINT64 * py,
58 UINT64 *
59 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
60 _EXC_INFO_PARAM) {
61 UINT64 x, y, z;
62 #else
63
64 UINT64
65 bid64_fma (UINT64 x, UINT64 y,
66 UINT64 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
67 _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
68 #endif
69 UINT128 P, PU, CT, CZ;
70 UINT64 sign_x, sign_y, coefficient_x, coefficient_y, sign_z,
71 coefficient_z;
72 UINT64 C64, remainder_y, res;
73 UINT64 CYh, CY0L, T, valid_x, valid_y, valid_z;
74 int_double tempx, tempy;
75 int extra_digits, exponent_x, exponent_y, bin_expon_cx, bin_expon_cy,
76 bin_expon_product, rmode;
77 int digits_p, bp, final_exponent, exponent_z, digits_z, ez, ey,
78 scale_z, uf_status;
79
80 #if DECIMAL_CALL_BY_REFERENCE
81 #if !DECIMAL_GLOBAL_ROUNDING
82 _IDEC_round rnd_mode = *prnd_mode;
83 #endif
84 x = *px;
85 y = *py;
86 z = *pz;
87 #endif
88
89 valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x);
90 valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y);
91 valid_z = unpack_BID64 (&sign_z, &exponent_z, &coefficient_z, z);
92
93 // unpack arguments, check for NaN, Infinity, or 0
94 if (!valid_x || !valid_y || !valid_z) {
95
96 if ((y & MASK_NAN) == MASK_NAN) { // y is NAN
97 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
98 // check first for non-canonical NaN payload
99 y = y & 0xfe03ffffffffffffull; // clear G6-G12
100 if ((y & 0x0003ffffffffffffull) > 999999999999999ull) {
101 y = y & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
102 }
103 if ((y & MASK_SNAN) == MASK_SNAN) { // y is SNAN
104 // set invalid flag
105 *pfpsf |= INVALID_EXCEPTION;
106 // return quiet (y)
107 res = y & 0xfdffffffffffffffull;
108 } else { // y is QNaN
109 // return y
110 res = y;
111 // if z = SNaN or x = SNaN signal invalid exception
112 if ((z & MASK_SNAN) == MASK_SNAN
113 || (x & MASK_SNAN) == MASK_SNAN) {
114 // set invalid flag
115 *pfpsf |= INVALID_EXCEPTION;
116 }
117 }
118 BID_RETURN (res)
119 } else if ((z & MASK_NAN) == MASK_NAN) { // z is NAN
120 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
121 // check first for non-canonical NaN payload
122 z = z & 0xfe03ffffffffffffull; // clear G6-G12
123 if ((z & 0x0003ffffffffffffull) > 999999999999999ull) {
124 z = z & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
125 }
126 if ((z & MASK_SNAN) == MASK_SNAN) { // z is SNAN
127 // set invalid flag
128 *pfpsf |= INVALID_EXCEPTION;
129 // return quiet (z)
130 res = z & 0xfdffffffffffffffull;
131 } else { // z is QNaN
132 // return z
133 res = z;
134 // if x = SNaN signal invalid exception
135 if ((x & MASK_SNAN) == MASK_SNAN) {
136 // set invalid flag
137 *pfpsf |= INVALID_EXCEPTION;
138 }
139 }
140 BID_RETURN (res)
141 } else if ((x & MASK_NAN) == MASK_NAN) { // x is NAN
142 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
143 // check first for non-canonical NaN payload
144 x = x & 0xfe03ffffffffffffull; // clear G6-G12
145 if ((x & 0x0003ffffffffffffull) > 999999999999999ull) {
146 x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits
147 }
148 if ((x & MASK_SNAN) == MASK_SNAN) { // x is SNAN
149 // set invalid flag
150 *pfpsf |= INVALID_EXCEPTION;
151 // return quiet (x)
152 res = x & 0xfdffffffffffffffull;
153 } else { // x is QNaN
154 // return x
155 res = x; // clear out G[6]-G[16]
156 }
157 BID_RETURN (res)
158 }
159
160 if (!valid_x) {
161 // x is Inf. or 0
162
163 // x is Infinity?
164 if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) {
165 // check if y is 0
166 if (!coefficient_y) {
167 // y==0, return NaN
168 #ifdef SET_STATUS_FLAGS
169 if ((z & 0x7e00000000000000ull) != 0x7c00000000000000ull)
170 __set_status_flags (pfpsf, INVALID_EXCEPTION);
171 #endif
172 BID_RETURN (0x7c00000000000000ull);
173 }
174 // test if z is Inf of oposite sign
175 if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull)
176 && (((x ^ y) ^ z) & 0x8000000000000000ull)) {
177 // return NaN
178 #ifdef SET_STATUS_FLAGS
179 __set_status_flags (pfpsf, INVALID_EXCEPTION);
180 #endif
181 BID_RETURN (0x7c00000000000000ull);
182 }
183 // otherwise return +/-Inf
184 BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
185 0x7800000000000000ull);
186 }
187 // x is 0
188 if (((y & 0x7800000000000000ull) != 0x7800000000000000ull)
189 && ((z & 0x7800000000000000ull) != 0x7800000000000000ull)) {
190
191 if (coefficient_z) {
192 exponent_y = exponent_x - DECIMAL_EXPONENT_BIAS + exponent_y;
193
194 sign_z = z & 0x8000000000000000ull;
195
196 if (exponent_y >= exponent_z)
197 BID_RETURN (z);
198 res =
199 add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z,
200 &rnd_mode, pfpsf);
201 BID_RETURN (res);
202 }
203 }
204 }
205 if (!valid_y) {
206 // y is Inf. or 0
207
208 // y is Infinity?
209 if ((y & 0x7800000000000000ull) == 0x7800000000000000ull) {
210 // check if x is 0
211 if (!coefficient_x) {
212 // y==0, return NaN
213 #ifdef SET_STATUS_FLAGS
214 __set_status_flags (pfpsf, INVALID_EXCEPTION);
215 #endif
216 BID_RETURN (0x7c00000000000000ull);
217 }
218 // test if z is Inf of oposite sign
219 if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull)
220 && (((x ^ y) ^ z) & 0x8000000000000000ull)) {
221 #ifdef SET_STATUS_FLAGS
222 __set_status_flags (pfpsf, INVALID_EXCEPTION);
223 #endif
224 // return NaN
225 BID_RETURN (0x7c00000000000000ull);
226 }
227 // otherwise return +/-Inf
228 BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
229 0x7800000000000000ull);
230 }
231 // y is 0
232 if (((z & 0x7800000000000000ull) != 0x7800000000000000ull)) {
233
234 if (coefficient_z) {
235 exponent_y += exponent_x - DECIMAL_EXPONENT_BIAS;
236
237 sign_z = z & 0x8000000000000000ull;
238
239 if (exponent_y >= exponent_z)
240 BID_RETURN (z);
241 res =
242 add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z,
243 &rnd_mode, pfpsf);
244 BID_RETURN (res);
245 }
246 }
247 }
248
249 if (!valid_z) {
250 // y is Inf. or 0
251
252 // test if y is NaN/Inf
253 if ((z & 0x7800000000000000ull) == 0x7800000000000000ull) {
254 BID_RETURN (coefficient_z & QUIET_MASK64);
255 }
256 // z is 0, return x*y
257 if ((!coefficient_x) || (!coefficient_y)) {
258 //0+/-0
259 exponent_x += exponent_y - DECIMAL_EXPONENT_BIAS;
260 if (exponent_x > DECIMAL_MAX_EXPON_64)
261 exponent_x = DECIMAL_MAX_EXPON_64;
262 else if (exponent_x < 0)
263 exponent_x = 0;
264 if (exponent_x <= exponent_z)
265 res = ((UINT64) exponent_x) << 53;
266 else
267 res = ((UINT64) exponent_z) << 53;
268 if ((sign_x ^ sign_y) == sign_z)
269 res |= sign_z;
270 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
271 #ifndef IEEE_ROUND_NEAREST
272 else if (rnd_mode == ROUNDING_DOWN)
273 res |= 0x8000000000000000ull;
274 #endif
275 #endif
276 BID_RETURN (res);
277 }
278 }
279 }
280
281 /* get binary coefficients of x and y */
282
283 //--- get number of bits in the coefficients of x and y ---
284 // version 2 (original)
285 tempx.d = (double) coefficient_x;
286 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52);
287
288 tempy.d = (double) coefficient_y;
289 bin_expon_cy = ((tempy.i & MASK_BINARY_EXPONENT) >> 52);
290
291 // magnitude estimate for coefficient_x*coefficient_y is
292 // 2^(unbiased_bin_expon_cx + unbiased_bin_expon_cx)
293 bin_expon_product = bin_expon_cx + bin_expon_cy;
294
295 // check if coefficient_x*coefficient_y<2^(10*k+3)
296 // equivalent to unbiased_bin_expon_cx + unbiased_bin_expon_cx < 10*k+1
297 if (bin_expon_product < UPPER_EXPON_LIMIT + 2 * BINARY_EXPONENT_BIAS) {
298 // easy multiply
299 C64 = coefficient_x * coefficient_y;
300 final_exponent = exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS;
301 if ((final_exponent > 0) || (!coefficient_z)) {
302 res =
303 get_add64 (sign_x ^ sign_y,
304 final_exponent, C64, sign_z, exponent_z, coefficient_z, rnd_mode, pfpsf);
305 BID_RETURN (res);
306 } else {
307 P.w[0] = C64;
308 P.w[1] = 0;
309 extra_digits = 0;
310 }
311 } else {
312 if (!coefficient_z) {
313 #if DECIMAL_CALL_BY_REFERENCE
314 bid64_mul (&res, px,
315 py _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
316 _EXC_INFO_ARG);
317 #else
318 res =
319 bid64_mul (x,
320 y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
321 _EXC_INFO_ARG);
322 #endif
323 BID_RETURN (res);
324 }
325 // get 128-bit product: coefficient_x*coefficient_y
326 __mul_64x64_to_128 (P, coefficient_x, coefficient_y);
327
328 // tighten binary range of P: leading bit is 2^bp
329 // unbiased_bin_expon_product <= bp <= unbiased_bin_expon_product+1
330 bin_expon_product -= 2 * BINARY_EXPONENT_BIAS;
331 __tight_bin_range_128 (bp, P, bin_expon_product);
332
333 // get number of decimal digits in the product
334 digits_p = estimate_decimal_digits[bp];
335 if (!(__unsigned_compare_gt_128 (power10_table_128[digits_p], P)))
336 digits_p++; // if power10_table_128[digits_p] <= P
337
338 // determine number of decimal digits to be rounded out
339 extra_digits = digits_p - MAX_FORMAT_DIGITS;
340 final_exponent =
341 exponent_x + exponent_y + extra_digits - DECIMAL_EXPONENT_BIAS;
342 }
343
344 if (((unsigned) final_exponent) >= 3 * 256) {
345 if (final_exponent < 0) {
346 //--- get number of bits in the coefficients of z ---
347 tempx.d = (double) coefficient_z;
348 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
349 // get number of decimal digits in the coeff_x
350 digits_z = estimate_decimal_digits[bin_expon_cx];
351 if (coefficient_z >= power10_table_128[digits_z].w[0])
352 digits_z++;
353 // underflow
354 if ((final_exponent + 16 < 0)
355 || (exponent_z + digits_z > 33 + final_exponent)) {
356 res =
357 BID_normalize (sign_z, exponent_z, coefficient_z,
358 sign_x ^ sign_y, 1, rnd_mode, pfpsf);
359 BID_RETURN (res);
360 }
361
362 ez = exponent_z + digits_z - 16;
363 if (ez < 0)
364 ez = 0;
365 scale_z = exponent_z - ez;
366 coefficient_z *= power10_table_128[scale_z].w[0];
367 ey = final_exponent - extra_digits;
368 extra_digits = ez - ey;
369 if (extra_digits > 33) {
370 res =
371 BID_normalize (sign_z, exponent_z, coefficient_z,
372 sign_x ^ sign_y, 1, rnd_mode, pfpsf);
373 BID_RETURN (res);
374 }
375 //else // extra_digits<=32
376
377 if (extra_digits > 17) {
378 CYh = __truncate (P, 16);
379 // get remainder
380 T = power10_table_128[16].w[0];
381 __mul_64x64_to_64 (CY0L, CYh, T);
382 remainder_y = P.w[0] - CY0L;
383
384 extra_digits -= 16;
385 P.w[0] = CYh;
386 P.w[1] = 0;
387 } else
388 remainder_y = 0;
389
390 // align coeff_x, CYh
391 __mul_64x64_to_128 (CZ, coefficient_z,
392 power10_table_128[extra_digits].w[0]);
393
394 if (sign_z == (sign_y ^ sign_x)) {
395 __add_128_128 (CT, CZ, P);
396 if (__unsigned_compare_ge_128
397 (CT, power10_table_128[16 + extra_digits])) {
398 extra_digits++;
399 ez++;
400 }
401 } else {
402 if (remainder_y && (__unsigned_compare_ge_128 (CZ, P))) {
403 P.w[0]++;
404 if (!P.w[0])
405 P.w[1]++;
406 }
407 __sub_128_128 (CT, CZ, P);
408 if (((SINT64) CT.w[1]) < 0) {
409 sign_z = sign_y ^ sign_x;
410 CT.w[0] = 0 - CT.w[0];
411 CT.w[1] = 0 - CT.w[1];
412 if (CT.w[0])
413 CT.w[1]--;
414 } else if(!(CT.w[1]|CT.w[0]))
415 sign_z = (rnd_mode!=ROUNDING_DOWN)? 0: 0x8000000000000000ull;
416 if (ez
417 &&
418 (__unsigned_compare_gt_128
419 (power10_table_128[15 + extra_digits], CT))) {
420 extra_digits--;
421 ez--;
422 }
423 }
424
425 #ifdef SET_STATUS_FLAGS
426 uf_status = 0;
427 if ((!ez)
428 &&
429 __unsigned_compare_gt_128 (power10_table_128
430 [extra_digits + 15], CT)) {
431 rmode = rnd_mode;
432 if (sign_z && (unsigned) (rmode - 1) < 2)
433 rmode = 3 - rmode;
434 //__add_128_64(PU, CT, round_const_table[rmode][extra_digits]);
435 PU = power10_table_128[extra_digits + 15];
436 PU.w[0]--;
437 if (__unsigned_compare_gt_128 (PU, CT)
438 || (rmode == ROUNDING_DOWN)
439 || (rmode == ROUNDING_TO_ZERO))
440 uf_status = UNDERFLOW_EXCEPTION;
441 else if (extra_digits < 2) {
442 if ((rmode == ROUNDING_UP)) {
443 if (!extra_digits)
444 uf_status = UNDERFLOW_EXCEPTION;
445 else {
446 if (remainder_y && (sign_z != (sign_y ^ sign_x)))
447 remainder_y = power10_table_128[16].w[0] - remainder_y;
448
449 if (power10_table_128[15].w[0] > remainder_y)
450 uf_status = UNDERFLOW_EXCEPTION;
451 }
452 } else // RN or RN_away
453 {
454 if (remainder_y && (sign_z != (sign_y ^ sign_x)))
455 remainder_y = power10_table_128[16].w[0] - remainder_y;
456
457 if (!extra_digits) {
458 remainder_y += round_const_table[rmode][15];
459 if (remainder_y < power10_table_128[16].w[0])
460 uf_status = UNDERFLOW_EXCEPTION;
461 } else {
462 if (remainder_y < round_const_table[rmode][16])
463 uf_status = UNDERFLOW_EXCEPTION;
464 }
465 }
466 //__set_status_flags (pfpsf, uf_status);
467 }
468 }
469 #endif
470 res =
471 __bid_full_round64_remainder (sign_z, ez - extra_digits, CT,
472 extra_digits, remainder_y,
473 rnd_mode, pfpsf, uf_status);
474 BID_RETURN (res);
475
476 } else {
477 if ((sign_z == (sign_x ^ sign_y))
478 || (final_exponent > 3 * 256 + 15)) {
479 res =
480 fast_get_BID64_check_OF (sign_x ^ sign_y, final_exponent,
481 1000000000000000ull, rnd_mode,
482 pfpsf);
483 BID_RETURN (res);
484 }
485 }
486 }
487
488
489 if (extra_digits > 0) {
490 res =
491 get_add128 (sign_z, exponent_z, coefficient_z, sign_x ^ sign_y,
492 final_exponent, P, extra_digits, rnd_mode, pfpsf);
493 BID_RETURN (res);
494 }
495 // go to convert_format and exit
496 else {
497 C64 = __low_64 (P);
498
499 res =
500 get_add64 (sign_x ^ sign_y,
501 exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS, C64,
502 sign_z, exponent_z, coefficient_z,
503 rnd_mode, pfpsf);
504 BID_RETURN (res);
505 }
506 }