Mercurial > hg > CbC > CbC_gcc
diff libquadmath/math/fmaq.c @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | 561a7518be6b |
children | 1830386684a0 |
line wrap: on
line diff
--- a/libquadmath/math/fmaq.c Sun Aug 21 07:07:55 2011 +0900 +++ b/libquadmath/math/fmaq.c Fri Oct 27 22:46:09 2017 +0900 @@ -1,5 +1,5 @@ /* Compute x * y + z as ternary operation. - Copyright (C) 2010 Free Software Foundation, Inc. + Copyright (C) 2010-2017 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek <jakub@redhat.com>, 2010. @@ -14,9 +14,8 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ #include "quadmath-imp.h" #include <math.h> @@ -57,17 +56,52 @@ && u.ieee.exponent != 0x7fff && v.ieee.exponent != 0x7fff) return (z + x) + y; - /* If x or y or z is Inf/NaN, or if fma will certainly overflow, - or if x * y is less than half of FLT128_DENORM_MIN, - compute as x * y + z. */ + /* If z is zero and x are y are nonzero, compute the result + as x * y to avoid the wrong sign of a zero result if x * y + underflows to 0. */ + if (z == 0 && x != 0 && y != 0) + return x * y; + /* If x or y or z is Inf/NaN, or if x * y is zero, compute as + x * y + z. */ if (u.ieee.exponent == 0x7fff || v.ieee.exponent == 0x7fff || w.ieee.exponent == 0x7fff - || u.ieee.exponent + v.ieee.exponent - > 0x7fff + IEEE854_FLOAT128_BIAS - || u.ieee.exponent + v.ieee.exponent - < IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG - 2) + || x == 0 + || y == 0) return x * y + z; + /* If fma will certainly overflow, compute as x * y. */ + if (u.ieee.exponent + v.ieee.exponent + > 0x7fff + IEEE854_FLOAT128_BIAS) + return x * y; + /* If x * y is less than 1/4 of FLT128_DENORM_MIN, neither the + result nor whether there is underflow depends on its exact + value, only on its sign. */ + if (u.ieee.exponent + v.ieee.exponent + < IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG - 2) + { + int neg = u.ieee.negative ^ v.ieee.negative; + __float128 tiny = neg ? -0x1p-16494Q : 0x1p-16494Q; + if (w.ieee.exponent >= 3) + return tiny + z; + /* Scaling up, adding TINY and scaling down produces the + correct result, because in round-to-nearest mode adding + TINY has no effect and in other modes double rounding is + harmless. But it may not produce required underflow + exceptions. */ + v.value = z * 0x1p114Q + tiny; + if (TININESS_AFTER_ROUNDING + ? v.ieee.exponent < 115 + : (w.ieee.exponent == 0 + || (w.ieee.exponent == 1 + && w.ieee.negative != neg + && w.ieee.mant_low == 0 + && w.ieee.mant_high == 0))) + { + __float128 force_underflow = x * y; + math_force_eval (force_underflow); + } + return v.value * 0x1p-114Q; + } if (u.ieee.exponent + v.ieee.exponent >= 0x7fff + IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG) { @@ -87,8 +121,17 @@ { /* Similarly. If z exponent is very large and x and y exponents are - very small, it doesn't matter if we don't adjust it. */ - if (u.ieee.exponent > v.ieee.exponent) + very small, adjust them up to avoid spurious underflows, + rather than down. */ + if (u.ieee.exponent + v.ieee.exponent + <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG) + { + if (u.ieee.exponent > v.ieee.exponent) + u.ieee.exponent += 2 * FLT128_MANT_DIG + 2; + else + v.ieee.exponent += 2 * FLT128_MANT_DIG + 2; + } + else if (u.ieee.exponent > v.ieee.exponent) { if (u.ieee.exponent > FLT128_MANT_DIG) u.ieee.exponent -= FLT128_MANT_DIG; @@ -118,15 +161,15 @@ <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG) */ { if (u.ieee.exponent > v.ieee.exponent) - u.ieee.exponent += 2 * FLT128_MANT_DIG; + u.ieee.exponent += 2 * FLT128_MANT_DIG + 2; else - v.ieee.exponent += 2 * FLT128_MANT_DIG; - if (w.ieee.exponent <= 4 * FLT128_MANT_DIG + 4) + v.ieee.exponent += 2 * FLT128_MANT_DIG + 2; + if (w.ieee.exponent <= 4 * FLT128_MANT_DIG + 6) { if (w.ieee.exponent) - w.ieee.exponent += 2 * FLT128_MANT_DIG; + w.ieee.exponent += 2 * FLT128_MANT_DIG + 2; else - w.value *= 0x1p226Q; + w.value *= 0x1p228Q; adjust = -1; } /* Otherwise x * y should just affect inexact @@ -136,6 +179,20 @@ y = v.value; z = w.value; } + + /* Ensure correct sign of exact 0 + 0. */ + if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0)) + { + x = math_opt_barrier (x); + return x * y + z; + } + +#ifdef USE_FENV_H + fenv_t env; + feholdexcept (&env); + fesetround (FE_TONEAREST); +#endif + /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ #define C ((1LL << (FLT128_MANT_DIG + 1) / 2) + 1) __float128 x1 = x * C; @@ -154,10 +211,25 @@ t1 = m1 - t1; t2 = z - t2; __float128 a2 = t1 + t2; + /* Ensure the arithmetic is not scheduled after feclearexcept call. */ + math_force_eval (m2); + math_force_eval (a2); +#ifdef USE_FENV_H + feclearexcept (FE_INEXACT); +#endif + + /* If the result is an exact zero, ensure it has the correct sign. */ + if (a1 == 0 && m2 == 0) + { +#ifdef USE_FENV_H + feupdateenv (&env); +#endif + /* Ensure that round-to-nearest value of z + m1 is not reused. */ + z = math_opt_barrier (z); + return z + m1; + } #ifdef USE_FENV_H - fenv_t env; - feholdexcept (&env); fesetround (FE_TOWARDZERO); #endif /* Perform m2 + a2 addition with round to odd. */ @@ -191,7 +263,7 @@ #endif v.value = a1 + u.value; /* Ensure the addition is not scheduled after fetestexcept call. */ - asm volatile ("" : : "m" (v)); + asm volatile ("" : : "m" (v.value)); #ifdef USE_FENV_H int j = fetestexcept (FE_INEXACT) != 0; feupdateenv (&env); @@ -204,38 +276,43 @@ /* If a1 + u.value is exact, the only rounding happens during scaling down. */ if (j == 0) - return v.value * 0x1p-226Q; + return v.value * 0x1p-228Q; /* If result rounded to zero is not subnormal, no double rounding will occur. */ - if (v.ieee.exponent > 226) - return (a1 + u.value) * 0x1p-226Q; - /* If v.value * 0x1p-226Q with round to zero is a subnormal above - or equal to FLT128_MIN / 2, then v.value * 0x1p-226Q shifts mantissa + if (v.ieee.exponent > 228) + return (a1 + u.value) * 0x1p-228Q; + /* If v.value * 0x1p-228Q with round to zero is a subnormal above + or equal to FLT128_MIN / 2, then v.value * 0x1p-228Q shifts mantissa down just by 1 bit, which means v.ieee.mant_low |= j would change the round bit, not sticky or guard bit. - v.value * 0x1p-226Q never normalizes by shifting up, + v.value * 0x1p-228Q never normalizes by shifting up, so round bit plus sticky bit should be already enough for proper rounding. */ - if (v.ieee.exponent == 226) + if (v.ieee.exponent == 228) { + /* If the exponent would be in the normal range when + rounding to normal precision with unbounded exponent + range, the exact result is known and spurious underflows + must be avoided on systems detecting tininess after + rounding. */ + if (TININESS_AFTER_ROUNDING) + { + w.value = a1 + u.value; + if (w.ieee.exponent == 229) + return w.value * 0x1p-228Q; + } /* v.ieee.mant_low & 2 is LSB bit of the result before rounding, v.ieee.mant_low & 1 is the round bit and j is our sticky - bit. In round-to-nearest 001 rounds down like 00, - 011 rounds up, even though 01 rounds down (thus we need - to adjust), 101 rounds down like 10 and 111 rounds up - like 11. */ - if ((v.ieee.mant_low & 3) == 1) - { - v.value *= 0x1p-226Q; - if (v.ieee.negative) - return v.value - 0x1p-16494Q /* __FLT128_DENORM_MIN__ */; - else - return v.value + 0x1p-16494Q /* __FLT128_DENORM_MIN__ */; - } - else - return v.value * 0x1p-226Q; + bit. */ + w.value = 0.0Q; + w.ieee.mant_low = ((v.ieee.mant_low & 3) << 1) | j; + w.ieee.negative = v.ieee.negative; + v.ieee.mant_low &= ~3U; + v.value *= 0x1p-228Q; + w.value *= 0x1p-2Q; + return v.value + w.value; } v.ieee.mant_low |= j; - return v.value * 0x1p-226Q; + return v.value * 0x1p-228Q; } }