comparison libquadmath/math/fmaq.c @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents 561a7518be6b
children 1830386684a0
comparison
equal deleted inserted replaced
68:561a7518be6b 111:04ced10e8804
1 /* Compute x * y + z as ternary operation. 1 /* Compute x * y + z as ternary operation.
2 Copyright (C) 2010 Free Software Foundation, Inc. 2 Copyright (C) 2010-2017 Free Software Foundation, Inc.
3 This file is part of the GNU C Library. 3 This file is part of the GNU C Library.
4 Contributed by Jakub Jelinek <jakub@redhat.com>, 2010. 4 Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
5 5
6 The GNU C Library is free software; you can redistribute it and/or 6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public 7 modify it under the terms of the GNU Lesser General Public
12 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details. 14 Lesser General Public License for more details.
15 15
16 You should have received a copy of the GNU Lesser General Public 16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free 17 License along with the GNU C Library; if not, see
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 18 <http://www.gnu.org/licenses/>. */
19 02111-1307 USA. */
20 19
21 #include "quadmath-imp.h" 20 #include "quadmath-imp.h"
22 #include <math.h> 21 #include <math.h>
23 #include <float.h> 22 #include <float.h>
24 #ifdef HAVE_FENV_H 23 #ifdef HAVE_FENV_H
55 z rather than NaN. */ 54 z rather than NaN. */
56 if (w.ieee.exponent == 0x7fff 55 if (w.ieee.exponent == 0x7fff
57 && u.ieee.exponent != 0x7fff 56 && u.ieee.exponent != 0x7fff
58 && v.ieee.exponent != 0x7fff) 57 && v.ieee.exponent != 0x7fff)
59 return (z + x) + y; 58 return (z + x) + y;
60 /* If x or y or z is Inf/NaN, or if fma will certainly overflow, 59 /* If z is zero and x are y are nonzero, compute the result
61 or if x * y is less than half of FLT128_DENORM_MIN, 60 as x * y to avoid the wrong sign of a zero result if x * y
62 compute as x * y + z. */ 61 underflows to 0. */
62 if (z == 0 && x != 0 && y != 0)
63 return x * y;
64 /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
65 x * y + z. */
63 if (u.ieee.exponent == 0x7fff 66 if (u.ieee.exponent == 0x7fff
64 || v.ieee.exponent == 0x7fff 67 || v.ieee.exponent == 0x7fff
65 || w.ieee.exponent == 0x7fff 68 || w.ieee.exponent == 0x7fff
66 || u.ieee.exponent + v.ieee.exponent 69 || x == 0
67 > 0x7fff + IEEE854_FLOAT128_BIAS 70 || y == 0)
68 || u.ieee.exponent + v.ieee.exponent
69 < IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG - 2)
70 return x * y + z; 71 return x * y + z;
72 /* If fma will certainly overflow, compute as x * y. */
73 if (u.ieee.exponent + v.ieee.exponent
74 > 0x7fff + IEEE854_FLOAT128_BIAS)
75 return x * y;
76 /* If x * y is less than 1/4 of FLT128_DENORM_MIN, neither the
77 result nor whether there is underflow depends on its exact
78 value, only on its sign. */
79 if (u.ieee.exponent + v.ieee.exponent
80 < IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG - 2)
81 {
82 int neg = u.ieee.negative ^ v.ieee.negative;
83 __float128 tiny = neg ? -0x1p-16494Q : 0x1p-16494Q;
84 if (w.ieee.exponent >= 3)
85 return tiny + z;
86 /* Scaling up, adding TINY and scaling down produces the
87 correct result, because in round-to-nearest mode adding
88 TINY has no effect and in other modes double rounding is
89 harmless. But it may not produce required underflow
90 exceptions. */
91 v.value = z * 0x1p114Q + tiny;
92 if (TININESS_AFTER_ROUNDING
93 ? v.ieee.exponent < 115
94 : (w.ieee.exponent == 0
95 || (w.ieee.exponent == 1
96 && w.ieee.negative != neg
97 && w.ieee.mant_low == 0
98 && w.ieee.mant_high == 0)))
99 {
100 __float128 force_underflow = x * y;
101 math_force_eval (force_underflow);
102 }
103 return v.value * 0x1p-114Q;
104 }
71 if (u.ieee.exponent + v.ieee.exponent 105 if (u.ieee.exponent + v.ieee.exponent
72 >= 0x7fff + IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG) 106 >= 0x7fff + IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG)
73 { 107 {
74 /* Compute 1p-113 times smaller result and multiply 108 /* Compute 1p-113 times smaller result and multiply
75 at the end. */ 109 at the end. */
85 } 119 }
86 else if (w.ieee.exponent >= 0x7fff - FLT128_MANT_DIG) 120 else if (w.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
87 { 121 {
88 /* Similarly. 122 /* Similarly.
89 If z exponent is very large and x and y exponents are 123 If z exponent is very large and x and y exponents are
90 very small, it doesn't matter if we don't adjust it. */ 124 very small, adjust them up to avoid spurious underflows,
91 if (u.ieee.exponent > v.ieee.exponent) 125 rather than down. */
126 if (u.ieee.exponent + v.ieee.exponent
127 <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG)
128 {
129 if (u.ieee.exponent > v.ieee.exponent)
130 u.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
131 else
132 v.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
133 }
134 else if (u.ieee.exponent > v.ieee.exponent)
92 { 135 {
93 if (u.ieee.exponent > FLT128_MANT_DIG) 136 if (u.ieee.exponent > FLT128_MANT_DIG)
94 u.ieee.exponent -= FLT128_MANT_DIG; 137 u.ieee.exponent -= FLT128_MANT_DIG;
95 } 138 }
96 else if (v.ieee.exponent > FLT128_MANT_DIG) 139 else if (v.ieee.exponent > FLT128_MANT_DIG)
116 } 159 }
117 else /* if (u.ieee.exponent + v.ieee.exponent 160 else /* if (u.ieee.exponent + v.ieee.exponent
118 <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG) */ 161 <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG) */
119 { 162 {
120 if (u.ieee.exponent > v.ieee.exponent) 163 if (u.ieee.exponent > v.ieee.exponent)
121 u.ieee.exponent += 2 * FLT128_MANT_DIG; 164 u.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
122 else 165 else
123 v.ieee.exponent += 2 * FLT128_MANT_DIG; 166 v.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
124 if (w.ieee.exponent <= 4 * FLT128_MANT_DIG + 4) 167 if (w.ieee.exponent <= 4 * FLT128_MANT_DIG + 6)
125 { 168 {
126 if (w.ieee.exponent) 169 if (w.ieee.exponent)
127 w.ieee.exponent += 2 * FLT128_MANT_DIG; 170 w.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
128 else 171 else
129 w.value *= 0x1p226Q; 172 w.value *= 0x1p228Q;
130 adjust = -1; 173 adjust = -1;
131 } 174 }
132 /* Otherwise x * y should just affect inexact 175 /* Otherwise x * y should just affect inexact
133 and nothing else. */ 176 and nothing else. */
134 } 177 }
135 x = u.value; 178 x = u.value;
136 y = v.value; 179 y = v.value;
137 z = w.value; 180 z = w.value;
138 } 181 }
182
183 /* Ensure correct sign of exact 0 + 0. */
184 if (__builtin_expect ((x == 0 || y == 0) && z == 0, 0))
185 {
186 x = math_opt_barrier (x);
187 return x * y + z;
188 }
189
190 #ifdef USE_FENV_H
191 fenv_t env;
192 feholdexcept (&env);
193 fesetround (FE_TONEAREST);
194 #endif
195
139 /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */ 196 /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
140 #define C ((1LL << (FLT128_MANT_DIG + 1) / 2) + 1) 197 #define C ((1LL << (FLT128_MANT_DIG + 1) / 2) + 1)
141 __float128 x1 = x * C; 198 __float128 x1 = x * C;
142 __float128 y1 = y * C; 199 __float128 y1 = y * C;
143 __float128 m1 = x * y; 200 __float128 m1 = x * y;
152 __float128 t1 = a1 - z; 209 __float128 t1 = a1 - z;
153 __float128 t2 = a1 - t1; 210 __float128 t2 = a1 - t1;
154 t1 = m1 - t1; 211 t1 = m1 - t1;
155 t2 = z - t2; 212 t2 = z - t2;
156 __float128 a2 = t1 + t2; 213 __float128 a2 = t1 + t2;
157 214 /* Ensure the arithmetic is not scheduled after feclearexcept call. */
158 #ifdef USE_FENV_H 215 math_force_eval (m2);
159 fenv_t env; 216 math_force_eval (a2);
160 feholdexcept (&env); 217 #ifdef USE_FENV_H
218 feclearexcept (FE_INEXACT);
219 #endif
220
221 /* If the result is an exact zero, ensure it has the correct sign. */
222 if (a1 == 0 && m2 == 0)
223 {
224 #ifdef USE_FENV_H
225 feupdateenv (&env);
226 #endif
227 /* Ensure that round-to-nearest value of z + m1 is not reused. */
228 z = math_opt_barrier (z);
229 return z + m1;
230 }
231
232 #ifdef USE_FENV_H
161 fesetround (FE_TOWARDZERO); 233 fesetround (FE_TOWARDZERO);
162 #endif 234 #endif
163 /* Perform m2 + a2 addition with round to odd. */ 235 /* Perform m2 + a2 addition with round to odd. */
164 u.value = a2 + m2; 236 u.value = a2 + m2;
165 237
189 if ((u.ieee.mant_low & 1) == 0) 261 if ((u.ieee.mant_low & 1) == 0)
190 u.ieee.mant_low |= fetestexcept (FE_INEXACT) != 0; 262 u.ieee.mant_low |= fetestexcept (FE_INEXACT) != 0;
191 #endif 263 #endif
192 v.value = a1 + u.value; 264 v.value = a1 + u.value;
193 /* Ensure the addition is not scheduled after fetestexcept call. */ 265 /* Ensure the addition is not scheduled after fetestexcept call. */
194 asm volatile ("" : : "m" (v)); 266 asm volatile ("" : : "m" (v.value));
195 #ifdef USE_FENV_H 267 #ifdef USE_FENV_H
196 int j = fetestexcept (FE_INEXACT) != 0; 268 int j = fetestexcept (FE_INEXACT) != 0;
197 feupdateenv (&env); 269 feupdateenv (&env);
198 #else 270 #else
199 int j = 0; 271 int j = 0;
202 mode instead of just reusing the round to zero computation. */ 274 mode instead of just reusing the round to zero computation. */
203 asm volatile ("" : "=m" (u) : "m" (u)); 275 asm volatile ("" : "=m" (u) : "m" (u));
204 /* If a1 + u.value is exact, the only rounding happens during 276 /* If a1 + u.value is exact, the only rounding happens during
205 scaling down. */ 277 scaling down. */
206 if (j == 0) 278 if (j == 0)
207 return v.value * 0x1p-226Q; 279 return v.value * 0x1p-228Q;
208 /* If result rounded to zero is not subnormal, no double 280 /* If result rounded to zero is not subnormal, no double
209 rounding will occur. */ 281 rounding will occur. */
210 if (v.ieee.exponent > 226) 282 if (v.ieee.exponent > 228)
211 return (a1 + u.value) * 0x1p-226Q; 283 return (a1 + u.value) * 0x1p-228Q;
212 /* If v.value * 0x1p-226Q with round to zero is a subnormal above 284 /* If v.value * 0x1p-228Q with round to zero is a subnormal above
213 or equal to FLT128_MIN / 2, then v.value * 0x1p-226Q shifts mantissa 285 or equal to FLT128_MIN / 2, then v.value * 0x1p-228Q shifts mantissa
214 down just by 1 bit, which means v.ieee.mant_low |= j would 286 down just by 1 bit, which means v.ieee.mant_low |= j would
215 change the round bit, not sticky or guard bit. 287 change the round bit, not sticky or guard bit.
216 v.value * 0x1p-226Q never normalizes by shifting up, 288 v.value * 0x1p-228Q never normalizes by shifting up,
217 so round bit plus sticky bit should be already enough 289 so round bit plus sticky bit should be already enough
218 for proper rounding. */ 290 for proper rounding. */
219 if (v.ieee.exponent == 226) 291 if (v.ieee.exponent == 228)
220 { 292 {
293 /* If the exponent would be in the normal range when
294 rounding to normal precision with unbounded exponent
295 range, the exact result is known and spurious underflows
296 must be avoided on systems detecting tininess after
297 rounding. */
298 if (TININESS_AFTER_ROUNDING)
299 {
300 w.value = a1 + u.value;
301 if (w.ieee.exponent == 229)
302 return w.value * 0x1p-228Q;
303 }
221 /* v.ieee.mant_low & 2 is LSB bit of the result before rounding, 304 /* v.ieee.mant_low & 2 is LSB bit of the result before rounding,
222 v.ieee.mant_low & 1 is the round bit and j is our sticky 305 v.ieee.mant_low & 1 is the round bit and j is our sticky
223 bit. In round-to-nearest 001 rounds down like 00, 306 bit. */
224 011 rounds up, even though 01 rounds down (thus we need 307 w.value = 0.0Q;
225 to adjust), 101 rounds down like 10 and 111 rounds up 308 w.ieee.mant_low = ((v.ieee.mant_low & 3) << 1) | j;
226 like 11. */ 309 w.ieee.negative = v.ieee.negative;
227 if ((v.ieee.mant_low & 3) == 1) 310 v.ieee.mant_low &= ~3U;
228 { 311 v.value *= 0x1p-228Q;
229 v.value *= 0x1p-226Q; 312 w.value *= 0x1p-2Q;
230 if (v.ieee.negative) 313 return v.value + w.value;
231 return v.value - 0x1p-16494Q /* __FLT128_DENORM_MIN__ */;
232 else
233 return v.value + 0x1p-16494Q /* __FLT128_DENORM_MIN__ */;
234 }
235 else
236 return v.value * 0x1p-226Q;
237 } 314 }
238 v.ieee.mant_low |= j; 315 v.ieee.mant_low |= j;
239 return v.value * 0x1p-226Q; 316 return v.value * 0x1p-228Q;
240 } 317 }
241 } 318 }