68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 /* Compute x * y + z as ternary operation.
|
145
|
2 Copyright (C) 2010-2018 Free Software Foundation, Inc.
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3 This file is part of the GNU C Library.
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4 Contributed by Jakub Jelinek <jakub@redhat.com>, 2010.
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
6 The GNU C Library is free software; you can redistribute it and/or
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7 modify it under the terms of the GNU Lesser General Public
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8 License as published by the Free Software Foundation; either
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
9 version 2.1 of the License, or (at your option) any later version.
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
11 The GNU C Library is distributed in the hope that it will be useful,
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 Lesser General Public License for more details.
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
15
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
16 You should have received a copy of the GNU Lesser General Public
|
111
|
17 License along with the GNU C Library; if not, see
|
|
18 <http://www.gnu.org/licenses/>. */
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20 #include "quadmath-imp.h"
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
22 /* This implementation uses rounding to odd to avoid problems with
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
23 double rounding. See a paper by Boldo and Melquiond:
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
24 http://www.lri.fr/~melquion/doc/08-tc.pdf */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
25
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
26 __float128
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
27 fmaq (__float128 x, __float128 y, __float128 z)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
28 {
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
29 ieee854_float128 u, v, w;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
30 int adjust = 0;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
31 u.value = x;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
32 v.value = y;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
33 w.value = z;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
34 if (__builtin_expect (u.ieee.exponent + v.ieee.exponent
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
35 >= 0x7fff + IEEE854_FLOAT128_BIAS
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
36 - FLT128_MANT_DIG, 0)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
37 || __builtin_expect (u.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
38 || __builtin_expect (v.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
39 || __builtin_expect (w.ieee.exponent >= 0x7fff - FLT128_MANT_DIG, 0)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
40 || __builtin_expect (u.ieee.exponent + v.ieee.exponent
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
41 <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG, 0))
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
42 {
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
43 /* If z is Inf, but x and y are finite, the result should be
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
44 z rather than NaN. */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
45 if (w.ieee.exponent == 0x7fff
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
46 && u.ieee.exponent != 0x7fff
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
47 && v.ieee.exponent != 0x7fff)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
48 return (z + x) + y;
|
111
|
49 /* If z is zero and x are y are nonzero, compute the result
|
|
50 as x * y to avoid the wrong sign of a zero result if x * y
|
|
51 underflows to 0. */
|
|
52 if (z == 0 && x != 0 && y != 0)
|
|
53 return x * y;
|
|
54 /* If x or y or z is Inf/NaN, or if x * y is zero, compute as
|
|
55 x * y + z. */
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
56 if (u.ieee.exponent == 0x7fff
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
57 || v.ieee.exponent == 0x7fff
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
58 || w.ieee.exponent == 0x7fff
|
111
|
59 || x == 0
|
|
60 || y == 0)
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
61 return x * y + z;
|
111
|
62 /* If fma will certainly overflow, compute as x * y. */
|
|
63 if (u.ieee.exponent + v.ieee.exponent
|
|
64 > 0x7fff + IEEE854_FLOAT128_BIAS)
|
|
65 return x * y;
|
145
|
66 /* If x * y is less than 1/4 of FLT128_TRUE_MIN, neither the
|
111
|
67 result nor whether there is underflow depends on its exact
|
|
68 value, only on its sign. */
|
|
69 if (u.ieee.exponent + v.ieee.exponent
|
|
70 < IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG - 2)
|
|
71 {
|
|
72 int neg = u.ieee.negative ^ v.ieee.negative;
|
|
73 __float128 tiny = neg ? -0x1p-16494Q : 0x1p-16494Q;
|
|
74 if (w.ieee.exponent >= 3)
|
|
75 return tiny + z;
|
|
76 /* Scaling up, adding TINY and scaling down produces the
|
|
77 correct result, because in round-to-nearest mode adding
|
|
78 TINY has no effect and in other modes double rounding is
|
|
79 harmless. But it may not produce required underflow
|
|
80 exceptions. */
|
|
81 v.value = z * 0x1p114Q + tiny;
|
|
82 if (TININESS_AFTER_ROUNDING
|
|
83 ? v.ieee.exponent < 115
|
|
84 : (w.ieee.exponent == 0
|
|
85 || (w.ieee.exponent == 1
|
|
86 && w.ieee.negative != neg
|
145
|
87 && w.ieee.mantissa3 == 0
|
|
88 && w.ieee.mantissa2 == 0
|
|
89 && w.ieee.mantissa1 == 0
|
|
90 && w.ieee.mantissa0 == 0)))
|
111
|
91 {
|
|
92 __float128 force_underflow = x * y;
|
|
93 math_force_eval (force_underflow);
|
|
94 }
|
|
95 return v.value * 0x1p-114Q;
|
|
96 }
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
97 if (u.ieee.exponent + v.ieee.exponent
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
98 >= 0x7fff + IEEE854_FLOAT128_BIAS - FLT128_MANT_DIG)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
99 {
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
100 /* Compute 1p-113 times smaller result and multiply
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
101 at the end. */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
102 if (u.ieee.exponent > v.ieee.exponent)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
103 u.ieee.exponent -= FLT128_MANT_DIG;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
104 else
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
105 v.ieee.exponent -= FLT128_MANT_DIG;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
106 /* If x + y exponent is very large and z exponent is very small,
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
107 it doesn't matter if we don't adjust it. */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
108 if (w.ieee.exponent > FLT128_MANT_DIG)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
109 w.ieee.exponent -= FLT128_MANT_DIG;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
110 adjust = 1;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
111 }
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
112 else if (w.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
113 {
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
114 /* Similarly.
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
115 If z exponent is very large and x and y exponents are
|
111
|
116 very small, adjust them up to avoid spurious underflows,
|
|
117 rather than down. */
|
|
118 if (u.ieee.exponent + v.ieee.exponent
|
145
|
119 <= IEEE854_FLOAT128_BIAS + 2 * FLT128_MANT_DIG)
|
111
|
120 {
|
|
121 if (u.ieee.exponent > v.ieee.exponent)
|
|
122 u.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
|
|
123 else
|
|
124 v.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
|
|
125 }
|
|
126 else if (u.ieee.exponent > v.ieee.exponent)
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
127 {
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
128 if (u.ieee.exponent > FLT128_MANT_DIG)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
129 u.ieee.exponent -= FLT128_MANT_DIG;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
130 }
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
131 else if (v.ieee.exponent > FLT128_MANT_DIG)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
132 v.ieee.exponent -= FLT128_MANT_DIG;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
133 w.ieee.exponent -= FLT128_MANT_DIG;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
134 adjust = 1;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
135 }
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
136 else if (u.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
137 {
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
138 u.ieee.exponent -= FLT128_MANT_DIG;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
139 if (v.ieee.exponent)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
140 v.ieee.exponent += FLT128_MANT_DIG;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
141 else
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
142 v.value *= 0x1p113Q;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
143 }
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
144 else if (v.ieee.exponent >= 0x7fff - FLT128_MANT_DIG)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
145 {
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
146 v.ieee.exponent -= FLT128_MANT_DIG;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
147 if (u.ieee.exponent)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
148 u.ieee.exponent += FLT128_MANT_DIG;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
149 else
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
150 u.value *= 0x1p113Q;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
151 }
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
152 else /* if (u.ieee.exponent + v.ieee.exponent
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
153 <= IEEE854_FLOAT128_BIAS + FLT128_MANT_DIG) */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
154 {
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
155 if (u.ieee.exponent > v.ieee.exponent)
|
111
|
156 u.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
157 else
|
111
|
158 v.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
|
|
159 if (w.ieee.exponent <= 4 * FLT128_MANT_DIG + 6)
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
160 {
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
161 if (w.ieee.exponent)
|
111
|
162 w.ieee.exponent += 2 * FLT128_MANT_DIG + 2;
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
163 else
|
111
|
164 w.value *= 0x1p228Q;
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
165 adjust = -1;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
166 }
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
167 /* Otherwise x * y should just affect inexact
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
168 and nothing else. */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
169 }
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
170 x = u.value;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
171 y = v.value;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
172 z = w.value;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
173 }
|
111
|
174
|
|
175 /* Ensure correct sign of exact 0 + 0. */
|
145
|
176 if (__glibc_unlikely ((x == 0 || y == 0) && z == 0))
|
111
|
177 {
|
|
178 x = math_opt_barrier (x);
|
|
179 return x * y + z;
|
|
180 }
|
|
181
|
|
182 fenv_t env;
|
|
183 feholdexcept (&env);
|
|
184 fesetround (FE_TONEAREST);
|
|
185
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
186 /* Multiplication m1 + m2 = x * y using Dekker's algorithm. */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
187 #define C ((1LL << (FLT128_MANT_DIG + 1) / 2) + 1)
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
188 __float128 x1 = x * C;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
189 __float128 y1 = y * C;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
190 __float128 m1 = x * y;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
191 x1 = (x - x1) + x1;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
192 y1 = (y - y1) + y1;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
193 __float128 x2 = x - x1;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
194 __float128 y2 = y - y1;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
195 __float128 m2 = (((x1 * y1 - m1) + x1 * y2) + x2 * y1) + x2 * y2;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
196
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
197 /* Addition a1 + a2 = z + m1 using Knuth's algorithm. */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
198 __float128 a1 = z + m1;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
199 __float128 t1 = a1 - z;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
200 __float128 t2 = a1 - t1;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
201 t1 = m1 - t1;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
202 t2 = z - t2;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
203 __float128 a2 = t1 + t2;
|
111
|
204 /* Ensure the arithmetic is not scheduled after feclearexcept call. */
|
|
205 math_force_eval (m2);
|
|
206 math_force_eval (a2);
|
|
207 feclearexcept (FE_INEXACT);
|
|
208
|
|
209 /* If the result is an exact zero, ensure it has the correct sign. */
|
|
210 if (a1 == 0 && m2 == 0)
|
|
211 {
|
|
212 feupdateenv (&env);
|
|
213 /* Ensure that round-to-nearest value of z + m1 is not reused. */
|
|
214 z = math_opt_barrier (z);
|
|
215 return z + m1;
|
|
216 }
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
217
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
218 fesetround (FE_TOWARDZERO);
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
219 /* Perform m2 + a2 addition with round to odd. */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
220 u.value = a2 + m2;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
221
|
145
|
222 if (__glibc_likely (adjust == 0))
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
223 {
|
145
|
224 if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
|
|
225 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
226 feupdateenv (&env);
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
227 /* Result is a1 + u.value. */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
228 return a1 + u.value;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
229 }
|
145
|
230 else if (__glibc_likely (adjust > 0))
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
231 {
|
145
|
232 if ((u.ieee.mantissa3 & 1) == 0 && u.ieee.exponent != 0x7fff)
|
|
233 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
234 feupdateenv (&env);
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
235 /* Result is a1 + u.value, scaled up. */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
236 return (a1 + u.value) * 0x1p113Q;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
237 }
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
238 else
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
239 {
|
145
|
240 if ((u.ieee.mantissa3 & 1) == 0)
|
|
241 u.ieee.mantissa3 |= fetestexcept (FE_INEXACT) != 0;
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
242 v.value = a1 + u.value;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
243 /* Ensure the addition is not scheduled after fetestexcept call. */
|
145
|
244 math_force_eval (v.value);
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
245 int j = fetestexcept (FE_INEXACT) != 0;
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
246 feupdateenv (&env);
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
247 /* Ensure the following computations are performed in default rounding
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
248 mode instead of just reusing the round to zero computation. */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
249 asm volatile ("" : "=m" (u) : "m" (u));
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
250 /* If a1 + u.value is exact, the only rounding happens during
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
251 scaling down. */
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
252 if (j == 0)
|
111
|
253 return v.value * 0x1p-228Q;
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
254 /* If result rounded to zero is not subnormal, no double
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
255 rounding will occur. */
|
111
|
256 if (v.ieee.exponent > 228)
|
|
257 return (a1 + u.value) * 0x1p-228Q;
|
145
|
258 /* If v.value * 0x1p-228L with round to zero is a subnormal above
|
|
259 or equal to FLT128_MIN / 2, then v.value * 0x1p-228L shifts mantissa
|
|
260 down just by 1 bit, which means v.ieee.mantissa3 |= j would
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
261 change the round bit, not sticky or guard bit.
|
145
|
262 v.value * 0x1p-228L never normalizes by shifting up,
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
263 so round bit plus sticky bit should be already enough
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
264 for proper rounding. */
|
111
|
265 if (v.ieee.exponent == 228)
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
266 {
|
111
|
267 /* If the exponent would be in the normal range when
|
|
268 rounding to normal precision with unbounded exponent
|
|
269 range, the exact result is known and spurious underflows
|
|
270 must be avoided on systems detecting tininess after
|
|
271 rounding. */
|
|
272 if (TININESS_AFTER_ROUNDING)
|
|
273 {
|
|
274 w.value = a1 + u.value;
|
|
275 if (w.ieee.exponent == 229)
|
|
276 return w.value * 0x1p-228Q;
|
|
277 }
|
145
|
278 /* v.ieee.mantissa3 & 2 is LSB bit of the result before rounding,
|
|
279 v.ieee.mantissa3 & 1 is the round bit and j is our sticky
|
|
280 bit. */
|
|
281 w.value = 0;
|
|
282 w.ieee.mantissa3 = ((v.ieee.mantissa3 & 3) << 1) | j;
|
111
|
283 w.ieee.negative = v.ieee.negative;
|
145
|
284 v.ieee.mantissa3 &= ~3U;
|
111
|
285 v.value *= 0x1p-228Q;
|
|
286 w.value *= 0x1p-2Q;
|
|
287 return v.value + w.value;
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
288 }
|
145
|
289 v.ieee.mantissa3 |= j;
|
111
|
290 return v.value * 0x1p-228Q;
|
68
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
291 }
|
Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
292 }
|