0
|
1 /* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
|
|
2
|
|
3 This file is part of GCC.
|
|
4
|
|
5 GCC is free software; you can redistribute it and/or modify it under
|
|
6 the terms of the GNU General Public License as published by the Free
|
|
7 Software Foundation; either version 3, or (at your option) any later
|
|
8 version.
|
|
9
|
|
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
13 for more details.
|
|
14
|
|
15 Under Section 7 of GPL version 3, you are granted additional
|
|
16 permissions described in the GCC Runtime Library Exception, version
|
|
17 3.1, as published by the Free Software Foundation.
|
|
18
|
|
19 You should have received a copy of the GNU General Public License and
|
|
20 a copy of the GCC Runtime Library Exception along with this program;
|
|
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
22 <http://www.gnu.org/licenses/>. */
|
|
23
|
|
24 #define BID_128RES
|
|
25 #include "bid_div_macros.h"
|
|
26 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
27 #include <fenv.h>
|
|
28
|
|
29 #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT
|
|
30 #endif
|
|
31
|
|
32 extern UINT32 convert_table[5][128][2];
|
|
33 extern SINT8 factors[][2];
|
|
34 extern UINT8 packed_10000_zeros[];
|
|
35
|
|
36 BID128_FUNCTION_ARG2 (bid128_div, x, y)
|
|
37
|
|
38 UINT256 CA4, CA4r, P256;
|
|
39 UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, Ql, res;
|
|
40 UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, PD,
|
|
41 valid_y;
|
|
42 int_float fx, fy, f64;
|
|
43 UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
|
|
44 int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2,
|
|
45 digits_q, amount;
|
|
46 int nzeros, i, j, k, d5;
|
|
47 unsigned rmode;
|
|
48 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
49 fexcept_t binaryflags = 0;
|
|
50 #endif
|
|
51
|
|
52 valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y);
|
|
53
|
|
54 // unpack arguments, check for NaN or Infinity
|
|
55 if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
|
|
56 // test if x is NaN
|
|
57 if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
|
|
58 #ifdef SET_STATUS_FLAGS
|
|
59 if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull || // sNaN
|
|
60 (y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull)
|
|
61 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
62 #endif
|
|
63 res.w[1] = (CX.w[1]) & QUIET_MASK64;
|
|
64 res.w[0] = CX.w[0];
|
|
65 BID_RETURN (res);
|
|
66 }
|
|
67 // x is Infinity?
|
|
68 if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
|
|
69 // check if y is Inf.
|
|
70 if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull))
|
|
71 // return NaN
|
|
72 {
|
|
73 #ifdef SET_STATUS_FLAGS
|
|
74 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
75 #endif
|
|
76 res.w[1] = 0x7c00000000000000ull;
|
|
77 res.w[0] = 0;
|
|
78 BID_RETURN (res);
|
|
79 }
|
|
80 // y is NaN?
|
|
81 if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull))
|
|
82 // return NaN
|
|
83 {
|
|
84 // return +/-Inf
|
|
85 res.w[1] = ((x.w[1] ^ y.w[1]) & 0x8000000000000000ull) |
|
|
86 0x7800000000000000ull;
|
|
87 res.w[0] = 0;
|
|
88 BID_RETURN (res);
|
|
89 }
|
|
90 }
|
|
91 // x is 0
|
|
92 if ((y.w[1] & 0x7800000000000000ull) < 0x7800000000000000ull) {
|
|
93 if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) {
|
|
94 #ifdef SET_STATUS_FLAGS
|
|
95 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
96 #endif
|
|
97 // x=y=0, return NaN
|
|
98 res.w[1] = 0x7c00000000000000ull;
|
|
99 res.w[0] = 0;
|
|
100 BID_RETURN (res);
|
|
101 }
|
|
102 // return 0
|
|
103 res.w[1] = (x.w[1] ^ y.w[1]) & 0x8000000000000000ull;
|
|
104 exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128;
|
|
105 if (exponent_x > DECIMAL_MAX_EXPON_128)
|
|
106 exponent_x = DECIMAL_MAX_EXPON_128;
|
|
107 else if (exponent_x < 0)
|
|
108 exponent_x = 0;
|
|
109 res.w[1] |= (((UINT64) exponent_x) << 49);
|
|
110 res.w[0] = 0;
|
|
111 BID_RETURN (res);
|
|
112 }
|
|
113 }
|
|
114 if (!valid_y) {
|
|
115 // y is Inf. or NaN
|
|
116
|
|
117 // test if y is NaN
|
|
118 if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
|
|
119 #ifdef SET_STATUS_FLAGS
|
|
120 if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN
|
|
121 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
122 #endif
|
|
123 res.w[1] = CY.w[1] & QUIET_MASK64;
|
|
124 res.w[0] = CY.w[0];
|
|
125 BID_RETURN (res);
|
|
126 }
|
|
127 // y is Infinity?
|
|
128 if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
|
|
129 // return +/-0
|
|
130 res.w[1] = sign_x ^ sign_y;
|
|
131 res.w[0] = 0;
|
|
132 BID_RETURN (res);
|
|
133 }
|
|
134 // y is 0, return +/-Inf
|
|
135 #ifdef SET_STATUS_FLAGS
|
|
136 __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
|
|
137 #endif
|
|
138 res.w[1] =
|
|
139 ((x.w[1] ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
|
|
140 res.w[0] = 0;
|
|
141 BID_RETURN (res);
|
|
142 }
|
|
143 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
144 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
145 #endif
|
|
146 diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128;
|
|
147
|
|
148 if (__unsigned_compare_gt_128 (CY, CX)) {
|
|
149 // CX < CY
|
|
150
|
|
151 // 2^64
|
|
152 f64.i = 0x5f800000;
|
|
153
|
|
154 // fx ~ CX, fy ~ CY
|
|
155 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
|
|
156 fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0];
|
|
157 // expon_cy - expon_cx
|
|
158 bin_index = (fy.i - fx.i) >> 23;
|
|
159
|
|
160 if (CX.w[1]) {
|
|
161 T = power10_index_binexp_128[bin_index].w[0];
|
|
162 __mul_64x128_short (CA, T, CX);
|
|
163 } else {
|
|
164 T128 = power10_index_binexp_128[bin_index];
|
|
165 __mul_64x128_short (CA, CX.w[0], T128);
|
|
166 }
|
|
167
|
|
168 ed2 = 33;
|
|
169 if (__unsigned_compare_gt_128 (CY, CA))
|
|
170 ed2++;
|
|
171
|
|
172 T128 = power10_table_128[ed2];
|
|
173 __mul_128x128_to_256 (CA4, CA, T128);
|
|
174
|
|
175 ed2 += estimate_decimal_digits[bin_index];
|
|
176 CQ.w[0] = CQ.w[1] = 0;
|
|
177 diff_expon = diff_expon - ed2;
|
|
178
|
|
179 } else {
|
|
180 // get CQ = CX/CY
|
|
181 __div_128_by_128 (&CQ, &CR, CX, CY);
|
|
182
|
|
183 if (!CR.w[1] && !CR.w[0]) {
|
|
184 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode,
|
|
185 pfpsf);
|
|
186 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
187 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
188 #endif
|
|
189 BID_RETURN (res);
|
|
190 }
|
|
191 // get number of decimal digits in CQ
|
|
192 // 2^64
|
|
193 f64.i = 0x5f800000;
|
|
194 fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0];
|
|
195 // binary expon. of CQ
|
|
196 bin_expon = (fx.i - 0x3f800000) >> 23;
|
|
197
|
|
198 digits_q = estimate_decimal_digits[bin_expon];
|
|
199 TP128.w[0] = power10_index_binexp_128[bin_expon].w[0];
|
|
200 TP128.w[1] = power10_index_binexp_128[bin_expon].w[1];
|
|
201 if (__unsigned_compare_ge_128 (CQ, TP128))
|
|
202 digits_q++;
|
|
203
|
|
204 ed2 = 34 - digits_q;
|
|
205 T128.w[0] = power10_table_128[ed2].w[0];
|
|
206 T128.w[1] = power10_table_128[ed2].w[1];
|
|
207 __mul_128x128_to_256 (CA4, CR, T128);
|
|
208 diff_expon = diff_expon - ed2;
|
|
209 __mul_128x128_low (CQ, CQ, T128);
|
|
210
|
|
211 }
|
|
212
|
|
213 __div_256_by_128 (&CQ, &CA4, CY);
|
|
214
|
|
215 #ifdef SET_STATUS_FLAGS
|
|
216 if (CA4.w[0] || CA4.w[1]) {
|
|
217 // set status flags
|
|
218 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
|
|
219 }
|
|
220 #ifndef LEAVE_TRAILING_ZEROS
|
|
221 else
|
|
222 #endif
|
|
223 #else
|
|
224 #ifndef LEAVE_TRAILING_ZEROS
|
|
225 if (!CA4.w[0] && !CA4.w[1])
|
|
226 #endif
|
|
227 #endif
|
|
228 #ifndef LEAVE_TRAILING_ZEROS
|
|
229 // check whether result is exact
|
|
230 {
|
|
231 // check whether CX, CY are short
|
|
232 if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) {
|
|
233 i = (int) CY.w[0] - 1;
|
|
234 j = (int) CX.w[0] - 1;
|
|
235 // difference in powers of 2 factors for Y and X
|
|
236 nzeros = ed2 - factors[i][0] + factors[j][0];
|
|
237 // difference in powers of 5 factors
|
|
238 d5 = ed2 - factors[i][1] + factors[j][1];
|
|
239 if (d5 < nzeros)
|
|
240 nzeros = d5;
|
|
241 // get P*(2^M[extra_digits])/10^extra_digits
|
|
242 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
|
|
243
|
|
244 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
|
|
245 amount = recip_scale[nzeros];
|
|
246 __shr_128_long (CQ, Qh, amount);
|
|
247
|
|
248 diff_expon += nzeros;
|
|
249 } else {
|
|
250 // decompose Q as Qh*10^17 + Ql
|
|
251 //T128 = reciprocals10_128[17];
|
|
252 T128.w[0] = 0x44909befeb9fad49ull;
|
|
253 T128.w[1] = 0x000b877aa3236a4bull;
|
|
254 __mul_128x128_to_256 (P256, CQ, T128);
|
|
255 //amount = recip_scale[17];
|
|
256 Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44));
|
|
257 Q_low = CQ.w[0] - Q_high * 100000000000000000ull;
|
|
258
|
|
259 if (!Q_low) {
|
|
260 diff_expon += 17;
|
|
261
|
|
262 tdigit[0] = Q_high & 0x3ffffff;
|
|
263 tdigit[1] = 0;
|
|
264 QX = Q_high >> 26;
|
|
265 QX32 = QX;
|
|
266 nzeros = 0;
|
|
267
|
|
268 for (j = 0; QX32; j++, QX32 >>= 7) {
|
|
269 k = (QX32 & 127);
|
|
270 tdigit[0] += convert_table[j][k][0];
|
|
271 tdigit[1] += convert_table[j][k][1];
|
|
272 if (tdigit[0] >= 100000000) {
|
|
273 tdigit[0] -= 100000000;
|
|
274 tdigit[1]++;
|
|
275 }
|
|
276 }
|
|
277
|
|
278 if (tdigit[1] >= 100000000) {
|
|
279 tdigit[1] -= 100000000;
|
|
280 if (tdigit[1] >= 100000000)
|
|
281 tdigit[1] -= 100000000;
|
|
282 }
|
|
283
|
|
284 digit = tdigit[0];
|
|
285 if (!digit && !tdigit[1])
|
|
286 nzeros += 16;
|
|
287 else {
|
|
288 if (!digit) {
|
|
289 nzeros += 8;
|
|
290 digit = tdigit[1];
|
|
291 }
|
|
292 // decompose digit
|
|
293 PD = (UINT64) digit *0x068DB8BBull;
|
|
294 digit_h = (UINT32) (PD >> 40);
|
|
295 digit_low = digit - digit_h * 10000;
|
|
296
|
|
297 if (!digit_low)
|
|
298 nzeros += 4;
|
|
299 else
|
|
300 digit_h = digit_low;
|
|
301
|
|
302 if (!(digit_h & 1))
|
|
303 nzeros +=
|
|
304 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
|
|
305 (digit_h & 7));
|
|
306 }
|
|
307
|
|
308 if (nzeros) {
|
|
309 __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]);
|
|
310
|
|
311 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64
|
|
312 amount = short_recip_scale[nzeros];
|
|
313 CQ.w[0] = CQ.w[1] >> amount;
|
|
314 } else
|
|
315 CQ.w[0] = Q_high;
|
|
316 CQ.w[1] = 0;
|
|
317
|
|
318 diff_expon += nzeros;
|
|
319 } else {
|
|
320 tdigit[0] = Q_low & 0x3ffffff;
|
|
321 tdigit[1] = 0;
|
|
322 QX = Q_low >> 26;
|
|
323 QX32 = QX;
|
|
324 nzeros = 0;
|
|
325
|
|
326 for (j = 0; QX32; j++, QX32 >>= 7) {
|
|
327 k = (QX32 & 127);
|
|
328 tdigit[0] += convert_table[j][k][0];
|
|
329 tdigit[1] += convert_table[j][k][1];
|
|
330 if (tdigit[0] >= 100000000) {
|
|
331 tdigit[0] -= 100000000;
|
|
332 tdigit[1]++;
|
|
333 }
|
|
334 }
|
|
335
|
|
336 if (tdigit[1] >= 100000000) {
|
|
337 tdigit[1] -= 100000000;
|
|
338 if (tdigit[1] >= 100000000)
|
|
339 tdigit[1] -= 100000000;
|
|
340 }
|
|
341
|
|
342 digit = tdigit[0];
|
|
343 if (!digit && !tdigit[1])
|
|
344 nzeros += 16;
|
|
345 else {
|
|
346 if (!digit) {
|
|
347 nzeros += 8;
|
|
348 digit = tdigit[1];
|
|
349 }
|
|
350 // decompose digit
|
|
351 PD = (UINT64) digit *0x068DB8BBull;
|
|
352 digit_h = (UINT32) (PD >> 40);
|
|
353 digit_low = digit - digit_h * 10000;
|
|
354
|
|
355 if (!digit_low)
|
|
356 nzeros += 4;
|
|
357 else
|
|
358 digit_h = digit_low;
|
|
359
|
|
360 if (!(digit_h & 1))
|
|
361 nzeros +=
|
|
362 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
|
|
363 (digit_h & 7));
|
|
364 }
|
|
365
|
|
366 if (nzeros) {
|
|
367 // get P*(2^M[extra_digits])/10^extra_digits
|
|
368 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
|
|
369
|
|
370 //now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
|
|
371 amount = recip_scale[nzeros];
|
|
372 __shr_128 (CQ, Qh, amount);
|
|
373 }
|
|
374 diff_expon += nzeros;
|
|
375
|
|
376 }
|
|
377 }
|
|
378 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf);
|
|
379 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
380 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
381 #endif
|
|
382 BID_RETURN (res);
|
|
383 }
|
|
384 #endif
|
|
385
|
|
386 if (diff_expon >= 0) {
|
|
387 #ifdef IEEE_ROUND_NEAREST
|
|
388 // rounding
|
|
389 // 2*CA4 - CY
|
|
390 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
391 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
392 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
393 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
394
|
|
395 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
|
|
396 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
|
|
397
|
|
398 CQ.w[0] += carry64;
|
|
399 if (CQ.w[0] < carry64)
|
|
400 CQ.w[1]++;
|
|
401 #else
|
|
402 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
|
|
403 // rounding
|
|
404 // 2*CA4 - CY
|
|
405 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
406 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
407 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
408 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
409
|
|
410 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
|
|
411 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
|
|
412
|
|
413 CQ.w[0] += carry64;
|
|
414 if (CQ.w[0] < carry64)
|
|
415 CQ.w[1]++;
|
|
416 #else
|
|
417 rmode = rnd_mode;
|
|
418 if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
|
|
419 rmode = 3 - rmode;
|
|
420 switch (rmode) {
|
|
421 case ROUNDING_TO_NEAREST: // round to nearest code
|
|
422 // rounding
|
|
423 // 2*CA4 - CY
|
|
424 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
425 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
426 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
427 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
428 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
|
|
429 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
|
|
430 CQ.w[0] += carry64;
|
|
431 if (CQ.w[0] < carry64)
|
|
432 CQ.w[1]++;
|
|
433 break;
|
|
434 case ROUNDING_TIES_AWAY:
|
|
435 // rounding
|
|
436 // 2*CA4 - CY
|
|
437 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
438 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
439 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
440 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
441 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
|
|
442 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
|
|
443 CQ.w[0] += carry64;
|
|
444 if (CQ.w[0] < carry64)
|
|
445 CQ.w[1]++;
|
|
446 break;
|
|
447 case ROUNDING_DOWN:
|
|
448 case ROUNDING_TO_ZERO:
|
|
449 break;
|
|
450 default: // rounding up
|
|
451 CQ.w[0]++;
|
|
452 if (!CQ.w[0])
|
|
453 CQ.w[1]++;
|
|
454 break;
|
|
455 }
|
|
456 #endif
|
|
457 #endif
|
|
458
|
|
459 } else {
|
|
460 #ifdef SET_STATUS_FLAGS
|
|
461 if (CA4.w[0] || CA4.w[1]) {
|
|
462 // set status flags
|
|
463 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
|
|
464 }
|
|
465 #endif
|
|
466
|
|
467 handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ,
|
|
468 CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf);
|
|
469 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
470 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
471 #endif
|
|
472 BID_RETURN (res);
|
|
473
|
|
474 }
|
|
475
|
|
476 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf);
|
|
477 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
478 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
479 #endif
|
|
480 BID_RETURN (res);
|
|
481 }
|
|
482
|
|
483
|
|
484 //#define LEAVE_TRAILING_ZEROS
|
|
485
|
|
486 TYPE0_FUNCTION_ARGTYPE1_ARGTYPE2 (UINT128, bid128dd_div, UINT64, x,
|
|
487 UINT64, y)
|
|
488
|
|
489 UINT256 CA4, CA4r, P256;
|
|
490 UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, Ql, res;
|
|
491 UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, PD,
|
|
492 valid_y;
|
|
493 int_float fx, fy, f64;
|
|
494 UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
|
|
495 int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2,
|
|
496 digits_q, amount;
|
|
497 int nzeros, i, j, k, d5;
|
|
498 unsigned rmode;
|
|
499 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
500 fexcept_t binaryflags = 0;
|
|
501 #endif
|
|
502
|
|
503 valid_y = unpack_BID64 (&sign_y, &exponent_y, &CY.w[0], y);
|
|
504
|
|
505 // unpack arguments, check for NaN or Infinity
|
|
506 CX.w[1] = 0;
|
|
507 if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], (x))) {
|
|
508 #ifdef SET_STATUS_FLAGS
|
|
509 if ((y & SNAN_MASK64) == SNAN_MASK64) // y is sNaN
|
|
510 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
511 #endif
|
|
512
|
|
513 // test if x is NaN
|
|
514 if ((x & NAN_MASK64) == NAN_MASK64) {
|
|
515 #ifdef SET_STATUS_FLAGS
|
|
516 if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN
|
|
517 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
518 #endif
|
|
519 res.w[0] = (CX.w[0] & 0x0003ffffffffffffull);
|
|
520 __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]);
|
|
521 res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull);
|
|
522 BID_RETURN (res);
|
|
523 }
|
|
524 // x is Infinity?
|
|
525 if (((x) & 0x7800000000000000ull) == 0x7800000000000000ull) {
|
|
526 // check if y is Inf.
|
|
527 if ((((y) & 0x7c00000000000000ull) == 0x7800000000000000ull))
|
|
528 // return NaN
|
|
529 {
|
|
530 #ifdef SET_STATUS_FLAGS
|
|
531 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
532 #endif
|
|
533 res.w[1] = 0x7c00000000000000ull;
|
|
534 res.w[0] = 0;
|
|
535 BID_RETURN (res);
|
|
536 }
|
|
537 if ((((y) & 0x7c00000000000000ull) != 0x7c00000000000000ull)) {
|
|
538 // otherwise return +/-Inf
|
|
539 res.w[1] =
|
|
540 (((x) ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull;
|
|
541 res.w[0] = 0;
|
|
542 BID_RETURN (res);
|
|
543 }
|
|
544 }
|
|
545 // x is 0
|
|
546 if ((((y) & 0x7800000000000000ull) != 0x7800000000000000ull)) {
|
|
547 if(!CY.w[0]) {
|
|
548 #ifdef SET_STATUS_FLAGS
|
|
549 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
550 #endif
|
|
551 // x=y=0, return NaN
|
|
552 res.w[1] = 0x7c00000000000000ull;
|
|
553 res.w[0] = 0;
|
|
554 BID_RETURN (res);
|
|
555 }
|
|
556 // return 0
|
|
557 res.w[1] = ((x) ^ (y)) & 0x8000000000000000ull;
|
|
558 if (((y) & 0x6000000000000000ull) == 0x6000000000000000ull)
|
|
559 exponent_y = ((UINT32) ((y) >> 51)) & 0x3ff;
|
|
560 else
|
|
561 exponent_y = ((UINT32) ((y) >> 53)) & 0x3ff;
|
|
562 exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128;
|
|
563 if (exponent_x > DECIMAL_MAX_EXPON_128)
|
|
564 exponent_x = DECIMAL_MAX_EXPON_128;
|
|
565 else if (exponent_x < 0)
|
|
566 exponent_x = 0;
|
|
567 res.w[1] |= (((UINT64) exponent_x) << 49);
|
|
568 res.w[0] = 0;
|
|
569 BID_RETURN (res);
|
|
570 }
|
|
571 }
|
|
572
|
|
573 CY.w[1] = 0;
|
|
574 if (!valid_y) {
|
|
575 // y is Inf. or NaN
|
|
576
|
|
577 // test if y is NaN
|
|
578 if ((y & NAN_MASK64) == NAN_MASK64) {
|
|
579 #ifdef SET_STATUS_FLAGS
|
|
580 if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN
|
|
581 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
582 #endif
|
|
583 res.w[0] = (CY.w[0] & 0x0003ffffffffffffull);
|
|
584 __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]);
|
|
585 res.w[1] |= ((CY.w[0]) & 0xfc00000000000000ull);
|
|
586 BID_RETURN (res);
|
|
587 }
|
|
588 // y is Infinity?
|
|
589 if (((y) & 0x7800000000000000ull) == 0x7800000000000000ull) {
|
|
590 // return +/-0
|
|
591 res.w[1] = sign_x ^ sign_y;
|
|
592 res.w[0] = 0;
|
|
593 BID_RETURN (res);
|
|
594 }
|
|
595 // y is 0, return +/-Inf
|
|
596 res.w[1] =
|
|
597 (((x) ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull;
|
|
598 res.w[0] = 0;
|
|
599 #ifdef SET_STATUS_FLAGS
|
|
600 __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
|
|
601 #endif
|
|
602 BID_RETURN (res);
|
|
603 }
|
|
604 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
605 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
606 #endif
|
|
607 diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128;
|
|
608
|
|
609 if (__unsigned_compare_gt_128 (CY, CX)) {
|
|
610 // CX < CY
|
|
611
|
|
612 // 2^64
|
|
613 f64.i = 0x5f800000;
|
|
614
|
|
615 // fx ~ CX, fy ~ CY
|
|
616 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
|
|
617 fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0];
|
|
618 // expon_cy - expon_cx
|
|
619 bin_index = (fy.i - fx.i) >> 23;
|
|
620
|
|
621 if (CX.w[1]) {
|
|
622 T = power10_index_binexp_128[bin_index].w[0];
|
|
623 __mul_64x128_short (CA, T, CX);
|
|
624 } else {
|
|
625 T128 = power10_index_binexp_128[bin_index];
|
|
626 __mul_64x128_short (CA, CX.w[0], T128);
|
|
627 }
|
|
628
|
|
629 ed2 = 33;
|
|
630 if (__unsigned_compare_gt_128 (CY, CA))
|
|
631 ed2++;
|
|
632
|
|
633 T128 = power10_table_128[ed2];
|
|
634 __mul_128x128_to_256 (CA4, CA, T128);
|
|
635
|
|
636 ed2 += estimate_decimal_digits[bin_index];
|
|
637 CQ.w[0] = CQ.w[1] = 0;
|
|
638 diff_expon = diff_expon - ed2;
|
|
639
|
|
640 } else {
|
|
641 // get CQ = CX/CY
|
|
642 __div_128_by_128 (&CQ, &CR, CX, CY);
|
|
643
|
|
644 if (!CR.w[1] && !CR.w[0]) {
|
|
645 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode,
|
|
646 pfpsf);
|
|
647 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
648 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
649 #endif
|
|
650 BID_RETURN (res);
|
|
651 }
|
|
652 // get number of decimal digits in CQ
|
|
653 // 2^64
|
|
654 f64.i = 0x5f800000;
|
|
655 fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0];
|
|
656 // binary expon. of CQ
|
|
657 bin_expon = (fx.i - 0x3f800000) >> 23;
|
|
658
|
|
659 digits_q = estimate_decimal_digits[bin_expon];
|
|
660 TP128.w[0] = power10_index_binexp_128[bin_expon].w[0];
|
|
661 TP128.w[1] = power10_index_binexp_128[bin_expon].w[1];
|
|
662 if (__unsigned_compare_ge_128 (CQ, TP128))
|
|
663 digits_q++;
|
|
664
|
|
665 ed2 = 34 - digits_q;
|
|
666 T128.w[0] = power10_table_128[ed2].w[0];
|
|
667 T128.w[1] = power10_table_128[ed2].w[1];
|
|
668 __mul_128x128_to_256 (CA4, CR, T128);
|
|
669 diff_expon = diff_expon - ed2;
|
|
670 __mul_128x128_low (CQ, CQ, T128);
|
|
671
|
|
672 }
|
|
673
|
|
674 __div_256_by_128 (&CQ, &CA4, CY);
|
|
675
|
|
676
|
|
677 #ifdef SET_STATUS_FLAGS
|
|
678 if (CA4.w[0] || CA4.w[1]) {
|
|
679 // set status flags
|
|
680 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
|
|
681 }
|
|
682 #ifndef LEAVE_TRAILING_ZEROS
|
|
683 else
|
|
684 #endif
|
|
685 #else
|
|
686 #ifndef LEAVE_TRAILING_ZEROS
|
|
687 if (!CA4.w[0] && !CA4.w[1])
|
|
688 #endif
|
|
689 #endif
|
|
690 #ifndef LEAVE_TRAILING_ZEROS
|
|
691 // check whether result is exact
|
|
692 {
|
|
693 // check whether CX, CY are short
|
|
694 if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) {
|
|
695 i = (int) CY.w[0] - 1;
|
|
696 j = (int) CX.w[0] - 1;
|
|
697 // difference in powers of 2 factors for Y and X
|
|
698 nzeros = ed2 - factors[i][0] + factors[j][0];
|
|
699 // difference in powers of 5 factors
|
|
700 d5 = ed2 - factors[i][1] + factors[j][1];
|
|
701 if (d5 < nzeros)
|
|
702 nzeros = d5;
|
|
703 // get P*(2^M[extra_digits])/10^extra_digits
|
|
704 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
|
|
705 //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2];
|
|
706
|
|
707 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
|
|
708 amount = recip_scale[nzeros];
|
|
709 __shr_128_long (CQ, Qh, amount);
|
|
710
|
|
711 diff_expon += nzeros;
|
|
712 } else {
|
|
713 // decompose Q as Qh*10^17 + Ql
|
|
714 //T128 = reciprocals10_128[17];
|
|
715 T128.w[0] = 0x44909befeb9fad49ull;
|
|
716 T128.w[1] = 0x000b877aa3236a4bull;
|
|
717 __mul_128x128_to_256 (P256, CQ, T128);
|
|
718 //amount = recip_scale[17];
|
|
719 Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44));
|
|
720 Q_low = CQ.w[0] - Q_high * 100000000000000000ull;
|
|
721
|
|
722 if (!Q_low) {
|
|
723 diff_expon += 17;
|
|
724
|
|
725 tdigit[0] = Q_high & 0x3ffffff;
|
|
726 tdigit[1] = 0;
|
|
727 QX = Q_high >> 26;
|
|
728 QX32 = QX;
|
|
729 nzeros = 0;
|
|
730
|
|
731 for (j = 0; QX32; j++, QX32 >>= 7) {
|
|
732 k = (QX32 & 127);
|
|
733 tdigit[0] += convert_table[j][k][0];
|
|
734 tdigit[1] += convert_table[j][k][1];
|
|
735 if (tdigit[0] >= 100000000) {
|
|
736 tdigit[0] -= 100000000;
|
|
737 tdigit[1]++;
|
|
738 }
|
|
739 }
|
|
740
|
|
741
|
|
742 if (tdigit[1] >= 100000000) {
|
|
743 tdigit[1] -= 100000000;
|
|
744 if (tdigit[1] >= 100000000)
|
|
745 tdigit[1] -= 100000000;
|
|
746 }
|
|
747
|
|
748 digit = tdigit[0];
|
|
749 if (!digit && !tdigit[1])
|
|
750 nzeros += 16;
|
|
751 else {
|
|
752 if (!digit) {
|
|
753 nzeros += 8;
|
|
754 digit = tdigit[1];
|
|
755 }
|
|
756 // decompose digit
|
|
757 PD = (UINT64) digit *0x068DB8BBull;
|
|
758 digit_h = (UINT32) (PD >> 40);
|
|
759 digit_low = digit - digit_h * 10000;
|
|
760
|
|
761 if (!digit_low)
|
|
762 nzeros += 4;
|
|
763 else
|
|
764 digit_h = digit_low;
|
|
765
|
|
766 if (!(digit_h & 1))
|
|
767 nzeros +=
|
|
768 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
|
|
769 (digit_h & 7));
|
|
770 }
|
|
771
|
|
772 if (nzeros) {
|
|
773 __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]);
|
|
774
|
|
775 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64
|
|
776 amount = short_recip_scale[nzeros];
|
|
777 CQ.w[0] = CQ.w[1] >> amount;
|
|
778 } else
|
|
779 CQ.w[0] = Q_high;
|
|
780 CQ.w[1] = 0;
|
|
781
|
|
782 diff_expon += nzeros;
|
|
783 } else {
|
|
784 tdigit[0] = Q_low & 0x3ffffff;
|
|
785 tdigit[1] = 0;
|
|
786 QX = Q_low >> 26;
|
|
787 QX32 = QX;
|
|
788 nzeros = 0;
|
|
789
|
|
790 for (j = 0; QX32; j++, QX32 >>= 7) {
|
|
791 k = (QX32 & 127);
|
|
792 tdigit[0] += convert_table[j][k][0];
|
|
793 tdigit[1] += convert_table[j][k][1];
|
|
794 if (tdigit[0] >= 100000000) {
|
|
795 tdigit[0] -= 100000000;
|
|
796 tdigit[1]++;
|
|
797 }
|
|
798 }
|
|
799
|
|
800 if (tdigit[1] >= 100000000) {
|
|
801 tdigit[1] -= 100000000;
|
|
802 if (tdigit[1] >= 100000000)
|
|
803 tdigit[1] -= 100000000;
|
|
804 }
|
|
805
|
|
806 digit = tdigit[0];
|
|
807 if (!digit && !tdigit[1])
|
|
808 nzeros += 16;
|
|
809 else {
|
|
810 if (!digit) {
|
|
811 nzeros += 8;
|
|
812 digit = tdigit[1];
|
|
813 }
|
|
814 // decompose digit
|
|
815 PD = (UINT64) digit *0x068DB8BBull;
|
|
816 digit_h = (UINT32) (PD >> 40);
|
|
817 digit_low = digit - digit_h * 10000;
|
|
818
|
|
819 if (!digit_low)
|
|
820 nzeros += 4;
|
|
821 else
|
|
822 digit_h = digit_low;
|
|
823
|
|
824 if (!(digit_h & 1))
|
|
825 nzeros +=
|
|
826 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
|
|
827 (digit_h & 7));
|
|
828 }
|
|
829
|
|
830 if (nzeros) {
|
|
831 // get P*(2^M[extra_digits])/10^extra_digits
|
|
832 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
|
|
833
|
|
834 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
|
|
835 amount = recip_scale[nzeros];
|
|
836 __shr_128 (CQ, Qh, amount);
|
|
837 }
|
|
838 diff_expon += nzeros;
|
|
839
|
|
840 }
|
|
841 }
|
|
842 get_BID128(&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode,pfpsf);
|
|
843 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
844 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
845 #endif
|
|
846 BID_RETURN (res);
|
|
847 }
|
|
848 #endif
|
|
849
|
|
850 if (diff_expon >= 0) {
|
|
851 #ifdef IEEE_ROUND_NEAREST
|
|
852 // rounding
|
|
853 // 2*CA4 - CY
|
|
854 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
855 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
856 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
857 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
858
|
|
859 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
|
|
860 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
|
|
861
|
|
862 CQ.w[0] += carry64;
|
|
863 if (CQ.w[0] < carry64)
|
|
864 CQ.w[1]++;
|
|
865 #else
|
|
866 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
|
|
867 // rounding
|
|
868 // 2*CA4 - CY
|
|
869 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
870 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
871 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
872 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
873
|
|
874 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
|
|
875 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
|
|
876
|
|
877 CQ.w[0] += carry64;
|
|
878 if (CQ.w[0] < carry64)
|
|
879 CQ.w[1]++;
|
|
880 #else
|
|
881 rmode = rnd_mode;
|
|
882 if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
|
|
883 rmode = 3 - rmode;
|
|
884 switch (rmode) {
|
|
885 case ROUNDING_TO_NEAREST: // round to nearest code
|
|
886 // rounding
|
|
887 // 2*CA4 - CY
|
|
888 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
889 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
890 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
891 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
892 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
|
|
893 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
|
|
894 CQ.w[0] += carry64;
|
|
895 if (CQ.w[0] < carry64)
|
|
896 CQ.w[1]++;
|
|
897 break;
|
|
898 case ROUNDING_TIES_AWAY:
|
|
899 // rounding
|
|
900 // 2*CA4 - CY
|
|
901 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
902 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
903 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
904 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
905 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
|
|
906 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
|
|
907 CQ.w[0] += carry64;
|
|
908 if (CQ.w[0] < carry64)
|
|
909 CQ.w[1]++;
|
|
910 break;
|
|
911 case ROUNDING_DOWN:
|
|
912 case ROUNDING_TO_ZERO:
|
|
913 break;
|
|
914 default: // rounding up
|
|
915 CQ.w[0]++;
|
|
916 if (!CQ.w[0])
|
|
917 CQ.w[1]++;
|
|
918 break;
|
|
919 }
|
|
920 #endif
|
|
921 #endif
|
|
922
|
|
923 } else {
|
|
924 #ifdef SET_STATUS_FLAGS
|
|
925 if (CA4.w[0] || CA4.w[1]) {
|
|
926 // set status flags
|
|
927 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
|
|
928 }
|
|
929 #endif
|
|
930 handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ,
|
|
931 CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf);
|
|
932 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
933 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
934 #endif
|
|
935 BID_RETURN (res);
|
|
936
|
|
937 }
|
|
938
|
|
939 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf);
|
|
940 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
941 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
942 #endif
|
|
943 BID_RETURN (res);
|
|
944 }
|
|
945
|
|
946
|
|
947 BID128_FUNCTION_ARGTYPE1_ARG128 (bid128dq_div, UINT64, x, y)
|
|
948 UINT256 CA4, CA4r, P256;
|
|
949 UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, Ql, res;
|
|
950 UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, valid_y,
|
|
951 PD;
|
|
952 int_float fx, fy, f64;
|
|
953 UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
|
|
954 int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2,
|
|
955 digits_q, amount;
|
|
956 int nzeros, i, j, k, d5;
|
|
957 unsigned rmode;
|
|
958 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
959 fexcept_t binaryflags = 0;
|
|
960 #endif
|
|
961
|
|
962 valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y);
|
|
963
|
|
964 // unpack arguments, check for NaN or Infinity
|
|
965 CX.w[1] = 0;
|
|
966 if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], x)) {
|
|
967 #ifdef SET_STATUS_FLAGS
|
|
968 if ((y.w[1] & SNAN_MASK64) == SNAN_MASK64) // y is sNaN
|
|
969 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
970 #endif
|
|
971
|
|
972 // test if x is NaN
|
|
973 if ((x & NAN_MASK64) == NAN_MASK64) {
|
|
974 #ifdef SET_STATUS_FLAGS
|
|
975 if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN
|
|
976 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
977 #endif
|
|
978 res.w[0] = (CX.w[0] & 0x0003ffffffffffffull);
|
|
979 __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]);
|
|
980 res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull);
|
|
981 BID_RETURN (res);
|
|
982 }
|
|
983 // x is Infinity?
|
|
984 if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) {
|
|
985 // check if y is Inf.
|
|
986 if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull))
|
|
987 // return NaN
|
|
988 {
|
|
989 #ifdef SET_STATUS_FLAGS
|
|
990 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
991 #endif
|
|
992 res.w[1] = 0x7c00000000000000ull;
|
|
993 res.w[0] = 0;
|
|
994 BID_RETURN (res);
|
|
995 }
|
|
996 if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) {
|
|
997 // otherwise return +/-Inf
|
|
998 res.w[1] =
|
|
999 ((x ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
|
|
1000 res.w[0] = 0;
|
|
1001 BID_RETURN (res);
|
|
1002 }
|
|
1003 }
|
|
1004 // x is 0
|
|
1005 if ((y.w[1] & INFINITY_MASK64) != INFINITY_MASK64) {
|
|
1006 if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) {
|
|
1007 #ifdef SET_STATUS_FLAGS
|
|
1008 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
1009 #endif
|
|
1010 // x=y=0, return NaN
|
|
1011 res.w[1] = 0x7c00000000000000ull;
|
|
1012 res.w[0] = 0;
|
|
1013 BID_RETURN (res);
|
|
1014 }
|
|
1015 // return 0
|
|
1016 res.w[1] = (x ^ y.w[1]) & 0x8000000000000000ull;
|
|
1017 exponent_x = exponent_x - exponent_y + (DECIMAL_EXPONENT_BIAS_128<<1) - DECIMAL_EXPONENT_BIAS;
|
|
1018 if (exponent_x > DECIMAL_MAX_EXPON_128)
|
|
1019 exponent_x = DECIMAL_MAX_EXPON_128;
|
|
1020 else if (exponent_x < 0)
|
|
1021 exponent_x = 0;
|
|
1022 res.w[1] |= (((UINT64) exponent_x) << 49);
|
|
1023 res.w[0] = 0;
|
|
1024 BID_RETURN (res);
|
|
1025 }
|
|
1026 }
|
|
1027 exponent_x += (DECIMAL_EXPONENT_BIAS_128 - DECIMAL_EXPONENT_BIAS);
|
|
1028
|
|
1029 if (!valid_y) {
|
|
1030 // y is Inf. or NaN
|
|
1031
|
|
1032 // test if y is NaN
|
|
1033 if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
|
|
1034 #ifdef SET_STATUS_FLAGS
|
|
1035 if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN
|
|
1036 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
1037 #endif
|
|
1038 res.w[1] = CY.w[1] & QUIET_MASK64;
|
|
1039 res.w[0] = CY.w[0];
|
|
1040 BID_RETURN (res);
|
|
1041 }
|
|
1042 // y is Infinity?
|
|
1043 if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
|
|
1044 // return +/-0
|
|
1045 res.w[1] = sign_x ^ sign_y;
|
|
1046 res.w[0] = 0;
|
|
1047 BID_RETURN (res);
|
|
1048 }
|
|
1049 // y is 0, return +/-Inf
|
|
1050 res.w[1] =
|
|
1051 ((x ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull;
|
|
1052 res.w[0] = 0;
|
|
1053 #ifdef SET_STATUS_FLAGS
|
|
1054 __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
|
|
1055 #endif
|
|
1056 BID_RETURN (res);
|
|
1057 }
|
|
1058 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
1059 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
1060 #endif
|
|
1061 diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128;
|
|
1062
|
|
1063 if (__unsigned_compare_gt_128 (CY, CX)) {
|
|
1064 // CX < CY
|
|
1065
|
|
1066 // 2^64
|
|
1067 f64.i = 0x5f800000;
|
|
1068
|
|
1069 // fx ~ CX, fy ~ CY
|
|
1070 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
|
|
1071 fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0];
|
|
1072 // expon_cy - expon_cx
|
|
1073 bin_index = (fy.i - fx.i) >> 23;
|
|
1074
|
|
1075 if (CX.w[1]) {
|
|
1076 T = power10_index_binexp_128[bin_index].w[0];
|
|
1077 __mul_64x128_short (CA, T, CX);
|
|
1078 } else {
|
|
1079 T128 = power10_index_binexp_128[bin_index];
|
|
1080 __mul_64x128_short (CA, CX.w[0], T128);
|
|
1081 }
|
|
1082
|
|
1083 ed2 = 33;
|
|
1084 if (__unsigned_compare_gt_128 (CY, CA))
|
|
1085 ed2++;
|
|
1086
|
|
1087 T128 = power10_table_128[ed2];
|
|
1088 __mul_128x128_to_256 (CA4, CA, T128);
|
|
1089
|
|
1090 ed2 += estimate_decimal_digits[bin_index];
|
|
1091 CQ.w[0] = CQ.w[1] = 0;
|
|
1092 diff_expon = diff_expon - ed2;
|
|
1093
|
|
1094 } else {
|
|
1095 // get CQ = CX/CY
|
|
1096 __div_128_by_128 (&CQ, &CR, CX, CY);
|
|
1097
|
|
1098 if (!CR.w[1] && !CR.w[0]) {
|
|
1099 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode,
|
|
1100 pfpsf);
|
|
1101 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
1102 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
1103 #endif
|
|
1104 BID_RETURN (res);
|
|
1105 }
|
|
1106 // get number of decimal digits in CQ
|
|
1107 // 2^64
|
|
1108 f64.i = 0x5f800000;
|
|
1109 fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0];
|
|
1110 // binary expon. of CQ
|
|
1111 bin_expon = (fx.i - 0x3f800000) >> 23;
|
|
1112
|
|
1113 digits_q = estimate_decimal_digits[bin_expon];
|
|
1114 TP128.w[0] = power10_index_binexp_128[bin_expon].w[0];
|
|
1115 TP128.w[1] = power10_index_binexp_128[bin_expon].w[1];
|
|
1116 if (__unsigned_compare_ge_128 (CQ, TP128))
|
|
1117 digits_q++;
|
|
1118
|
|
1119 ed2 = 34 - digits_q;
|
|
1120 T128.w[0] = power10_table_128[ed2].w[0];
|
|
1121 T128.w[1] = power10_table_128[ed2].w[1];
|
|
1122 __mul_128x128_to_256 (CA4, CR, T128);
|
|
1123 diff_expon = diff_expon - ed2;
|
|
1124 __mul_128x128_low (CQ, CQ, T128);
|
|
1125
|
|
1126 }
|
|
1127
|
|
1128 __div_256_by_128 (&CQ, &CA4, CY);
|
|
1129
|
|
1130 #ifdef SET_STATUS_FLAGS
|
|
1131 if (CA4.w[0] || CA4.w[1]) {
|
|
1132 // set status flags
|
|
1133 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
|
|
1134 }
|
|
1135 #ifndef LEAVE_TRAILING_ZEROS
|
|
1136 else
|
|
1137 #endif
|
|
1138 #else
|
|
1139 #ifndef LEAVE_TRAILING_ZEROS
|
|
1140 if (!CA4.w[0] && !CA4.w[1])
|
|
1141 #endif
|
|
1142 #endif
|
|
1143 #ifndef LEAVE_TRAILING_ZEROS
|
|
1144 // check whether result is exact
|
|
1145 {
|
|
1146 //printf("ed2=%d,nz=%d,a=%d,CQ="LX16","LX16", RH="LX16", RL="LX16"\n",ed2,nzeros,amount,CQ.w[1],CQ.w[0],reciprocals10_128[nzeros].w[1],reciprocals10_128[nzeros].w[0]);fflush(stdout);
|
|
1147 // check whether CX, CY are short
|
|
1148 if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) {
|
|
1149 i = (int) CY.w[0] - 1;
|
|
1150 j = (int) CX.w[0] - 1;
|
|
1151 // difference in powers of 2 factors for Y and X
|
|
1152 nzeros = ed2 - factors[i][0] + factors[j][0];
|
|
1153 // difference in powers of 5 factors
|
|
1154 d5 = ed2 - factors[i][1] + factors[j][1];
|
|
1155 if (d5 < nzeros)
|
|
1156 nzeros = d5;
|
|
1157 // get P*(2^M[extra_digits])/10^extra_digits
|
|
1158 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
|
|
1159 //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2];
|
|
1160
|
|
1161 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
|
|
1162 amount = recip_scale[nzeros];
|
|
1163 __shr_128_long (CQ, Qh, amount);
|
|
1164
|
|
1165 diff_expon += nzeros;
|
|
1166 } else {
|
|
1167 // decompose Q as Qh*10^17 + Ql
|
|
1168 //T128 = reciprocals10_128[17];
|
|
1169 T128.w[0] = 0x44909befeb9fad49ull;
|
|
1170 T128.w[1] = 0x000b877aa3236a4bull;
|
|
1171 __mul_128x128_to_256 (P256, CQ, T128);
|
|
1172 //amount = recip_scale[17];
|
|
1173 Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44));
|
|
1174 Q_low = CQ.w[0] - Q_high * 100000000000000000ull;
|
|
1175
|
|
1176 if (!Q_low) {
|
|
1177 diff_expon += 17;
|
|
1178
|
|
1179 tdigit[0] = Q_high & 0x3ffffff;
|
|
1180 tdigit[1] = 0;
|
|
1181 QX = Q_high >> 26;
|
|
1182 QX32 = QX;
|
|
1183 nzeros = 0;
|
|
1184
|
|
1185 for (j = 0; QX32; j++, QX32 >>= 7) {
|
|
1186 k = (QX32 & 127);
|
|
1187 tdigit[0] += convert_table[j][k][0];
|
|
1188 tdigit[1] += convert_table[j][k][1];
|
|
1189 if (tdigit[0] >= 100000000) {
|
|
1190 tdigit[0] -= 100000000;
|
|
1191 tdigit[1]++;
|
|
1192 }
|
|
1193 }
|
|
1194
|
|
1195
|
|
1196 if (tdigit[1] >= 100000000) {
|
|
1197 tdigit[1] -= 100000000;
|
|
1198 if (tdigit[1] >= 100000000)
|
|
1199 tdigit[1] -= 100000000;
|
|
1200 }
|
|
1201
|
|
1202 digit = tdigit[0];
|
|
1203 if (!digit && !tdigit[1])
|
|
1204 nzeros += 16;
|
|
1205 else {
|
|
1206 if (!digit) {
|
|
1207 nzeros += 8;
|
|
1208 digit = tdigit[1];
|
|
1209 }
|
|
1210 // decompose digit
|
|
1211 PD = (UINT64) digit *0x068DB8BBull;
|
|
1212 digit_h = (UINT32) (PD >> 40);
|
|
1213 //printf("i=%d, nz=%d, digit=%d (%d, %016I64x %016I64x)\n",i,nzeros,digit_h,digit,PD,digit_h);fflush(stdout);
|
|
1214 digit_low = digit - digit_h * 10000;
|
|
1215
|
|
1216 if (!digit_low)
|
|
1217 nzeros += 4;
|
|
1218 else
|
|
1219 digit_h = digit_low;
|
|
1220
|
|
1221 if (!(digit_h & 1))
|
|
1222 nzeros +=
|
|
1223 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
|
|
1224 (digit_h & 7));
|
|
1225 }
|
|
1226
|
|
1227 if (nzeros) {
|
|
1228 __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]);
|
|
1229
|
|
1230 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64
|
|
1231 amount = short_recip_scale[nzeros];
|
|
1232 CQ.w[0] = CQ.w[1] >> amount;
|
|
1233 } else
|
|
1234 CQ.w[0] = Q_high;
|
|
1235 CQ.w[1] = 0;
|
|
1236
|
|
1237 diff_expon += nzeros;
|
|
1238 } else {
|
|
1239 tdigit[0] = Q_low & 0x3ffffff;
|
|
1240 tdigit[1] = 0;
|
|
1241 QX = Q_low >> 26;
|
|
1242 QX32 = QX;
|
|
1243 nzeros = 0;
|
|
1244
|
|
1245 for (j = 0; QX32; j++, QX32 >>= 7) {
|
|
1246 k = (QX32 & 127);
|
|
1247 tdigit[0] += convert_table[j][k][0];
|
|
1248 tdigit[1] += convert_table[j][k][1];
|
|
1249 if (tdigit[0] >= 100000000) {
|
|
1250 tdigit[0] -= 100000000;
|
|
1251 tdigit[1]++;
|
|
1252 }
|
|
1253 }
|
|
1254
|
|
1255 if (tdigit[1] >= 100000000) {
|
|
1256 tdigit[1] -= 100000000;
|
|
1257 if (tdigit[1] >= 100000000)
|
|
1258 tdigit[1] -= 100000000;
|
|
1259 }
|
|
1260
|
|
1261 digit = tdigit[0];
|
|
1262 if (!digit && !tdigit[1])
|
|
1263 nzeros += 16;
|
|
1264 else {
|
|
1265 if (!digit) {
|
|
1266 nzeros += 8;
|
|
1267 digit = tdigit[1];
|
|
1268 }
|
|
1269 // decompose digit
|
|
1270 PD = (UINT64) digit *0x068DB8BBull;
|
|
1271 digit_h = (UINT32) (PD >> 40);
|
|
1272 //printf("i=%d, nz=%d, digit=%d (%d, %016I64x %016I64x)\n",i,nzeros,digit_h,digit,PD,digit_h);fflush(stdout);
|
|
1273 digit_low = digit - digit_h * 10000;
|
|
1274
|
|
1275 if (!digit_low)
|
|
1276 nzeros += 4;
|
|
1277 else
|
|
1278 digit_h = digit_low;
|
|
1279
|
|
1280 if (!(digit_h & 1))
|
|
1281 nzeros +=
|
|
1282 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
|
|
1283 (digit_h & 7));
|
|
1284 }
|
|
1285
|
|
1286 if (nzeros) {
|
|
1287 // get P*(2^M[extra_digits])/10^extra_digits
|
|
1288 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
|
|
1289
|
|
1290 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
|
|
1291 amount = recip_scale[nzeros];
|
|
1292 __shr_128 (CQ, Qh, amount);
|
|
1293 }
|
|
1294 diff_expon += nzeros;
|
|
1295
|
|
1296 }
|
|
1297 }
|
|
1298 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode,
|
|
1299 pfpsf);
|
|
1300 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
1301 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
1302 #endif
|
|
1303 BID_RETURN (res);
|
|
1304 }
|
|
1305 #endif
|
|
1306
|
|
1307 if (diff_expon >= 0) {
|
|
1308 #ifdef IEEE_ROUND_NEAREST
|
|
1309 // rounding
|
|
1310 // 2*CA4 - CY
|
|
1311 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
1312 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
1313 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
1314 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
1315
|
|
1316 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
|
|
1317 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
|
|
1318
|
|
1319 CQ.w[0] += carry64;
|
|
1320 if (CQ.w[0] < carry64)
|
|
1321 CQ.w[1]++;
|
|
1322 #else
|
|
1323 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
|
|
1324 // rounding
|
|
1325 // 2*CA4 - CY
|
|
1326 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
1327 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
1328 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
1329 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
1330
|
|
1331 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
|
|
1332 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
|
|
1333
|
|
1334 CQ.w[0] += carry64;
|
|
1335 if (CQ.w[0] < carry64)
|
|
1336 CQ.w[1]++;
|
|
1337 #else
|
|
1338 rmode = rnd_mode;
|
|
1339 if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
|
|
1340 rmode = 3 - rmode;
|
|
1341 switch (rmode) {
|
|
1342 case ROUNDING_TO_NEAREST: // round to nearest code
|
|
1343 // rounding
|
|
1344 // 2*CA4 - CY
|
|
1345 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
1346 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
1347 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
1348 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
1349 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
|
|
1350 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
|
|
1351 CQ.w[0] += carry64;
|
|
1352 if (CQ.w[0] < carry64)
|
|
1353 CQ.w[1]++;
|
|
1354 break;
|
|
1355 case ROUNDING_TIES_AWAY:
|
|
1356 // rounding
|
|
1357 // 2*CA4 - CY
|
|
1358 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
1359 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
1360 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
1361 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
1362 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
|
|
1363 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
|
|
1364 CQ.w[0] += carry64;
|
|
1365 if (CQ.w[0] < carry64)
|
|
1366 CQ.w[1]++;
|
|
1367 break;
|
|
1368 case ROUNDING_DOWN:
|
|
1369 case ROUNDING_TO_ZERO:
|
|
1370 break;
|
|
1371 default: // rounding up
|
|
1372 CQ.w[0]++;
|
|
1373 if (!CQ.w[0])
|
|
1374 CQ.w[1]++;
|
|
1375 break;
|
|
1376 }
|
|
1377 #endif
|
|
1378 #endif
|
|
1379
|
|
1380 } else {
|
|
1381 #ifdef SET_STATUS_FLAGS
|
|
1382 if (CA4.w[0] || CA4.w[1]) {
|
|
1383 // set status flags
|
|
1384 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
|
|
1385 }
|
|
1386 #endif
|
|
1387 handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ,
|
|
1388 CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf);
|
|
1389 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
1390 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
1391 #endif
|
|
1392 BID_RETURN (res);
|
|
1393 }
|
|
1394
|
|
1395 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf);
|
|
1396 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
1397 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
1398 #endif
|
|
1399 BID_RETURN (res);
|
|
1400
|
|
1401 }
|
|
1402
|
|
1403
|
|
1404 BID128_FUNCTION_ARG128_ARGTYPE2 (bid128qd_div, x, UINT64, y)
|
|
1405 UINT256 CA4, CA4r, P256;
|
|
1406 UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, Ql, res;
|
|
1407 UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, PD,
|
|
1408 valid_y;
|
|
1409 int_float fx, fy, f64;
|
|
1410 UINT32 QX32, tdigit[3], digit, digit_h, digit_low;
|
|
1411 int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2,
|
|
1412 digits_q, amount;
|
|
1413 int nzeros, i, j, k, d5, rmode;
|
|
1414 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
1415 fexcept_t binaryflags = 0;
|
|
1416 #endif
|
|
1417
|
|
1418
|
|
1419 valid_y = unpack_BID64 (&sign_y, &exponent_y, &CY.w[0], y);
|
|
1420 // unpack arguments, check for NaN or Infinity
|
|
1421 if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
|
|
1422 // test if x is NaN
|
|
1423 if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
|
|
1424 #ifdef SET_STATUS_FLAGS
|
|
1425 if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull || // sNaN
|
|
1426 (y & 0x7e00000000000000ull) == 0x7e00000000000000ull)
|
|
1427 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
1428 #endif
|
|
1429 res.w[1] = (CX.w[1]) & QUIET_MASK64;
|
|
1430 res.w[0] = CX.w[0];
|
|
1431 BID_RETURN (res);
|
|
1432 }
|
|
1433 // x is Infinity?
|
|
1434 if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
|
|
1435 // check if y is Inf.
|
|
1436 if (((y & 0x7c00000000000000ull) == 0x7800000000000000ull))
|
|
1437 // return NaN
|
|
1438 {
|
|
1439 #ifdef SET_STATUS_FLAGS
|
|
1440 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
1441 #endif
|
|
1442 res.w[1] = 0x7c00000000000000ull;
|
|
1443 res.w[0] = 0;
|
|
1444 BID_RETURN (res);
|
|
1445 }
|
|
1446 // y is NaN?
|
|
1447 if (((y & 0x7c00000000000000ull) != 0x7c00000000000000ull))
|
|
1448 // return NaN
|
|
1449 {
|
|
1450 // return +/-Inf
|
|
1451 res.w[1] = ((x.w[1] ^ y) & 0x8000000000000000ull) |
|
|
1452 0x7800000000000000ull;
|
|
1453 res.w[0] = 0;
|
|
1454 BID_RETURN (res);
|
|
1455 }
|
|
1456 }
|
|
1457 // x is 0
|
|
1458 if ((y & 0x7800000000000000ull) < 0x7800000000000000ull) {
|
|
1459 if (!CY.w[0]) {
|
|
1460 #ifdef SET_STATUS_FLAGS
|
|
1461 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
1462 #endif
|
|
1463 // x=y=0, return NaN
|
|
1464 res.w[1] = 0x7c00000000000000ull;
|
|
1465 res.w[0] = 0;
|
|
1466 BID_RETURN (res);
|
|
1467 }
|
|
1468 // return 0
|
|
1469 res.w[1] = (x.w[1] ^ y) & 0x8000000000000000ull;
|
|
1470 exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;
|
|
1471 if (exponent_x > DECIMAL_MAX_EXPON_128)
|
|
1472 exponent_x = DECIMAL_MAX_EXPON_128;
|
|
1473 else if (exponent_x < 0)
|
|
1474 exponent_x = 0;
|
|
1475 res.w[1] |= (((UINT64) exponent_x) << 49);
|
|
1476 res.w[0] = 0;
|
|
1477 BID_RETURN (res);
|
|
1478 }
|
|
1479 }
|
|
1480 CY.w[1] = 0;
|
|
1481 if (!valid_y) {
|
|
1482 // y is Inf. or NaN
|
|
1483
|
|
1484 // test if y is NaN
|
|
1485 if ((y & NAN_MASK64) == NAN_MASK64) {
|
|
1486 #ifdef SET_STATUS_FLAGS
|
|
1487 if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN
|
|
1488 __set_status_flags (pfpsf, INVALID_EXCEPTION);
|
|
1489 #endif
|
|
1490 res.w[0] = (CY.w[0] & 0x0003ffffffffffffull);
|
|
1491 __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]);
|
|
1492 res.w[1] |= ((CY.w[0]) & 0xfc00000000000000ull);
|
|
1493 BID_RETURN (res);
|
|
1494 }
|
|
1495 // y is Infinity?
|
|
1496 if ((y & INFINITY_MASK64) == INFINITY_MASK64) {
|
|
1497 // return +/-0
|
|
1498 res.w[1] = ((x.w[1] ^ y) & 0x8000000000000000ull);
|
|
1499 res.w[0] = 0;
|
|
1500 BID_RETURN (res);
|
|
1501 }
|
|
1502 // y is 0
|
|
1503 #ifdef SET_STATUS_FLAGS
|
|
1504 __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION);
|
|
1505 #endif
|
|
1506 res.w[1] = (sign_x ^ sign_y) | INFINITY_MASK64;
|
|
1507 res.w[0] = 0;
|
|
1508 BID_RETURN (res);
|
|
1509 }
|
|
1510 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
1511 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
1512 #endif
|
|
1513 diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS;
|
|
1514
|
|
1515 if (__unsigned_compare_gt_128 (CY, CX)) {
|
|
1516 // CX < CY
|
|
1517
|
|
1518 // 2^64
|
|
1519 f64.i = 0x5f800000;
|
|
1520
|
|
1521 // fx ~ CX, fy ~ CY
|
|
1522 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
|
|
1523 fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0];
|
|
1524 // expon_cy - expon_cx
|
|
1525 bin_index = (fy.i - fx.i) >> 23;
|
|
1526
|
|
1527 if (CX.w[1]) {
|
|
1528 T = power10_index_binexp_128[bin_index].w[0];
|
|
1529 __mul_64x128_short (CA, T, CX);
|
|
1530 } else {
|
|
1531 T128 = power10_index_binexp_128[bin_index];
|
|
1532 __mul_64x128_short (CA, CX.w[0], T128);
|
|
1533 }
|
|
1534
|
|
1535 ed2 = 33;
|
|
1536 if (__unsigned_compare_gt_128 (CY, CA))
|
|
1537 ed2++;
|
|
1538
|
|
1539 T128 = power10_table_128[ed2];
|
|
1540 __mul_128x128_to_256 (CA4, CA, T128);
|
|
1541
|
|
1542 ed2 += estimate_decimal_digits[bin_index];
|
|
1543 CQ.w[0] = CQ.w[1] = 0;
|
|
1544 diff_expon = diff_expon - ed2;
|
|
1545
|
|
1546 } else {
|
|
1547 // get CQ = CX/CY
|
|
1548 __div_128_by_128 (&CQ, &CR, CX, CY);
|
|
1549
|
|
1550 if (!CR.w[1] && !CR.w[0]) {
|
|
1551 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode,
|
|
1552 pfpsf);
|
|
1553 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
1554 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
1555 #endif
|
|
1556 BID_RETURN (res);
|
|
1557 }
|
|
1558 // get number of decimal digits in CQ
|
|
1559 // 2^64
|
|
1560 f64.i = 0x5f800000;
|
|
1561 fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0];
|
|
1562 // binary expon. of CQ
|
|
1563 bin_expon = (fx.i - 0x3f800000) >> 23;
|
|
1564
|
|
1565 digits_q = estimate_decimal_digits[bin_expon];
|
|
1566 TP128.w[0] = power10_index_binexp_128[bin_expon].w[0];
|
|
1567 TP128.w[1] = power10_index_binexp_128[bin_expon].w[1];
|
|
1568 if (__unsigned_compare_ge_128 (CQ, TP128))
|
|
1569 digits_q++;
|
|
1570
|
|
1571 ed2 = 34 - digits_q;
|
|
1572 T128.w[0] = power10_table_128[ed2].w[0];
|
|
1573 T128.w[1] = power10_table_128[ed2].w[1];
|
|
1574 __mul_128x128_to_256 (CA4, CR, T128);
|
|
1575 diff_expon = diff_expon - ed2;
|
|
1576 __mul_128x128_low (CQ, CQ, T128);
|
|
1577
|
|
1578 }
|
|
1579
|
|
1580 __div_256_by_128 (&CQ, &CA4, CY);
|
|
1581
|
|
1582
|
|
1583 #ifdef SET_STATUS_FLAGS
|
|
1584 if (CA4.w[0] || CA4.w[1]) {
|
|
1585 // set status flags
|
|
1586 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
|
|
1587 }
|
|
1588 #ifndef LEAVE_TRAILING_ZEROS
|
|
1589 else
|
|
1590 #endif
|
|
1591 #else
|
|
1592 #ifndef LEAVE_TRAILING_ZEROS
|
|
1593 if (!CA4.w[0] && !CA4.w[1])
|
|
1594 #endif
|
|
1595 #endif
|
|
1596 #ifndef LEAVE_TRAILING_ZEROS
|
|
1597 // check whether result is exact
|
|
1598 {
|
|
1599 // check whether CX, CY are short
|
|
1600 if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) {
|
|
1601 i = (int) CY.w[0] - 1;
|
|
1602 j = (int) CX.w[0] - 1;
|
|
1603 // difference in powers of 2 factors for Y and X
|
|
1604 nzeros = ed2 - factors[i][0] + factors[j][0];
|
|
1605 // difference in powers of 5 factors
|
|
1606 d5 = ed2 - factors[i][1] + factors[j][1];
|
|
1607 if (d5 < nzeros)
|
|
1608 nzeros = d5;
|
|
1609 // get P*(2^M[extra_digits])/10^extra_digits
|
|
1610 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
|
|
1611 //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2];
|
|
1612
|
|
1613 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
|
|
1614 amount = recip_scale[nzeros];
|
|
1615 __shr_128_long (CQ, Qh, amount);
|
|
1616
|
|
1617 diff_expon += nzeros;
|
|
1618 } else {
|
|
1619 // decompose Q as Qh*10^17 + Ql
|
|
1620 //T128 = reciprocals10_128[17];
|
|
1621 T128.w[0] = 0x44909befeb9fad49ull;
|
|
1622 T128.w[1] = 0x000b877aa3236a4bull;
|
|
1623 __mul_128x128_to_256 (P256, CQ, T128);
|
|
1624 //amount = recip_scale[17];
|
|
1625 Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44));
|
|
1626 Q_low = CQ.w[0] - Q_high * 100000000000000000ull;
|
|
1627
|
|
1628 if (!Q_low) {
|
|
1629 diff_expon += 17;
|
|
1630
|
|
1631 tdigit[0] = Q_high & 0x3ffffff;
|
|
1632 tdigit[1] = 0;
|
|
1633 QX = Q_high >> 26;
|
|
1634 QX32 = QX;
|
|
1635 nzeros = 0;
|
|
1636
|
|
1637 for (j = 0; QX32; j++, QX32 >>= 7) {
|
|
1638 k = (QX32 & 127);
|
|
1639 tdigit[0] += convert_table[j][k][0];
|
|
1640 tdigit[1] += convert_table[j][k][1];
|
|
1641 if (tdigit[0] >= 100000000) {
|
|
1642 tdigit[0] -= 100000000;
|
|
1643 tdigit[1]++;
|
|
1644 }
|
|
1645 }
|
|
1646
|
|
1647
|
|
1648 if (tdigit[1] >= 100000000) {
|
|
1649 tdigit[1] -= 100000000;
|
|
1650 if (tdigit[1] >= 100000000)
|
|
1651 tdigit[1] -= 100000000;
|
|
1652 }
|
|
1653
|
|
1654 digit = tdigit[0];
|
|
1655 if (!digit && !tdigit[1])
|
|
1656 nzeros += 16;
|
|
1657 else {
|
|
1658 if (!digit) {
|
|
1659 nzeros += 8;
|
|
1660 digit = tdigit[1];
|
|
1661 }
|
|
1662 // decompose digit
|
|
1663 PD = (UINT64) digit *0x068DB8BBull;
|
|
1664 digit_h = (UINT32) (PD >> 40);
|
|
1665 digit_low = digit - digit_h * 10000;
|
|
1666
|
|
1667 if (!digit_low)
|
|
1668 nzeros += 4;
|
|
1669 else
|
|
1670 digit_h = digit_low;
|
|
1671
|
|
1672 if (!(digit_h & 1))
|
|
1673 nzeros +=
|
|
1674 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
|
|
1675 (digit_h & 7));
|
|
1676 }
|
|
1677
|
|
1678 if (nzeros) {
|
|
1679 __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]);
|
|
1680
|
|
1681 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64
|
|
1682 amount = short_recip_scale[nzeros];
|
|
1683 CQ.w[0] = CQ.w[1] >> amount;
|
|
1684 } else
|
|
1685 CQ.w[0] = Q_high;
|
|
1686 CQ.w[1] = 0;
|
|
1687
|
|
1688 diff_expon += nzeros;
|
|
1689 } else {
|
|
1690 tdigit[0] = Q_low & 0x3ffffff;
|
|
1691 tdigit[1] = 0;
|
|
1692 QX = Q_low >> 26;
|
|
1693 QX32 = QX;
|
|
1694 nzeros = 0;
|
|
1695
|
|
1696 for (j = 0; QX32; j++, QX32 >>= 7) {
|
|
1697 k = (QX32 & 127);
|
|
1698 tdigit[0] += convert_table[j][k][0];
|
|
1699 tdigit[1] += convert_table[j][k][1];
|
|
1700 if (tdigit[0] >= 100000000) {
|
|
1701 tdigit[0] -= 100000000;
|
|
1702 tdigit[1]++;
|
|
1703 }
|
|
1704 }
|
|
1705
|
|
1706 if (tdigit[1] >= 100000000) {
|
|
1707 tdigit[1] -= 100000000;
|
|
1708 if (tdigit[1] >= 100000000)
|
|
1709 tdigit[1] -= 100000000;
|
|
1710 }
|
|
1711
|
|
1712 digit = tdigit[0];
|
|
1713 if (!digit && !tdigit[1])
|
|
1714 nzeros += 16;
|
|
1715 else {
|
|
1716 if (!digit) {
|
|
1717 nzeros += 8;
|
|
1718 digit = tdigit[1];
|
|
1719 }
|
|
1720 // decompose digit
|
|
1721 PD = (UINT64) digit *0x068DB8BBull;
|
|
1722 digit_h = (UINT32) (PD >> 40);
|
|
1723 digit_low = digit - digit_h * 10000;
|
|
1724
|
|
1725 if (!digit_low)
|
|
1726 nzeros += 4;
|
|
1727 else
|
|
1728 digit_h = digit_low;
|
|
1729
|
|
1730 if (!(digit_h & 1))
|
|
1731 nzeros +=
|
|
1732 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >>
|
|
1733 (digit_h & 7));
|
|
1734 }
|
|
1735
|
|
1736 if (nzeros) {
|
|
1737 // get P*(2^M[extra_digits])/10^extra_digits
|
|
1738 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]);
|
|
1739
|
|
1740 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
|
|
1741 amount = recip_scale[nzeros];
|
|
1742 __shr_128 (CQ, Qh, amount);
|
|
1743 }
|
|
1744 diff_expon += nzeros;
|
|
1745
|
|
1746 }
|
|
1747 }
|
|
1748 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode,pfpsf);
|
|
1749 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
1750 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
1751 #endif
|
|
1752 BID_RETURN (res);
|
|
1753 }
|
|
1754 #endif
|
|
1755
|
|
1756 if (diff_expon >= 0) {
|
|
1757 #ifdef IEEE_ROUND_NEAREST
|
|
1758 // rounding
|
|
1759 // 2*CA4 - CY
|
|
1760 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
1761 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
1762 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
1763 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
1764
|
|
1765 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
|
|
1766 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
|
|
1767
|
|
1768 CQ.w[0] += carry64;
|
|
1769 if (CQ.w[0] < carry64)
|
|
1770 CQ.w[1]++;
|
|
1771 #else
|
|
1772 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
|
|
1773 // rounding
|
|
1774 // 2*CA4 - CY
|
|
1775 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
1776 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
1777 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
1778 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
1779
|
|
1780 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
|
|
1781 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
|
|
1782
|
|
1783 CQ.w[0] += carry64;
|
|
1784 if (CQ.w[0] < carry64)
|
|
1785 CQ.w[1]++;
|
|
1786 #else
|
|
1787 rmode = rnd_mode;
|
|
1788 if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2)
|
|
1789 rmode = 3 - rmode;
|
|
1790 switch (rmode) {
|
|
1791 case ROUNDING_TO_NEAREST: // round to nearest code
|
|
1792 // rounding
|
|
1793 // 2*CA4 - CY
|
|
1794 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
1795 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
1796 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
1797 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
1798 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0;
|
|
1799 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D);
|
|
1800 CQ.w[0] += carry64;
|
|
1801 if (CQ.w[0] < carry64)
|
|
1802 CQ.w[1]++;
|
|
1803 break;
|
|
1804 case ROUNDING_TIES_AWAY:
|
|
1805 // rounding
|
|
1806 // 2*CA4 - CY
|
|
1807 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63);
|
|
1808 CA4r.w[0] = CA4.w[0] + CA4.w[0];
|
|
1809 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]);
|
|
1810 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64;
|
|
1811 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1;
|
|
1812 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D;
|
|
1813 CQ.w[0] += carry64;
|
|
1814 if (CQ.w[0] < carry64)
|
|
1815 CQ.w[1]++;
|
|
1816 break;
|
|
1817 case ROUNDING_DOWN:
|
|
1818 case ROUNDING_TO_ZERO:
|
|
1819 break;
|
|
1820 default: // rounding up
|
|
1821 CQ.w[0]++;
|
|
1822 if (!CQ.w[0])
|
|
1823 CQ.w[1]++;
|
|
1824 break;
|
|
1825 }
|
|
1826 #endif
|
|
1827 #endif
|
|
1828
|
|
1829 } else {
|
|
1830 #ifdef SET_STATUS_FLAGS
|
|
1831 if (CA4.w[0] || CA4.w[1]) {
|
|
1832 // set status flags
|
|
1833 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
|
|
1834 }
|
|
1835 #endif
|
|
1836 handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ,
|
|
1837 CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf);
|
|
1838 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
1839 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
1840 #endif
|
|
1841 BID_RETURN (res);
|
|
1842
|
|
1843 }
|
|
1844
|
|
1845 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf);
|
|
1846 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
|
|
1847 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
|
|
1848 #endif
|
|
1849 BID_RETURN (res);
|
|
1850
|
|
1851 }
|