Mercurial > hg > CbC > CbC_gcc
comparison libgcc/config/libbid/bid128_div.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Copyright (C) 2007, 2009 Free Software Foundation, Inc. | |
2 | |
3 This file is part of GCC. | |
4 | |
5 GCC is free software; you can redistribute it and/or modify it under | |
6 the terms of the GNU General Public License as published by the Free | |
7 Software Foundation; either version 3, or (at your option) any later | |
8 version. | |
9 | |
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 for more details. | |
14 | |
15 Under Section 7 of GPL version 3, you are granted additional | |
16 permissions described in the GCC Runtime Library Exception, version | |
17 3.1, as published by the Free Software Foundation. | |
18 | |
19 You should have received a copy of the GNU General Public License and | |
20 a copy of the GCC Runtime Library Exception along with this program; | |
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 <http://www.gnu.org/licenses/>. */ | |
23 | |
24 #define BID_128RES | |
25 #include "bid_div_macros.h" | |
26 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
27 #include <fenv.h> | |
28 | |
29 #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT | |
30 #endif | |
31 | |
32 extern UINT32 convert_table[5][128][2]; | |
33 extern SINT8 factors[][2]; | |
34 extern UINT8 packed_10000_zeros[]; | |
35 | |
36 BID128_FUNCTION_ARG2 (bid128_div, x, y) | |
37 | |
38 UINT256 CA4, CA4r, P256; | |
39 UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, Ql, res; | |
40 UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, PD, | |
41 valid_y; | |
42 int_float fx, fy, f64; | |
43 UINT32 QX32, tdigit[3], digit, digit_h, digit_low; | |
44 int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, | |
45 digits_q, amount; | |
46 int nzeros, i, j, k, d5; | |
47 unsigned rmode; | |
48 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
49 fexcept_t binaryflags = 0; | |
50 #endif | |
51 | |
52 valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y); | |
53 | |
54 // unpack arguments, check for NaN or Infinity | |
55 if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { | |
56 // test if x is NaN | |
57 if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
58 #ifdef SET_STATUS_FLAGS | |
59 if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull || // sNaN | |
60 (y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) | |
61 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
62 #endif | |
63 res.w[1] = (CX.w[1]) & QUIET_MASK64; | |
64 res.w[0] = CX.w[0]; | |
65 BID_RETURN (res); | |
66 } | |
67 // x is Infinity? | |
68 if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
69 // check if y is Inf. | |
70 if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull)) | |
71 // return NaN | |
72 { | |
73 #ifdef SET_STATUS_FLAGS | |
74 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
75 #endif | |
76 res.w[1] = 0x7c00000000000000ull; | |
77 res.w[0] = 0; | |
78 BID_RETURN (res); | |
79 } | |
80 // y is NaN? | |
81 if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) | |
82 // return NaN | |
83 { | |
84 // return +/-Inf | |
85 res.w[1] = ((x.w[1] ^ y.w[1]) & 0x8000000000000000ull) | | |
86 0x7800000000000000ull; | |
87 res.w[0] = 0; | |
88 BID_RETURN (res); | |
89 } | |
90 } | |
91 // x is 0 | |
92 if ((y.w[1] & 0x7800000000000000ull) < 0x7800000000000000ull) { | |
93 if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) { | |
94 #ifdef SET_STATUS_FLAGS | |
95 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
96 #endif | |
97 // x=y=0, return NaN | |
98 res.w[1] = 0x7c00000000000000ull; | |
99 res.w[0] = 0; | |
100 BID_RETURN (res); | |
101 } | |
102 // return 0 | |
103 res.w[1] = (x.w[1] ^ y.w[1]) & 0x8000000000000000ull; | |
104 exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; | |
105 if (exponent_x > DECIMAL_MAX_EXPON_128) | |
106 exponent_x = DECIMAL_MAX_EXPON_128; | |
107 else if (exponent_x < 0) | |
108 exponent_x = 0; | |
109 res.w[1] |= (((UINT64) exponent_x) << 49); | |
110 res.w[0] = 0; | |
111 BID_RETURN (res); | |
112 } | |
113 } | |
114 if (!valid_y) { | |
115 // y is Inf. or NaN | |
116 | |
117 // test if y is NaN | |
118 if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
119 #ifdef SET_STATUS_FLAGS | |
120 if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN | |
121 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
122 #endif | |
123 res.w[1] = CY.w[1] & QUIET_MASK64; | |
124 res.w[0] = CY.w[0]; | |
125 BID_RETURN (res); | |
126 } | |
127 // y is Infinity? | |
128 if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
129 // return +/-0 | |
130 res.w[1] = sign_x ^ sign_y; | |
131 res.w[0] = 0; | |
132 BID_RETURN (res); | |
133 } | |
134 // y is 0, return +/-Inf | |
135 #ifdef SET_STATUS_FLAGS | |
136 __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); | |
137 #endif | |
138 res.w[1] = | |
139 ((x.w[1] ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
140 res.w[0] = 0; | |
141 BID_RETURN (res); | |
142 } | |
143 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
144 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
145 #endif | |
146 diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; | |
147 | |
148 if (__unsigned_compare_gt_128 (CY, CX)) { | |
149 // CX < CY | |
150 | |
151 // 2^64 | |
152 f64.i = 0x5f800000; | |
153 | |
154 // fx ~ CX, fy ~ CY | |
155 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
156 fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; | |
157 // expon_cy - expon_cx | |
158 bin_index = (fy.i - fx.i) >> 23; | |
159 | |
160 if (CX.w[1]) { | |
161 T = power10_index_binexp_128[bin_index].w[0]; | |
162 __mul_64x128_short (CA, T, CX); | |
163 } else { | |
164 T128 = power10_index_binexp_128[bin_index]; | |
165 __mul_64x128_short (CA, CX.w[0], T128); | |
166 } | |
167 | |
168 ed2 = 33; | |
169 if (__unsigned_compare_gt_128 (CY, CA)) | |
170 ed2++; | |
171 | |
172 T128 = power10_table_128[ed2]; | |
173 __mul_128x128_to_256 (CA4, CA, T128); | |
174 | |
175 ed2 += estimate_decimal_digits[bin_index]; | |
176 CQ.w[0] = CQ.w[1] = 0; | |
177 diff_expon = diff_expon - ed2; | |
178 | |
179 } else { | |
180 // get CQ = CX/CY | |
181 __div_128_by_128 (&CQ, &CR, CX, CY); | |
182 | |
183 if (!CR.w[1] && !CR.w[0]) { | |
184 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, | |
185 pfpsf); | |
186 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
187 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
188 #endif | |
189 BID_RETURN (res); | |
190 } | |
191 // get number of decimal digits in CQ | |
192 // 2^64 | |
193 f64.i = 0x5f800000; | |
194 fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; | |
195 // binary expon. of CQ | |
196 bin_expon = (fx.i - 0x3f800000) >> 23; | |
197 | |
198 digits_q = estimate_decimal_digits[bin_expon]; | |
199 TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; | |
200 TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; | |
201 if (__unsigned_compare_ge_128 (CQ, TP128)) | |
202 digits_q++; | |
203 | |
204 ed2 = 34 - digits_q; | |
205 T128.w[0] = power10_table_128[ed2].w[0]; | |
206 T128.w[1] = power10_table_128[ed2].w[1]; | |
207 __mul_128x128_to_256 (CA4, CR, T128); | |
208 diff_expon = diff_expon - ed2; | |
209 __mul_128x128_low (CQ, CQ, T128); | |
210 | |
211 } | |
212 | |
213 __div_256_by_128 (&CQ, &CA4, CY); | |
214 | |
215 #ifdef SET_STATUS_FLAGS | |
216 if (CA4.w[0] || CA4.w[1]) { | |
217 // set status flags | |
218 __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
219 } | |
220 #ifndef LEAVE_TRAILING_ZEROS | |
221 else | |
222 #endif | |
223 #else | |
224 #ifndef LEAVE_TRAILING_ZEROS | |
225 if (!CA4.w[0] && !CA4.w[1]) | |
226 #endif | |
227 #endif | |
228 #ifndef LEAVE_TRAILING_ZEROS | |
229 // check whether result is exact | |
230 { | |
231 // check whether CX, CY are short | |
232 if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { | |
233 i = (int) CY.w[0] - 1; | |
234 j = (int) CX.w[0] - 1; | |
235 // difference in powers of 2 factors for Y and X | |
236 nzeros = ed2 - factors[i][0] + factors[j][0]; | |
237 // difference in powers of 5 factors | |
238 d5 = ed2 - factors[i][1] + factors[j][1]; | |
239 if (d5 < nzeros) | |
240 nzeros = d5; | |
241 // get P*(2^M[extra_digits])/10^extra_digits | |
242 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]); | |
243 | |
244 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
245 amount = recip_scale[nzeros]; | |
246 __shr_128_long (CQ, Qh, amount); | |
247 | |
248 diff_expon += nzeros; | |
249 } else { | |
250 // decompose Q as Qh*10^17 + Ql | |
251 //T128 = reciprocals10_128[17]; | |
252 T128.w[0] = 0x44909befeb9fad49ull; | |
253 T128.w[1] = 0x000b877aa3236a4bull; | |
254 __mul_128x128_to_256 (P256, CQ, T128); | |
255 //amount = recip_scale[17]; | |
256 Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44)); | |
257 Q_low = CQ.w[0] - Q_high * 100000000000000000ull; | |
258 | |
259 if (!Q_low) { | |
260 diff_expon += 17; | |
261 | |
262 tdigit[0] = Q_high & 0x3ffffff; | |
263 tdigit[1] = 0; | |
264 QX = Q_high >> 26; | |
265 QX32 = QX; | |
266 nzeros = 0; | |
267 | |
268 for (j = 0; QX32; j++, QX32 >>= 7) { | |
269 k = (QX32 & 127); | |
270 tdigit[0] += convert_table[j][k][0]; | |
271 tdigit[1] += convert_table[j][k][1]; | |
272 if (tdigit[0] >= 100000000) { | |
273 tdigit[0] -= 100000000; | |
274 tdigit[1]++; | |
275 } | |
276 } | |
277 | |
278 if (tdigit[1] >= 100000000) { | |
279 tdigit[1] -= 100000000; | |
280 if (tdigit[1] >= 100000000) | |
281 tdigit[1] -= 100000000; | |
282 } | |
283 | |
284 digit = tdigit[0]; | |
285 if (!digit && !tdigit[1]) | |
286 nzeros += 16; | |
287 else { | |
288 if (!digit) { | |
289 nzeros += 8; | |
290 digit = tdigit[1]; | |
291 } | |
292 // decompose digit | |
293 PD = (UINT64) digit *0x068DB8BBull; | |
294 digit_h = (UINT32) (PD >> 40); | |
295 digit_low = digit - digit_h * 10000; | |
296 | |
297 if (!digit_low) | |
298 nzeros += 4; | |
299 else | |
300 digit_h = digit_low; | |
301 | |
302 if (!(digit_h & 1)) | |
303 nzeros += | |
304 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
305 (digit_h & 7)); | |
306 } | |
307 | |
308 if (nzeros) { | |
309 __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]); | |
310 | |
311 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64 | |
312 amount = short_recip_scale[nzeros]; | |
313 CQ.w[0] = CQ.w[1] >> amount; | |
314 } else | |
315 CQ.w[0] = Q_high; | |
316 CQ.w[1] = 0; | |
317 | |
318 diff_expon += nzeros; | |
319 } else { | |
320 tdigit[0] = Q_low & 0x3ffffff; | |
321 tdigit[1] = 0; | |
322 QX = Q_low >> 26; | |
323 QX32 = QX; | |
324 nzeros = 0; | |
325 | |
326 for (j = 0; QX32; j++, QX32 >>= 7) { | |
327 k = (QX32 & 127); | |
328 tdigit[0] += convert_table[j][k][0]; | |
329 tdigit[1] += convert_table[j][k][1]; | |
330 if (tdigit[0] >= 100000000) { | |
331 tdigit[0] -= 100000000; | |
332 tdigit[1]++; | |
333 } | |
334 } | |
335 | |
336 if (tdigit[1] >= 100000000) { | |
337 tdigit[1] -= 100000000; | |
338 if (tdigit[1] >= 100000000) | |
339 tdigit[1] -= 100000000; | |
340 } | |
341 | |
342 digit = tdigit[0]; | |
343 if (!digit && !tdigit[1]) | |
344 nzeros += 16; | |
345 else { | |
346 if (!digit) { | |
347 nzeros += 8; | |
348 digit = tdigit[1]; | |
349 } | |
350 // decompose digit | |
351 PD = (UINT64) digit *0x068DB8BBull; | |
352 digit_h = (UINT32) (PD >> 40); | |
353 digit_low = digit - digit_h * 10000; | |
354 | |
355 if (!digit_low) | |
356 nzeros += 4; | |
357 else | |
358 digit_h = digit_low; | |
359 | |
360 if (!(digit_h & 1)) | |
361 nzeros += | |
362 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
363 (digit_h & 7)); | |
364 } | |
365 | |
366 if (nzeros) { | |
367 // get P*(2^M[extra_digits])/10^extra_digits | |
368 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]); | |
369 | |
370 //now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
371 amount = recip_scale[nzeros]; | |
372 __shr_128 (CQ, Qh, amount); | |
373 } | |
374 diff_expon += nzeros; | |
375 | |
376 } | |
377 } | |
378 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); | |
379 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
380 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
381 #endif | |
382 BID_RETURN (res); | |
383 } | |
384 #endif | |
385 | |
386 if (diff_expon >= 0) { | |
387 #ifdef IEEE_ROUND_NEAREST | |
388 // rounding | |
389 // 2*CA4 - CY | |
390 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
391 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
392 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
393 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
394 | |
395 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
396 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
397 | |
398 CQ.w[0] += carry64; | |
399 if (CQ.w[0] < carry64) | |
400 CQ.w[1]++; | |
401 #else | |
402 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY | |
403 // rounding | |
404 // 2*CA4 - CY | |
405 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
406 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
407 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
408 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
409 | |
410 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
411 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
412 | |
413 CQ.w[0] += carry64; | |
414 if (CQ.w[0] < carry64) | |
415 CQ.w[1]++; | |
416 #else | |
417 rmode = rnd_mode; | |
418 if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) | |
419 rmode = 3 - rmode; | |
420 switch (rmode) { | |
421 case ROUNDING_TO_NEAREST: // round to nearest code | |
422 // rounding | |
423 // 2*CA4 - CY | |
424 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
425 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
426 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
427 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
428 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
429 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
430 CQ.w[0] += carry64; | |
431 if (CQ.w[0] < carry64) | |
432 CQ.w[1]++; | |
433 break; | |
434 case ROUNDING_TIES_AWAY: | |
435 // rounding | |
436 // 2*CA4 - CY | |
437 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
438 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
439 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
440 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
441 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
442 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
443 CQ.w[0] += carry64; | |
444 if (CQ.w[0] < carry64) | |
445 CQ.w[1]++; | |
446 break; | |
447 case ROUNDING_DOWN: | |
448 case ROUNDING_TO_ZERO: | |
449 break; | |
450 default: // rounding up | |
451 CQ.w[0]++; | |
452 if (!CQ.w[0]) | |
453 CQ.w[1]++; | |
454 break; | |
455 } | |
456 #endif | |
457 #endif | |
458 | |
459 } else { | |
460 #ifdef SET_STATUS_FLAGS | |
461 if (CA4.w[0] || CA4.w[1]) { | |
462 // set status flags | |
463 __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
464 } | |
465 #endif | |
466 | |
467 handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ, | |
468 CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf); | |
469 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
470 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
471 #endif | |
472 BID_RETURN (res); | |
473 | |
474 } | |
475 | |
476 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); | |
477 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
478 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
479 #endif | |
480 BID_RETURN (res); | |
481 } | |
482 | |
483 | |
484 //#define LEAVE_TRAILING_ZEROS | |
485 | |
486 TYPE0_FUNCTION_ARGTYPE1_ARGTYPE2 (UINT128, bid128dd_div, UINT64, x, | |
487 UINT64, y) | |
488 | |
489 UINT256 CA4, CA4r, P256; | |
490 UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, Ql, res; | |
491 UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, PD, | |
492 valid_y; | |
493 int_float fx, fy, f64; | |
494 UINT32 QX32, tdigit[3], digit, digit_h, digit_low; | |
495 int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, | |
496 digits_q, amount; | |
497 int nzeros, i, j, k, d5; | |
498 unsigned rmode; | |
499 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
500 fexcept_t binaryflags = 0; | |
501 #endif | |
502 | |
503 valid_y = unpack_BID64 (&sign_y, &exponent_y, &CY.w[0], y); | |
504 | |
505 // unpack arguments, check for NaN or Infinity | |
506 CX.w[1] = 0; | |
507 if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], (x))) { | |
508 #ifdef SET_STATUS_FLAGS | |
509 if ((y & SNAN_MASK64) == SNAN_MASK64) // y is sNaN | |
510 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
511 #endif | |
512 | |
513 // test if x is NaN | |
514 if ((x & NAN_MASK64) == NAN_MASK64) { | |
515 #ifdef SET_STATUS_FLAGS | |
516 if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN | |
517 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
518 #endif | |
519 res.w[0] = (CX.w[0] & 0x0003ffffffffffffull); | |
520 __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); | |
521 res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull); | |
522 BID_RETURN (res); | |
523 } | |
524 // x is Infinity? | |
525 if (((x) & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
526 // check if y is Inf. | |
527 if ((((y) & 0x7c00000000000000ull) == 0x7800000000000000ull)) | |
528 // return NaN | |
529 { | |
530 #ifdef SET_STATUS_FLAGS | |
531 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
532 #endif | |
533 res.w[1] = 0x7c00000000000000ull; | |
534 res.w[0] = 0; | |
535 BID_RETURN (res); | |
536 } | |
537 if ((((y) & 0x7c00000000000000ull) != 0x7c00000000000000ull)) { | |
538 // otherwise return +/-Inf | |
539 res.w[1] = | |
540 (((x) ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
541 res.w[0] = 0; | |
542 BID_RETURN (res); | |
543 } | |
544 } | |
545 // x is 0 | |
546 if ((((y) & 0x7800000000000000ull) != 0x7800000000000000ull)) { | |
547 if(!CY.w[0]) { | |
548 #ifdef SET_STATUS_FLAGS | |
549 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
550 #endif | |
551 // x=y=0, return NaN | |
552 res.w[1] = 0x7c00000000000000ull; | |
553 res.w[0] = 0; | |
554 BID_RETURN (res); | |
555 } | |
556 // return 0 | |
557 res.w[1] = ((x) ^ (y)) & 0x8000000000000000ull; | |
558 if (((y) & 0x6000000000000000ull) == 0x6000000000000000ull) | |
559 exponent_y = ((UINT32) ((y) >> 51)) & 0x3ff; | |
560 else | |
561 exponent_y = ((UINT32) ((y) >> 53)) & 0x3ff; | |
562 exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; | |
563 if (exponent_x > DECIMAL_MAX_EXPON_128) | |
564 exponent_x = DECIMAL_MAX_EXPON_128; | |
565 else if (exponent_x < 0) | |
566 exponent_x = 0; | |
567 res.w[1] |= (((UINT64) exponent_x) << 49); | |
568 res.w[0] = 0; | |
569 BID_RETURN (res); | |
570 } | |
571 } | |
572 | |
573 CY.w[1] = 0; | |
574 if (!valid_y) { | |
575 // y is Inf. or NaN | |
576 | |
577 // test if y is NaN | |
578 if ((y & NAN_MASK64) == NAN_MASK64) { | |
579 #ifdef SET_STATUS_FLAGS | |
580 if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN | |
581 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
582 #endif | |
583 res.w[0] = (CY.w[0] & 0x0003ffffffffffffull); | |
584 __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); | |
585 res.w[1] |= ((CY.w[0]) & 0xfc00000000000000ull); | |
586 BID_RETURN (res); | |
587 } | |
588 // y is Infinity? | |
589 if (((y) & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
590 // return +/-0 | |
591 res.w[1] = sign_x ^ sign_y; | |
592 res.w[0] = 0; | |
593 BID_RETURN (res); | |
594 } | |
595 // y is 0, return +/-Inf | |
596 res.w[1] = | |
597 (((x) ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
598 res.w[0] = 0; | |
599 #ifdef SET_STATUS_FLAGS | |
600 __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); | |
601 #endif | |
602 BID_RETURN (res); | |
603 } | |
604 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
605 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
606 #endif | |
607 diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; | |
608 | |
609 if (__unsigned_compare_gt_128 (CY, CX)) { | |
610 // CX < CY | |
611 | |
612 // 2^64 | |
613 f64.i = 0x5f800000; | |
614 | |
615 // fx ~ CX, fy ~ CY | |
616 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
617 fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; | |
618 // expon_cy - expon_cx | |
619 bin_index = (fy.i - fx.i) >> 23; | |
620 | |
621 if (CX.w[1]) { | |
622 T = power10_index_binexp_128[bin_index].w[0]; | |
623 __mul_64x128_short (CA, T, CX); | |
624 } else { | |
625 T128 = power10_index_binexp_128[bin_index]; | |
626 __mul_64x128_short (CA, CX.w[0], T128); | |
627 } | |
628 | |
629 ed2 = 33; | |
630 if (__unsigned_compare_gt_128 (CY, CA)) | |
631 ed2++; | |
632 | |
633 T128 = power10_table_128[ed2]; | |
634 __mul_128x128_to_256 (CA4, CA, T128); | |
635 | |
636 ed2 += estimate_decimal_digits[bin_index]; | |
637 CQ.w[0] = CQ.w[1] = 0; | |
638 diff_expon = diff_expon - ed2; | |
639 | |
640 } else { | |
641 // get CQ = CX/CY | |
642 __div_128_by_128 (&CQ, &CR, CX, CY); | |
643 | |
644 if (!CR.w[1] && !CR.w[0]) { | |
645 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, | |
646 pfpsf); | |
647 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
648 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
649 #endif | |
650 BID_RETURN (res); | |
651 } | |
652 // get number of decimal digits in CQ | |
653 // 2^64 | |
654 f64.i = 0x5f800000; | |
655 fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; | |
656 // binary expon. of CQ | |
657 bin_expon = (fx.i - 0x3f800000) >> 23; | |
658 | |
659 digits_q = estimate_decimal_digits[bin_expon]; | |
660 TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; | |
661 TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; | |
662 if (__unsigned_compare_ge_128 (CQ, TP128)) | |
663 digits_q++; | |
664 | |
665 ed2 = 34 - digits_q; | |
666 T128.w[0] = power10_table_128[ed2].w[0]; | |
667 T128.w[1] = power10_table_128[ed2].w[1]; | |
668 __mul_128x128_to_256 (CA4, CR, T128); | |
669 diff_expon = diff_expon - ed2; | |
670 __mul_128x128_low (CQ, CQ, T128); | |
671 | |
672 } | |
673 | |
674 __div_256_by_128 (&CQ, &CA4, CY); | |
675 | |
676 | |
677 #ifdef SET_STATUS_FLAGS | |
678 if (CA4.w[0] || CA4.w[1]) { | |
679 // set status flags | |
680 __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
681 } | |
682 #ifndef LEAVE_TRAILING_ZEROS | |
683 else | |
684 #endif | |
685 #else | |
686 #ifndef LEAVE_TRAILING_ZEROS | |
687 if (!CA4.w[0] && !CA4.w[1]) | |
688 #endif | |
689 #endif | |
690 #ifndef LEAVE_TRAILING_ZEROS | |
691 // check whether result is exact | |
692 { | |
693 // check whether CX, CY are short | |
694 if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { | |
695 i = (int) CY.w[0] - 1; | |
696 j = (int) CX.w[0] - 1; | |
697 // difference in powers of 2 factors for Y and X | |
698 nzeros = ed2 - factors[i][0] + factors[j][0]; | |
699 // difference in powers of 5 factors | |
700 d5 = ed2 - factors[i][1] + factors[j][1]; | |
701 if (d5 < nzeros) | |
702 nzeros = d5; | |
703 // get P*(2^M[extra_digits])/10^extra_digits | |
704 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]); | |
705 //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2]; | |
706 | |
707 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
708 amount = recip_scale[nzeros]; | |
709 __shr_128_long (CQ, Qh, amount); | |
710 | |
711 diff_expon += nzeros; | |
712 } else { | |
713 // decompose Q as Qh*10^17 + Ql | |
714 //T128 = reciprocals10_128[17]; | |
715 T128.w[0] = 0x44909befeb9fad49ull; | |
716 T128.w[1] = 0x000b877aa3236a4bull; | |
717 __mul_128x128_to_256 (P256, CQ, T128); | |
718 //amount = recip_scale[17]; | |
719 Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44)); | |
720 Q_low = CQ.w[0] - Q_high * 100000000000000000ull; | |
721 | |
722 if (!Q_low) { | |
723 diff_expon += 17; | |
724 | |
725 tdigit[0] = Q_high & 0x3ffffff; | |
726 tdigit[1] = 0; | |
727 QX = Q_high >> 26; | |
728 QX32 = QX; | |
729 nzeros = 0; | |
730 | |
731 for (j = 0; QX32; j++, QX32 >>= 7) { | |
732 k = (QX32 & 127); | |
733 tdigit[0] += convert_table[j][k][0]; | |
734 tdigit[1] += convert_table[j][k][1]; | |
735 if (tdigit[0] >= 100000000) { | |
736 tdigit[0] -= 100000000; | |
737 tdigit[1]++; | |
738 } | |
739 } | |
740 | |
741 | |
742 if (tdigit[1] >= 100000000) { | |
743 tdigit[1] -= 100000000; | |
744 if (tdigit[1] >= 100000000) | |
745 tdigit[1] -= 100000000; | |
746 } | |
747 | |
748 digit = tdigit[0]; | |
749 if (!digit && !tdigit[1]) | |
750 nzeros += 16; | |
751 else { | |
752 if (!digit) { | |
753 nzeros += 8; | |
754 digit = tdigit[1]; | |
755 } | |
756 // decompose digit | |
757 PD = (UINT64) digit *0x068DB8BBull; | |
758 digit_h = (UINT32) (PD >> 40); | |
759 digit_low = digit - digit_h * 10000; | |
760 | |
761 if (!digit_low) | |
762 nzeros += 4; | |
763 else | |
764 digit_h = digit_low; | |
765 | |
766 if (!(digit_h & 1)) | |
767 nzeros += | |
768 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
769 (digit_h & 7)); | |
770 } | |
771 | |
772 if (nzeros) { | |
773 __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]); | |
774 | |
775 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64 | |
776 amount = short_recip_scale[nzeros]; | |
777 CQ.w[0] = CQ.w[1] >> amount; | |
778 } else | |
779 CQ.w[0] = Q_high; | |
780 CQ.w[1] = 0; | |
781 | |
782 diff_expon += nzeros; | |
783 } else { | |
784 tdigit[0] = Q_low & 0x3ffffff; | |
785 tdigit[1] = 0; | |
786 QX = Q_low >> 26; | |
787 QX32 = QX; | |
788 nzeros = 0; | |
789 | |
790 for (j = 0; QX32; j++, QX32 >>= 7) { | |
791 k = (QX32 & 127); | |
792 tdigit[0] += convert_table[j][k][0]; | |
793 tdigit[1] += convert_table[j][k][1]; | |
794 if (tdigit[0] >= 100000000) { | |
795 tdigit[0] -= 100000000; | |
796 tdigit[1]++; | |
797 } | |
798 } | |
799 | |
800 if (tdigit[1] >= 100000000) { | |
801 tdigit[1] -= 100000000; | |
802 if (tdigit[1] >= 100000000) | |
803 tdigit[1] -= 100000000; | |
804 } | |
805 | |
806 digit = tdigit[0]; | |
807 if (!digit && !tdigit[1]) | |
808 nzeros += 16; | |
809 else { | |
810 if (!digit) { | |
811 nzeros += 8; | |
812 digit = tdigit[1]; | |
813 } | |
814 // decompose digit | |
815 PD = (UINT64) digit *0x068DB8BBull; | |
816 digit_h = (UINT32) (PD >> 40); | |
817 digit_low = digit - digit_h * 10000; | |
818 | |
819 if (!digit_low) | |
820 nzeros += 4; | |
821 else | |
822 digit_h = digit_low; | |
823 | |
824 if (!(digit_h & 1)) | |
825 nzeros += | |
826 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
827 (digit_h & 7)); | |
828 } | |
829 | |
830 if (nzeros) { | |
831 // get P*(2^M[extra_digits])/10^extra_digits | |
832 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]); | |
833 | |
834 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
835 amount = recip_scale[nzeros]; | |
836 __shr_128 (CQ, Qh, amount); | |
837 } | |
838 diff_expon += nzeros; | |
839 | |
840 } | |
841 } | |
842 get_BID128(&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode,pfpsf); | |
843 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
844 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
845 #endif | |
846 BID_RETURN (res); | |
847 } | |
848 #endif | |
849 | |
850 if (diff_expon >= 0) { | |
851 #ifdef IEEE_ROUND_NEAREST | |
852 // rounding | |
853 // 2*CA4 - CY | |
854 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
855 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
856 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
857 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
858 | |
859 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
860 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
861 | |
862 CQ.w[0] += carry64; | |
863 if (CQ.w[0] < carry64) | |
864 CQ.w[1]++; | |
865 #else | |
866 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY | |
867 // rounding | |
868 // 2*CA4 - CY | |
869 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
870 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
871 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
872 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
873 | |
874 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
875 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
876 | |
877 CQ.w[0] += carry64; | |
878 if (CQ.w[0] < carry64) | |
879 CQ.w[1]++; | |
880 #else | |
881 rmode = rnd_mode; | |
882 if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) | |
883 rmode = 3 - rmode; | |
884 switch (rmode) { | |
885 case ROUNDING_TO_NEAREST: // round to nearest code | |
886 // rounding | |
887 // 2*CA4 - CY | |
888 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
889 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
890 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
891 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
892 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
893 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
894 CQ.w[0] += carry64; | |
895 if (CQ.w[0] < carry64) | |
896 CQ.w[1]++; | |
897 break; | |
898 case ROUNDING_TIES_AWAY: | |
899 // rounding | |
900 // 2*CA4 - CY | |
901 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
902 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
903 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
904 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
905 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
906 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
907 CQ.w[0] += carry64; | |
908 if (CQ.w[0] < carry64) | |
909 CQ.w[1]++; | |
910 break; | |
911 case ROUNDING_DOWN: | |
912 case ROUNDING_TO_ZERO: | |
913 break; | |
914 default: // rounding up | |
915 CQ.w[0]++; | |
916 if (!CQ.w[0]) | |
917 CQ.w[1]++; | |
918 break; | |
919 } | |
920 #endif | |
921 #endif | |
922 | |
923 } else { | |
924 #ifdef SET_STATUS_FLAGS | |
925 if (CA4.w[0] || CA4.w[1]) { | |
926 // set status flags | |
927 __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
928 } | |
929 #endif | |
930 handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ, | |
931 CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf); | |
932 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
933 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
934 #endif | |
935 BID_RETURN (res); | |
936 | |
937 } | |
938 | |
939 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); | |
940 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
941 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
942 #endif | |
943 BID_RETURN (res); | |
944 } | |
945 | |
946 | |
947 BID128_FUNCTION_ARGTYPE1_ARG128 (bid128dq_div, UINT64, x, y) | |
948 UINT256 CA4, CA4r, P256; | |
949 UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, Ql, res; | |
950 UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, valid_y, | |
951 PD; | |
952 int_float fx, fy, f64; | |
953 UINT32 QX32, tdigit[3], digit, digit_h, digit_low; | |
954 int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, | |
955 digits_q, amount; | |
956 int nzeros, i, j, k, d5; | |
957 unsigned rmode; | |
958 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
959 fexcept_t binaryflags = 0; | |
960 #endif | |
961 | |
962 valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y); | |
963 | |
964 // unpack arguments, check for NaN or Infinity | |
965 CX.w[1] = 0; | |
966 if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], x)) { | |
967 #ifdef SET_STATUS_FLAGS | |
968 if ((y.w[1] & SNAN_MASK64) == SNAN_MASK64) // y is sNaN | |
969 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
970 #endif | |
971 | |
972 // test if x is NaN | |
973 if ((x & NAN_MASK64) == NAN_MASK64) { | |
974 #ifdef SET_STATUS_FLAGS | |
975 if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN | |
976 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
977 #endif | |
978 res.w[0] = (CX.w[0] & 0x0003ffffffffffffull); | |
979 __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); | |
980 res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull); | |
981 BID_RETURN (res); | |
982 } | |
983 // x is Infinity? | |
984 if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
985 // check if y is Inf. | |
986 if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull)) | |
987 // return NaN | |
988 { | |
989 #ifdef SET_STATUS_FLAGS | |
990 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
991 #endif | |
992 res.w[1] = 0x7c00000000000000ull; | |
993 res.w[0] = 0; | |
994 BID_RETURN (res); | |
995 } | |
996 if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) { | |
997 // otherwise return +/-Inf | |
998 res.w[1] = | |
999 ((x ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
1000 res.w[0] = 0; | |
1001 BID_RETURN (res); | |
1002 } | |
1003 } | |
1004 // x is 0 | |
1005 if ((y.w[1] & INFINITY_MASK64) != INFINITY_MASK64) { | |
1006 if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) { | |
1007 #ifdef SET_STATUS_FLAGS | |
1008 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1009 #endif | |
1010 // x=y=0, return NaN | |
1011 res.w[1] = 0x7c00000000000000ull; | |
1012 res.w[0] = 0; | |
1013 BID_RETURN (res); | |
1014 } | |
1015 // return 0 | |
1016 res.w[1] = (x ^ y.w[1]) & 0x8000000000000000ull; | |
1017 exponent_x = exponent_x - exponent_y + (DECIMAL_EXPONENT_BIAS_128<<1) - DECIMAL_EXPONENT_BIAS; | |
1018 if (exponent_x > DECIMAL_MAX_EXPON_128) | |
1019 exponent_x = DECIMAL_MAX_EXPON_128; | |
1020 else if (exponent_x < 0) | |
1021 exponent_x = 0; | |
1022 res.w[1] |= (((UINT64) exponent_x) << 49); | |
1023 res.w[0] = 0; | |
1024 BID_RETURN (res); | |
1025 } | |
1026 } | |
1027 exponent_x += (DECIMAL_EXPONENT_BIAS_128 - DECIMAL_EXPONENT_BIAS); | |
1028 | |
1029 if (!valid_y) { | |
1030 // y is Inf. or NaN | |
1031 | |
1032 // test if y is NaN | |
1033 if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
1034 #ifdef SET_STATUS_FLAGS | |
1035 if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN | |
1036 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1037 #endif | |
1038 res.w[1] = CY.w[1] & QUIET_MASK64; | |
1039 res.w[0] = CY.w[0]; | |
1040 BID_RETURN (res); | |
1041 } | |
1042 // y is Infinity? | |
1043 if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
1044 // return +/-0 | |
1045 res.w[1] = sign_x ^ sign_y; | |
1046 res.w[0] = 0; | |
1047 BID_RETURN (res); | |
1048 } | |
1049 // y is 0, return +/-Inf | |
1050 res.w[1] = | |
1051 ((x ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; | |
1052 res.w[0] = 0; | |
1053 #ifdef SET_STATUS_FLAGS | |
1054 __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); | |
1055 #endif | |
1056 BID_RETURN (res); | |
1057 } | |
1058 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1059 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1060 #endif | |
1061 diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; | |
1062 | |
1063 if (__unsigned_compare_gt_128 (CY, CX)) { | |
1064 // CX < CY | |
1065 | |
1066 // 2^64 | |
1067 f64.i = 0x5f800000; | |
1068 | |
1069 // fx ~ CX, fy ~ CY | |
1070 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
1071 fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; | |
1072 // expon_cy - expon_cx | |
1073 bin_index = (fy.i - fx.i) >> 23; | |
1074 | |
1075 if (CX.w[1]) { | |
1076 T = power10_index_binexp_128[bin_index].w[0]; | |
1077 __mul_64x128_short (CA, T, CX); | |
1078 } else { | |
1079 T128 = power10_index_binexp_128[bin_index]; | |
1080 __mul_64x128_short (CA, CX.w[0], T128); | |
1081 } | |
1082 | |
1083 ed2 = 33; | |
1084 if (__unsigned_compare_gt_128 (CY, CA)) | |
1085 ed2++; | |
1086 | |
1087 T128 = power10_table_128[ed2]; | |
1088 __mul_128x128_to_256 (CA4, CA, T128); | |
1089 | |
1090 ed2 += estimate_decimal_digits[bin_index]; | |
1091 CQ.w[0] = CQ.w[1] = 0; | |
1092 diff_expon = diff_expon - ed2; | |
1093 | |
1094 } else { | |
1095 // get CQ = CX/CY | |
1096 __div_128_by_128 (&CQ, &CR, CX, CY); | |
1097 | |
1098 if (!CR.w[1] && !CR.w[0]) { | |
1099 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, | |
1100 pfpsf); | |
1101 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1102 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1103 #endif | |
1104 BID_RETURN (res); | |
1105 } | |
1106 // get number of decimal digits in CQ | |
1107 // 2^64 | |
1108 f64.i = 0x5f800000; | |
1109 fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; | |
1110 // binary expon. of CQ | |
1111 bin_expon = (fx.i - 0x3f800000) >> 23; | |
1112 | |
1113 digits_q = estimate_decimal_digits[bin_expon]; | |
1114 TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; | |
1115 TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; | |
1116 if (__unsigned_compare_ge_128 (CQ, TP128)) | |
1117 digits_q++; | |
1118 | |
1119 ed2 = 34 - digits_q; | |
1120 T128.w[0] = power10_table_128[ed2].w[0]; | |
1121 T128.w[1] = power10_table_128[ed2].w[1]; | |
1122 __mul_128x128_to_256 (CA4, CR, T128); | |
1123 diff_expon = diff_expon - ed2; | |
1124 __mul_128x128_low (CQ, CQ, T128); | |
1125 | |
1126 } | |
1127 | |
1128 __div_256_by_128 (&CQ, &CA4, CY); | |
1129 | |
1130 #ifdef SET_STATUS_FLAGS | |
1131 if (CA4.w[0] || CA4.w[1]) { | |
1132 // set status flags | |
1133 __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
1134 } | |
1135 #ifndef LEAVE_TRAILING_ZEROS | |
1136 else | |
1137 #endif | |
1138 #else | |
1139 #ifndef LEAVE_TRAILING_ZEROS | |
1140 if (!CA4.w[0] && !CA4.w[1]) | |
1141 #endif | |
1142 #endif | |
1143 #ifndef LEAVE_TRAILING_ZEROS | |
1144 // check whether result is exact | |
1145 { | |
1146 //printf("ed2=%d,nz=%d,a=%d,CQ="LX16","LX16", RH="LX16", RL="LX16"\n",ed2,nzeros,amount,CQ.w[1],CQ.w[0],reciprocals10_128[nzeros].w[1],reciprocals10_128[nzeros].w[0]);fflush(stdout); | |
1147 // check whether CX, CY are short | |
1148 if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { | |
1149 i = (int) CY.w[0] - 1; | |
1150 j = (int) CX.w[0] - 1; | |
1151 // difference in powers of 2 factors for Y and X | |
1152 nzeros = ed2 - factors[i][0] + factors[j][0]; | |
1153 // difference in powers of 5 factors | |
1154 d5 = ed2 - factors[i][1] + factors[j][1]; | |
1155 if (d5 < nzeros) | |
1156 nzeros = d5; | |
1157 // get P*(2^M[extra_digits])/10^extra_digits | |
1158 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]); | |
1159 //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2]; | |
1160 | |
1161 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
1162 amount = recip_scale[nzeros]; | |
1163 __shr_128_long (CQ, Qh, amount); | |
1164 | |
1165 diff_expon += nzeros; | |
1166 } else { | |
1167 // decompose Q as Qh*10^17 + Ql | |
1168 //T128 = reciprocals10_128[17]; | |
1169 T128.w[0] = 0x44909befeb9fad49ull; | |
1170 T128.w[1] = 0x000b877aa3236a4bull; | |
1171 __mul_128x128_to_256 (P256, CQ, T128); | |
1172 //amount = recip_scale[17]; | |
1173 Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44)); | |
1174 Q_low = CQ.w[0] - Q_high * 100000000000000000ull; | |
1175 | |
1176 if (!Q_low) { | |
1177 diff_expon += 17; | |
1178 | |
1179 tdigit[0] = Q_high & 0x3ffffff; | |
1180 tdigit[1] = 0; | |
1181 QX = Q_high >> 26; | |
1182 QX32 = QX; | |
1183 nzeros = 0; | |
1184 | |
1185 for (j = 0; QX32; j++, QX32 >>= 7) { | |
1186 k = (QX32 & 127); | |
1187 tdigit[0] += convert_table[j][k][0]; | |
1188 tdigit[1] += convert_table[j][k][1]; | |
1189 if (tdigit[0] >= 100000000) { | |
1190 tdigit[0] -= 100000000; | |
1191 tdigit[1]++; | |
1192 } | |
1193 } | |
1194 | |
1195 | |
1196 if (tdigit[1] >= 100000000) { | |
1197 tdigit[1] -= 100000000; | |
1198 if (tdigit[1] >= 100000000) | |
1199 tdigit[1] -= 100000000; | |
1200 } | |
1201 | |
1202 digit = tdigit[0]; | |
1203 if (!digit && !tdigit[1]) | |
1204 nzeros += 16; | |
1205 else { | |
1206 if (!digit) { | |
1207 nzeros += 8; | |
1208 digit = tdigit[1]; | |
1209 } | |
1210 // decompose digit | |
1211 PD = (UINT64) digit *0x068DB8BBull; | |
1212 digit_h = (UINT32) (PD >> 40); | |
1213 //printf("i=%d, nz=%d, digit=%d (%d, %016I64x %016I64x)\n",i,nzeros,digit_h,digit,PD,digit_h);fflush(stdout); | |
1214 digit_low = digit - digit_h * 10000; | |
1215 | |
1216 if (!digit_low) | |
1217 nzeros += 4; | |
1218 else | |
1219 digit_h = digit_low; | |
1220 | |
1221 if (!(digit_h & 1)) | |
1222 nzeros += | |
1223 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
1224 (digit_h & 7)); | |
1225 } | |
1226 | |
1227 if (nzeros) { | |
1228 __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]); | |
1229 | |
1230 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64 | |
1231 amount = short_recip_scale[nzeros]; | |
1232 CQ.w[0] = CQ.w[1] >> amount; | |
1233 } else | |
1234 CQ.w[0] = Q_high; | |
1235 CQ.w[1] = 0; | |
1236 | |
1237 diff_expon += nzeros; | |
1238 } else { | |
1239 tdigit[0] = Q_low & 0x3ffffff; | |
1240 tdigit[1] = 0; | |
1241 QX = Q_low >> 26; | |
1242 QX32 = QX; | |
1243 nzeros = 0; | |
1244 | |
1245 for (j = 0; QX32; j++, QX32 >>= 7) { | |
1246 k = (QX32 & 127); | |
1247 tdigit[0] += convert_table[j][k][0]; | |
1248 tdigit[1] += convert_table[j][k][1]; | |
1249 if (tdigit[0] >= 100000000) { | |
1250 tdigit[0] -= 100000000; | |
1251 tdigit[1]++; | |
1252 } | |
1253 } | |
1254 | |
1255 if (tdigit[1] >= 100000000) { | |
1256 tdigit[1] -= 100000000; | |
1257 if (tdigit[1] >= 100000000) | |
1258 tdigit[1] -= 100000000; | |
1259 } | |
1260 | |
1261 digit = tdigit[0]; | |
1262 if (!digit && !tdigit[1]) | |
1263 nzeros += 16; | |
1264 else { | |
1265 if (!digit) { | |
1266 nzeros += 8; | |
1267 digit = tdigit[1]; | |
1268 } | |
1269 // decompose digit | |
1270 PD = (UINT64) digit *0x068DB8BBull; | |
1271 digit_h = (UINT32) (PD >> 40); | |
1272 //printf("i=%d, nz=%d, digit=%d (%d, %016I64x %016I64x)\n",i,nzeros,digit_h,digit,PD,digit_h);fflush(stdout); | |
1273 digit_low = digit - digit_h * 10000; | |
1274 | |
1275 if (!digit_low) | |
1276 nzeros += 4; | |
1277 else | |
1278 digit_h = digit_low; | |
1279 | |
1280 if (!(digit_h & 1)) | |
1281 nzeros += | |
1282 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
1283 (digit_h & 7)); | |
1284 } | |
1285 | |
1286 if (nzeros) { | |
1287 // get P*(2^M[extra_digits])/10^extra_digits | |
1288 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]); | |
1289 | |
1290 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
1291 amount = recip_scale[nzeros]; | |
1292 __shr_128 (CQ, Qh, amount); | |
1293 } | |
1294 diff_expon += nzeros; | |
1295 | |
1296 } | |
1297 } | |
1298 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, | |
1299 pfpsf); | |
1300 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1301 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1302 #endif | |
1303 BID_RETURN (res); | |
1304 } | |
1305 #endif | |
1306 | |
1307 if (diff_expon >= 0) { | |
1308 #ifdef IEEE_ROUND_NEAREST | |
1309 // rounding | |
1310 // 2*CA4 - CY | |
1311 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1312 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1313 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1314 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1315 | |
1316 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
1317 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
1318 | |
1319 CQ.w[0] += carry64; | |
1320 if (CQ.w[0] < carry64) | |
1321 CQ.w[1]++; | |
1322 #else | |
1323 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY | |
1324 // rounding | |
1325 // 2*CA4 - CY | |
1326 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1327 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1328 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1329 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1330 | |
1331 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
1332 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
1333 | |
1334 CQ.w[0] += carry64; | |
1335 if (CQ.w[0] < carry64) | |
1336 CQ.w[1]++; | |
1337 #else | |
1338 rmode = rnd_mode; | |
1339 if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) | |
1340 rmode = 3 - rmode; | |
1341 switch (rmode) { | |
1342 case ROUNDING_TO_NEAREST: // round to nearest code | |
1343 // rounding | |
1344 // 2*CA4 - CY | |
1345 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1346 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1347 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1348 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1349 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
1350 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
1351 CQ.w[0] += carry64; | |
1352 if (CQ.w[0] < carry64) | |
1353 CQ.w[1]++; | |
1354 break; | |
1355 case ROUNDING_TIES_AWAY: | |
1356 // rounding | |
1357 // 2*CA4 - CY | |
1358 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1359 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1360 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1361 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1362 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
1363 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
1364 CQ.w[0] += carry64; | |
1365 if (CQ.w[0] < carry64) | |
1366 CQ.w[1]++; | |
1367 break; | |
1368 case ROUNDING_DOWN: | |
1369 case ROUNDING_TO_ZERO: | |
1370 break; | |
1371 default: // rounding up | |
1372 CQ.w[0]++; | |
1373 if (!CQ.w[0]) | |
1374 CQ.w[1]++; | |
1375 break; | |
1376 } | |
1377 #endif | |
1378 #endif | |
1379 | |
1380 } else { | |
1381 #ifdef SET_STATUS_FLAGS | |
1382 if (CA4.w[0] || CA4.w[1]) { | |
1383 // set status flags | |
1384 __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
1385 } | |
1386 #endif | |
1387 handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ, | |
1388 CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf); | |
1389 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1390 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1391 #endif | |
1392 BID_RETURN (res); | |
1393 } | |
1394 | |
1395 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); | |
1396 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1397 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1398 #endif | |
1399 BID_RETURN (res); | |
1400 | |
1401 } | |
1402 | |
1403 | |
1404 BID128_FUNCTION_ARG128_ARGTYPE2 (bid128qd_div, x, UINT64, y) | |
1405 UINT256 CA4, CA4r, P256; | |
1406 UINT128 CX, CY, T128, CQ, CR, CA, TP128, Qh, Ql, res; | |
1407 UINT64 sign_x, sign_y, T, carry64, D, Q_high, Q_low, QX, PD, | |
1408 valid_y; | |
1409 int_float fx, fy, f64; | |
1410 UINT32 QX32, tdigit[3], digit, digit_h, digit_low; | |
1411 int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, | |
1412 digits_q, amount; | |
1413 int nzeros, i, j, k, d5, rmode; | |
1414 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1415 fexcept_t binaryflags = 0; | |
1416 #endif | |
1417 | |
1418 | |
1419 valid_y = unpack_BID64 (&sign_y, &exponent_y, &CY.w[0], y); | |
1420 // unpack arguments, check for NaN or Infinity | |
1421 if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { | |
1422 // test if x is NaN | |
1423 if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { | |
1424 #ifdef SET_STATUS_FLAGS | |
1425 if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull || // sNaN | |
1426 (y & 0x7e00000000000000ull) == 0x7e00000000000000ull) | |
1427 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1428 #endif | |
1429 res.w[1] = (CX.w[1]) & QUIET_MASK64; | |
1430 res.w[0] = CX.w[0]; | |
1431 BID_RETURN (res); | |
1432 } | |
1433 // x is Infinity? | |
1434 if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
1435 // check if y is Inf. | |
1436 if (((y & 0x7c00000000000000ull) == 0x7800000000000000ull)) | |
1437 // return NaN | |
1438 { | |
1439 #ifdef SET_STATUS_FLAGS | |
1440 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1441 #endif | |
1442 res.w[1] = 0x7c00000000000000ull; | |
1443 res.w[0] = 0; | |
1444 BID_RETURN (res); | |
1445 } | |
1446 // y is NaN? | |
1447 if (((y & 0x7c00000000000000ull) != 0x7c00000000000000ull)) | |
1448 // return NaN | |
1449 { | |
1450 // return +/-Inf | |
1451 res.w[1] = ((x.w[1] ^ y) & 0x8000000000000000ull) | | |
1452 0x7800000000000000ull; | |
1453 res.w[0] = 0; | |
1454 BID_RETURN (res); | |
1455 } | |
1456 } | |
1457 // x is 0 | |
1458 if ((y & 0x7800000000000000ull) < 0x7800000000000000ull) { | |
1459 if (!CY.w[0]) { | |
1460 #ifdef SET_STATUS_FLAGS | |
1461 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1462 #endif | |
1463 // x=y=0, return NaN | |
1464 res.w[1] = 0x7c00000000000000ull; | |
1465 res.w[0] = 0; | |
1466 BID_RETURN (res); | |
1467 } | |
1468 // return 0 | |
1469 res.w[1] = (x.w[1] ^ y) & 0x8000000000000000ull; | |
1470 exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; | |
1471 if (exponent_x > DECIMAL_MAX_EXPON_128) | |
1472 exponent_x = DECIMAL_MAX_EXPON_128; | |
1473 else if (exponent_x < 0) | |
1474 exponent_x = 0; | |
1475 res.w[1] |= (((UINT64) exponent_x) << 49); | |
1476 res.w[0] = 0; | |
1477 BID_RETURN (res); | |
1478 } | |
1479 } | |
1480 CY.w[1] = 0; | |
1481 if (!valid_y) { | |
1482 // y is Inf. or NaN | |
1483 | |
1484 // test if y is NaN | |
1485 if ((y & NAN_MASK64) == NAN_MASK64) { | |
1486 #ifdef SET_STATUS_FLAGS | |
1487 if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN | |
1488 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
1489 #endif | |
1490 res.w[0] = (CY.w[0] & 0x0003ffffffffffffull); | |
1491 __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); | |
1492 res.w[1] |= ((CY.w[0]) & 0xfc00000000000000ull); | |
1493 BID_RETURN (res); | |
1494 } | |
1495 // y is Infinity? | |
1496 if ((y & INFINITY_MASK64) == INFINITY_MASK64) { | |
1497 // return +/-0 | |
1498 res.w[1] = ((x.w[1] ^ y) & 0x8000000000000000ull); | |
1499 res.w[0] = 0; | |
1500 BID_RETURN (res); | |
1501 } | |
1502 // y is 0 | |
1503 #ifdef SET_STATUS_FLAGS | |
1504 __set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); | |
1505 #endif | |
1506 res.w[1] = (sign_x ^ sign_y) | INFINITY_MASK64; | |
1507 res.w[0] = 0; | |
1508 BID_RETURN (res); | |
1509 } | |
1510 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1511 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1512 #endif | |
1513 diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; | |
1514 | |
1515 if (__unsigned_compare_gt_128 (CY, CX)) { | |
1516 // CX < CY | |
1517 | |
1518 // 2^64 | |
1519 f64.i = 0x5f800000; | |
1520 | |
1521 // fx ~ CX, fy ~ CY | |
1522 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; | |
1523 fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; | |
1524 // expon_cy - expon_cx | |
1525 bin_index = (fy.i - fx.i) >> 23; | |
1526 | |
1527 if (CX.w[1]) { | |
1528 T = power10_index_binexp_128[bin_index].w[0]; | |
1529 __mul_64x128_short (CA, T, CX); | |
1530 } else { | |
1531 T128 = power10_index_binexp_128[bin_index]; | |
1532 __mul_64x128_short (CA, CX.w[0], T128); | |
1533 } | |
1534 | |
1535 ed2 = 33; | |
1536 if (__unsigned_compare_gt_128 (CY, CA)) | |
1537 ed2++; | |
1538 | |
1539 T128 = power10_table_128[ed2]; | |
1540 __mul_128x128_to_256 (CA4, CA, T128); | |
1541 | |
1542 ed2 += estimate_decimal_digits[bin_index]; | |
1543 CQ.w[0] = CQ.w[1] = 0; | |
1544 diff_expon = diff_expon - ed2; | |
1545 | |
1546 } else { | |
1547 // get CQ = CX/CY | |
1548 __div_128_by_128 (&CQ, &CR, CX, CY); | |
1549 | |
1550 if (!CR.w[1] && !CR.w[0]) { | |
1551 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, | |
1552 pfpsf); | |
1553 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1554 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1555 #endif | |
1556 BID_RETURN (res); | |
1557 } | |
1558 // get number of decimal digits in CQ | |
1559 // 2^64 | |
1560 f64.i = 0x5f800000; | |
1561 fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; | |
1562 // binary expon. of CQ | |
1563 bin_expon = (fx.i - 0x3f800000) >> 23; | |
1564 | |
1565 digits_q = estimate_decimal_digits[bin_expon]; | |
1566 TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; | |
1567 TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; | |
1568 if (__unsigned_compare_ge_128 (CQ, TP128)) | |
1569 digits_q++; | |
1570 | |
1571 ed2 = 34 - digits_q; | |
1572 T128.w[0] = power10_table_128[ed2].w[0]; | |
1573 T128.w[1] = power10_table_128[ed2].w[1]; | |
1574 __mul_128x128_to_256 (CA4, CR, T128); | |
1575 diff_expon = diff_expon - ed2; | |
1576 __mul_128x128_low (CQ, CQ, T128); | |
1577 | |
1578 } | |
1579 | |
1580 __div_256_by_128 (&CQ, &CA4, CY); | |
1581 | |
1582 | |
1583 #ifdef SET_STATUS_FLAGS | |
1584 if (CA4.w[0] || CA4.w[1]) { | |
1585 // set status flags | |
1586 __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
1587 } | |
1588 #ifndef LEAVE_TRAILING_ZEROS | |
1589 else | |
1590 #endif | |
1591 #else | |
1592 #ifndef LEAVE_TRAILING_ZEROS | |
1593 if (!CA4.w[0] && !CA4.w[1]) | |
1594 #endif | |
1595 #endif | |
1596 #ifndef LEAVE_TRAILING_ZEROS | |
1597 // check whether result is exact | |
1598 { | |
1599 // check whether CX, CY are short | |
1600 if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { | |
1601 i = (int) CY.w[0] - 1; | |
1602 j = (int) CX.w[0] - 1; | |
1603 // difference in powers of 2 factors for Y and X | |
1604 nzeros = ed2 - factors[i][0] + factors[j][0]; | |
1605 // difference in powers of 5 factors | |
1606 d5 = ed2 - factors[i][1] + factors[j][1]; | |
1607 if (d5 < nzeros) | |
1608 nzeros = d5; | |
1609 // get P*(2^M[extra_digits])/10^extra_digits | |
1610 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]); | |
1611 //__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2]; | |
1612 | |
1613 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
1614 amount = recip_scale[nzeros]; | |
1615 __shr_128_long (CQ, Qh, amount); | |
1616 | |
1617 diff_expon += nzeros; | |
1618 } else { | |
1619 // decompose Q as Qh*10^17 + Ql | |
1620 //T128 = reciprocals10_128[17]; | |
1621 T128.w[0] = 0x44909befeb9fad49ull; | |
1622 T128.w[1] = 0x000b877aa3236a4bull; | |
1623 __mul_128x128_to_256 (P256, CQ, T128); | |
1624 //amount = recip_scale[17]; | |
1625 Q_high = (P256.w[2] >> 44) | (P256.w[3] << (64 - 44)); | |
1626 Q_low = CQ.w[0] - Q_high * 100000000000000000ull; | |
1627 | |
1628 if (!Q_low) { | |
1629 diff_expon += 17; | |
1630 | |
1631 tdigit[0] = Q_high & 0x3ffffff; | |
1632 tdigit[1] = 0; | |
1633 QX = Q_high >> 26; | |
1634 QX32 = QX; | |
1635 nzeros = 0; | |
1636 | |
1637 for (j = 0; QX32; j++, QX32 >>= 7) { | |
1638 k = (QX32 & 127); | |
1639 tdigit[0] += convert_table[j][k][0]; | |
1640 tdigit[1] += convert_table[j][k][1]; | |
1641 if (tdigit[0] >= 100000000) { | |
1642 tdigit[0] -= 100000000; | |
1643 tdigit[1]++; | |
1644 } | |
1645 } | |
1646 | |
1647 | |
1648 if (tdigit[1] >= 100000000) { | |
1649 tdigit[1] -= 100000000; | |
1650 if (tdigit[1] >= 100000000) | |
1651 tdigit[1] -= 100000000; | |
1652 } | |
1653 | |
1654 digit = tdigit[0]; | |
1655 if (!digit && !tdigit[1]) | |
1656 nzeros += 16; | |
1657 else { | |
1658 if (!digit) { | |
1659 nzeros += 8; | |
1660 digit = tdigit[1]; | |
1661 } | |
1662 // decompose digit | |
1663 PD = (UINT64) digit *0x068DB8BBull; | |
1664 digit_h = (UINT32) (PD >> 40); | |
1665 digit_low = digit - digit_h * 10000; | |
1666 | |
1667 if (!digit_low) | |
1668 nzeros += 4; | |
1669 else | |
1670 digit_h = digit_low; | |
1671 | |
1672 if (!(digit_h & 1)) | |
1673 nzeros += | |
1674 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
1675 (digit_h & 7)); | |
1676 } | |
1677 | |
1678 if (nzeros) { | |
1679 __mul_64x64_to_128 (CQ, Q_high, reciprocals10_64[nzeros]); | |
1680 | |
1681 // now get P/10^extra_digits: shift C64 right by M[extra_digits]-64 | |
1682 amount = short_recip_scale[nzeros]; | |
1683 CQ.w[0] = CQ.w[1] >> amount; | |
1684 } else | |
1685 CQ.w[0] = Q_high; | |
1686 CQ.w[1] = 0; | |
1687 | |
1688 diff_expon += nzeros; | |
1689 } else { | |
1690 tdigit[0] = Q_low & 0x3ffffff; | |
1691 tdigit[1] = 0; | |
1692 QX = Q_low >> 26; | |
1693 QX32 = QX; | |
1694 nzeros = 0; | |
1695 | |
1696 for (j = 0; QX32; j++, QX32 >>= 7) { | |
1697 k = (QX32 & 127); | |
1698 tdigit[0] += convert_table[j][k][0]; | |
1699 tdigit[1] += convert_table[j][k][1]; | |
1700 if (tdigit[0] >= 100000000) { | |
1701 tdigit[0] -= 100000000; | |
1702 tdigit[1]++; | |
1703 } | |
1704 } | |
1705 | |
1706 if (tdigit[1] >= 100000000) { | |
1707 tdigit[1] -= 100000000; | |
1708 if (tdigit[1] >= 100000000) | |
1709 tdigit[1] -= 100000000; | |
1710 } | |
1711 | |
1712 digit = tdigit[0]; | |
1713 if (!digit && !tdigit[1]) | |
1714 nzeros += 16; | |
1715 else { | |
1716 if (!digit) { | |
1717 nzeros += 8; | |
1718 digit = tdigit[1]; | |
1719 } | |
1720 // decompose digit | |
1721 PD = (UINT64) digit *0x068DB8BBull; | |
1722 digit_h = (UINT32) (PD >> 40); | |
1723 digit_low = digit - digit_h * 10000; | |
1724 | |
1725 if (!digit_low) | |
1726 nzeros += 4; | |
1727 else | |
1728 digit_h = digit_low; | |
1729 | |
1730 if (!(digit_h & 1)) | |
1731 nzeros += | |
1732 3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> | |
1733 (digit_h & 7)); | |
1734 } | |
1735 | |
1736 if (nzeros) { | |
1737 // get P*(2^M[extra_digits])/10^extra_digits | |
1738 __mul_128x128_full (Qh, Ql, CQ, reciprocals10_128[nzeros]); | |
1739 | |
1740 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 | |
1741 amount = recip_scale[nzeros]; | |
1742 __shr_128 (CQ, Qh, amount); | |
1743 } | |
1744 diff_expon += nzeros; | |
1745 | |
1746 } | |
1747 } | |
1748 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode,pfpsf); | |
1749 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1750 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1751 #endif | |
1752 BID_RETURN (res); | |
1753 } | |
1754 #endif | |
1755 | |
1756 if (diff_expon >= 0) { | |
1757 #ifdef IEEE_ROUND_NEAREST | |
1758 // rounding | |
1759 // 2*CA4 - CY | |
1760 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1761 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1762 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1763 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1764 | |
1765 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
1766 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
1767 | |
1768 CQ.w[0] += carry64; | |
1769 if (CQ.w[0] < carry64) | |
1770 CQ.w[1]++; | |
1771 #else | |
1772 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY | |
1773 // rounding | |
1774 // 2*CA4 - CY | |
1775 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1776 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1777 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1778 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1779 | |
1780 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
1781 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
1782 | |
1783 CQ.w[0] += carry64; | |
1784 if (CQ.w[0] < carry64) | |
1785 CQ.w[1]++; | |
1786 #else | |
1787 rmode = rnd_mode; | |
1788 if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) | |
1789 rmode = 3 - rmode; | |
1790 switch (rmode) { | |
1791 case ROUNDING_TO_NEAREST: // round to nearest code | |
1792 // rounding | |
1793 // 2*CA4 - CY | |
1794 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1795 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1796 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1797 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1798 D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; | |
1799 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); | |
1800 CQ.w[0] += carry64; | |
1801 if (CQ.w[0] < carry64) | |
1802 CQ.w[1]++; | |
1803 break; | |
1804 case ROUNDING_TIES_AWAY: | |
1805 // rounding | |
1806 // 2*CA4 - CY | |
1807 CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); | |
1808 CA4r.w[0] = CA4.w[0] + CA4.w[0]; | |
1809 __sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); | |
1810 CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; | |
1811 D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; | |
1812 carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; | |
1813 CQ.w[0] += carry64; | |
1814 if (CQ.w[0] < carry64) | |
1815 CQ.w[1]++; | |
1816 break; | |
1817 case ROUNDING_DOWN: | |
1818 case ROUNDING_TO_ZERO: | |
1819 break; | |
1820 default: // rounding up | |
1821 CQ.w[0]++; | |
1822 if (!CQ.w[0]) | |
1823 CQ.w[1]++; | |
1824 break; | |
1825 } | |
1826 #endif | |
1827 #endif | |
1828 | |
1829 } else { | |
1830 #ifdef SET_STATUS_FLAGS | |
1831 if (CA4.w[0] || CA4.w[1]) { | |
1832 // set status flags | |
1833 __set_status_flags (pfpsf, INEXACT_EXCEPTION); | |
1834 } | |
1835 #endif | |
1836 handle_UF_128_rem (&res, sign_x ^ sign_y, diff_expon, CQ, | |
1837 CA4.w[1] | CA4.w[0], &rnd_mode, pfpsf); | |
1838 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1839 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1840 #endif | |
1841 BID_RETURN (res); | |
1842 | |
1843 } | |
1844 | |
1845 get_BID128 (&res, sign_x ^ sign_y, diff_expon, CQ, &rnd_mode, pfpsf); | |
1846 #ifdef UNCHANGED_BINARY_STATUS_FLAGS | |
1847 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); | |
1848 #endif | |
1849 BID_RETURN (res); | |
1850 | |
1851 } |