Mercurial > hg > CbC > CbC_gcc
comparison libgcc/config/libbid/bid128_add.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Copyright (C) 2007, 2009 Free Software Foundation, Inc. | |
2 | |
3 This file is part of GCC. | |
4 | |
5 GCC is free software; you can redistribute it and/or modify it under | |
6 the terms of the GNU General Public License as published by the Free | |
7 Software Foundation; either version 3, or (at your option) any later | |
8 version. | |
9 | |
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 for more details. | |
14 | |
15 Under Section 7 of GPL version 3, you are granted additional | |
16 permissions described in the GCC Runtime Library Exception, version | |
17 3.1, as published by the Free Software Foundation. | |
18 | |
19 You should have received a copy of the GNU General Public License and | |
20 a copy of the GCC Runtime Library Exception along with this program; | |
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 <http://www.gnu.org/licenses/>. */ | |
23 | |
24 #include "bid_internal.h" | |
25 | |
26 | |
/* bid64dq_add: BID64 + BID128 -> BID64.
   Widens the 64-bit operand x to the 128-bit format and delegates the
   actual addition (and the single final rounding) to bid64qq_add.
   DECIMAL_CALL_BY_REFERENCE selects at build time between the
   pointer-based interface (result stored through *pres) and the
   by-value interface; rounding mode and exception state are threaded
   through via the _RND_MODE_* / _EXC_* configuration macros.  */
#if DECIMAL_CALL_BY_REFERENCE
void
bid64dq_add (UINT64 * pres, UINT64 * px, UINT128 * py
	     _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	     _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  /* per-call rounding mode: copy it from the pointer parameter */
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64dq_add (UINT64 x, UINT128 y
	     _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	     _EXC_INFO_PARAM) {
#endif
  UINT64 res = 0xbaddbaddbaddbaddull;	/* canary; overwritten before return */
  UINT128 x1;				/* x widened to BID128 */

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qq_add (&res, &x1, py
	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
	       _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid64qq_add (x1, y
		     _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		     _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
58 | |
59 | |
/* bid64qd_add: BID128 + BID64 -> BID64.
   Mirror image of bid64dq_add: the 64-bit operand y is widened to
   BID128 and the sum is computed (with one final rounding) by
   bid64qq_add.  Interface shape is selected by
   DECIMAL_CALL_BY_REFERENCE; rounding mode and exception state travel
   via the _RND_MODE_* / _EXC_* configuration macros.  */
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qd_add (UINT64 * pres, UINT128 * px, UINT64 * py
	     _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	     _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* per-call rounding mode: copy it from the pointer parameter */
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qd_add (UINT128 x, UINT64 y
	     _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	     _EXC_INFO_PARAM) {
#endif
  UINT64 res = 0xbaddbaddbaddbaddull;	/* canary; overwritten before return */
  UINT128 y1;				/* y widened to BID128 */

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qq_add (&res, px, &y1
	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
	       _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid64qq_add (x, y1
		     _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		     _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
91 | |
92 | |
/* bid64qq_add: BID128 + BID128 -> BID64.
   Computed as a fused multiply-add, res = 1 * x + y, via bid64qqq_fma,
   so the narrowing to the 64-bit format is performed by the fma with a
   single rounding.  */
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qq_add (UINT64 * pres, UINT128 * px, UINT128 * py
	     _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	     _EXC_INFO_PARAM) {
  UINT128 x = *px, y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* per-call rounding mode: copy it from the pointer parameter */
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qq_add (UINT128 x, UINT128 y
	     _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	     _EXC_INFO_PARAM) {
#endif

  /* BID128 constant: decimal 1 (coefficient 1; the 0x3040... high word
     presumably encodes the biased zero exponent — see bid_internal.h) */
  UINT128 one = { {0x0000000000000001ull, 0x3040000000000000ull}
  };
  UINT64 res = 0xbaddbaddbaddbaddull;	/* canary; overwritten below */

  BID_SWAP128 (one);	/* arrange words for the configured word order */
#if DECIMAL_CALL_BY_REFERENCE
  bid64qqq_fma (&res, &one, &x, &y
		_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		_EXC_INFO_ARG);
#else
  res = bid64qqq_fma (one, x, y
		      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		      _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
125 | |
126 | |
/* bid128dd_add: BID64 + BID64 -> BID128.
   Both 64-bit operands are widened to BID128, then the full-width
   addition is delegated to bid128_add.  Interface shape (by value vs.
   by reference) is chosen by DECIMAL_CALL_BY_REFERENCE; rounding mode
   and exception state travel via the _RND_MODE_* / _EXC_* macros.  */
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dd_add (UINT128 * pres, UINT64 * px, UINT64 * py
	      _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	      _EXC_INFO_PARAM) {
  UINT64 x = *px, y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* per-call rounding mode: copy it from the pointer parameter */
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128dd_add (UINT64 x, UINT64 y
	      _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	      _EXC_INFO_PARAM) {
#endif
  /* canary pattern; overwritten before return */
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
  };
  UINT128 x1, y1;	/* x and y widened to BID128 */

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_add (&res, &x1, &y1
	      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
	      _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_add (x1, y1
		    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		    _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
161 | |
162 | |
/* bid128dq_add: BID64 + BID128 -> BID128.
   Widens the 64-bit operand x to BID128 and delegates to bid128_add.
   Interface shape is chosen by DECIMAL_CALL_BY_REFERENCE; rounding
   mode and exception state travel via the _RND_MODE_* / _EXC_*
   configuration macros.  */
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dq_add (UINT128 * pres, UINT64 * px, UINT128 * py
	      _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	      _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  /* per-call rounding mode: copy it from the pointer parameter */
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128dq_add (UINT64 x, UINT128 y
	      _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	      _EXC_INFO_PARAM) {
#endif
  /* canary pattern; overwritten before return */
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
  };
  UINT128 x1;	/* x widened to BID128 */

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_add (&res, &x1, py
	      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
	      _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_add (x1, y
		    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		    _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
195 | |
196 | |
/* bid128qd_add: BID128 + BID64 -> BID128.
   Mirror image of bid128dq_add: the 64-bit operand y is widened to
   BID128 and the addition is delegated to bid128_add.  Interface shape
   is chosen by DECIMAL_CALL_BY_REFERENCE; rounding mode and exception
   state travel via the _RND_MODE_* / _EXC_* configuration macros.  */
#if DECIMAL_CALL_BY_REFERENCE
void
bid128qd_add (UINT128 * pres, UINT128 * px, UINT64 * py
	      _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	      _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* per-call rounding mode: copy it from the pointer parameter */
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128qd_add (UINT128 x, UINT64 y
	      _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	      _EXC_INFO_PARAM) {
#endif
  /* canary pattern; overwritten before return */
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
  };
  UINT128 y1;	/* y widened to BID128 */

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_add (&res, px, &y1
	      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
	      _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_add (x, y1
		    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		    _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
229 | |
230 | |
231 // bid128_add stands for bid128qq_add | |
232 | |
233 | |
234 /***************************************************************************** | |
235 * BID64/BID128 sub | |
236 ****************************************************************************/ | |
237 | |
/* bid64dq_sub: BID64 - BID128 -> BID64.
   Widens the 64-bit operand x to BID128 and delegates the subtraction
   (and the single final rounding) to bid64qq_sub.  Interface shape is
   chosen by DECIMAL_CALL_BY_REFERENCE; rounding mode and exception
   state travel via the _RND_MODE_* / _EXC_* configuration macros.  */
#if DECIMAL_CALL_BY_REFERENCE
void
bid64dq_sub (UINT64 * pres, UINT64 * px, UINT128 * py
	     _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	     _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  /* per-call rounding mode: copy it from the pointer parameter */
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64dq_sub (UINT64 x, UINT128 y
	     _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	     _EXC_INFO_PARAM) {
#endif
  UINT64 res = 0xbaddbaddbaddbaddull;	/* canary; overwritten before return */
  UINT128 x1;				/* x widened to BID128 */

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qq_sub (&res, &x1, py
	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
	       _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid64qq_sub (x1, y
		     _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		     _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
269 | |
270 | |
/* bid64qd_sub: BID128 - BID64 -> BID64.
   Mirror image of bid64dq_sub: the 64-bit operand y is widened to
   BID128 and the subtraction is delegated to bid64qq_sub.  Interface
   shape is chosen by DECIMAL_CALL_BY_REFERENCE; rounding mode and
   exception state travel via the _RND_MODE_* / _EXC_* macros.  */
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qd_sub (UINT64 * pres, UINT128 * px, UINT64 * py
	     _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	     _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* per-call rounding mode: copy it from the pointer parameter */
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qd_sub (UINT128 x, UINT64 y
	     _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	     _EXC_INFO_PARAM) {
#endif
  UINT64 res = 0xbaddbaddbaddbaddull;	/* canary; overwritten before return */
  UINT128 y1;				/* y widened to BID128 */

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64qq_sub (&res, px, &y1
	       _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
	       _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid64qq_sub (x, y1
		     _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		     _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
302 | |
303 | |
/* bid64qq_sub: BID128 - BID128 -> BID64.
   Implemented by negating y and computing a fused multiply-add,
   res = 1 * x + (-y), via bid64qqq_fma, so the narrowing to the 64-bit
   format is performed with a single rounding.  The sign of y is NOT
   flipped when y is NaN, so NaN operands reach the fma unmodified and
   propagate with their original encoding.  */
#if DECIMAL_CALL_BY_REFERENCE
void
bid64qq_sub (UINT64 * pres, UINT128 * px, UINT128 * py
	     _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	     _EXC_INFO_PARAM) {
  UINT128 x = *px, y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* per-call rounding mode: copy it from the pointer parameter */
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT64
bid64qq_sub (UINT128 x, UINT128 y
	     _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	     _EXC_INFO_PARAM) {
#endif

  /* BID128 constant: decimal 1 (coefficient 1; the 0x3040... high word
     presumably encodes the biased zero exponent — see bid_internal.h) */
  UINT128 one = { {0x0000000000000001ull, 0x3040000000000000ull}
  };
  UINT64 res = 0xbaddbaddbaddbaddull;	/* canary; overwritten below */
  UINT64 y_sign;

  BID_SWAP128 (one);	/* arrange words for the configured word order */
  if ((y.w[HIGH_128W] & MASK_NAN) != MASK_NAN) { // y is not NAN
    // change its sign
    y_sign = y.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
    if (y_sign)
      y.w[HIGH_128W] = y.w[HIGH_128W] & 0x7fffffffffffffffull;
    else
      y.w[HIGH_128W] = y.w[HIGH_128W] | 0x8000000000000000ull;
  }
#if DECIMAL_CALL_BY_REFERENCE
  bid64qqq_fma (&res, &one, &x, &y
		_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		_EXC_INFO_ARG);
#else
  res = bid64qqq_fma (one, x, y
		      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		      _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
345 | |
346 | |
/* bid128dd_sub: BID64 - BID64 -> BID128.
   Both 64-bit operands are widened to BID128, then the full-width
   subtraction is delegated to bid128_sub.  Interface shape is chosen
   by DECIMAL_CALL_BY_REFERENCE; rounding mode and exception state
   travel via the _RND_MODE_* / _EXC_* configuration macros.  */
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dd_sub (UINT128 * pres, UINT64 * px, UINT64 * py
	      _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	      _EXC_INFO_PARAM) {
  UINT64 x = *px, y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* per-call rounding mode: copy it from the pointer parameter */
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128dd_sub (UINT64 x, UINT64 y
	      _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	      _EXC_INFO_PARAM) {
#endif
  /* canary pattern; overwritten before return */
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
  };
  UINT128 x1, y1;	/* x and y widened to BID128 */

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_sub (&res, &x1, &y1
	      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
	      _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_sub (x1, y1
		    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		    _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
381 | |
382 | |
/* bid128dq_sub: BID64 - BID128 -> BID128.
   Widens the 64-bit operand x to BID128 and delegates to bid128_sub.
   Interface shape is chosen by DECIMAL_CALL_BY_REFERENCE; rounding
   mode and exception state travel via the _RND_MODE_* / _EXC_*
   configuration macros.  */
#if DECIMAL_CALL_BY_REFERENCE
void
bid128dq_sub (UINT128 * pres, UINT64 * px, UINT128 * py
	      _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	      _EXC_INFO_PARAM) {
  UINT64 x = *px;
#if !DECIMAL_GLOBAL_ROUNDING
  /* per-call rounding mode: copy it from the pointer parameter */
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128dq_sub (UINT64 x, UINT128 y
	      _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	      _EXC_INFO_PARAM) {
#endif
  /* canary pattern; overwritten before return */
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
  };
  UINT128 x1;	/* x widened to BID128 */

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_sub (&res, &x1, py
	      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
	      _EXC_INFO_ARG);
#else
  x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_sub (x1, y
		    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		    _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
415 | |
416 | |
/* bid128qd_sub: BID128 - BID64 -> BID128.
   Mirror image of bid128dq_sub: the 64-bit operand y is widened to
   BID128 and the subtraction is delegated to bid128_sub.  Interface
   shape is chosen by DECIMAL_CALL_BY_REFERENCE; rounding mode and
   exception state travel via the _RND_MODE_* / _EXC_* macros.  */
#if DECIMAL_CALL_BY_REFERENCE
void
bid128qd_sub (UINT128 * pres, UINT128 * px, UINT64 * py
	      _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	      _EXC_INFO_PARAM) {
  UINT64 y = *py;
#if !DECIMAL_GLOBAL_ROUNDING
  /* per-call rounding mode: copy it from the pointer parameter */
  unsigned int rnd_mode = *prnd_mode;
#endif
#else
UINT128
bid128qd_sub (UINT128 x, UINT64 y
	      _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
	      _EXC_INFO_PARAM) {
#endif
  /* canary pattern; overwritten before return */
  UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
  };
  UINT128 y1;	/* y widened to BID128 */

#if DECIMAL_CALL_BY_REFERENCE
  bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  bid128_sub (&res, px, &y1
	      _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
	      _EXC_INFO_ARG);
#else
  y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
  res = bid128_sub (x, y1
		    _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		    _EXC_INFO_ARG);
#endif
  BID_RETURN (res);
}
449 | |
450 #if DECIMAL_CALL_BY_REFERENCE | |
451 void | |
452 bid128_add (UINT128 * pres, UINT128 * px, UINT128 * py | |
453 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
454 _EXC_INFO_PARAM) { | |
455 UINT128 x = *px, y = *py; | |
456 #if !DECIMAL_GLOBAL_ROUNDING | |
457 unsigned int rnd_mode = *prnd_mode; | |
458 #endif | |
459 #else | |
460 UINT128 | |
461 bid128_add (UINT128 x, UINT128 y | |
462 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
463 _EXC_INFO_PARAM) { | |
464 #endif | |
465 UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull} | |
466 }; | |
467 UINT64 x_sign, y_sign, tmp_sign; | |
468 UINT64 x_exp, y_exp, tmp_exp; // e1 = x_exp, e2 = y_exp | |
469 UINT64 C1_hi, C2_hi, tmp_signif_hi; | |
470 UINT64 C1_lo, C2_lo, tmp_signif_lo; | |
471 // Note: C1.w[1], C1.w[0] represent C1_hi, C1_lo (all UINT64) | |
472 // Note: C2.w[1], C2.w[0] represent C2_hi, C2_lo (all UINT64) | |
473 UINT64 tmp64, tmp64A, tmp64B; | |
474 BID_UI64DOUBLE tmp1, tmp2; | |
475 int x_nr_bits, y_nr_bits; | |
476 int q1, q2, delta, scale, x1, ind, shift, tmp_inexact = 0; | |
477 UINT64 halfulp64; | |
478 UINT128 halfulp128; | |
479 UINT128 C1, C2; | |
480 UINT128 ten2m1; | |
481 UINT128 highf2star; // top 128 bits in f2*; low 128 bits in R256[1], R256[0] | |
482 UINT256 P256, Q256, R256; | |
483 int is_inexact = 0, is_midpoint_lt_even = 0, is_midpoint_gt_even = 0; | |
484 int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0; | |
485 int second_pass = 0; | |
486 | |
487 BID_SWAP128 (x); | |
488 BID_SWAP128 (y); | |
489 x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
490 y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
491 | |
492 // check for NaN or Infinity | |
493 if (((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL) | |
494 || ((y.w[1] & MASK_SPECIAL) == MASK_SPECIAL)) { | |
495 // x is special or y is special | |
496 if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN | |
497 // check first for non-canonical NaN payload | |
498 if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || | |
499 (((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) | |
500 && (x.w[0] > 0x38c15b09ffffffffull))) { | |
501 x.w[1] = x.w[1] & 0xffffc00000000000ull; | |
502 x.w[0] = 0x0ull; | |
503 } | |
504 if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN | |
505 // set invalid flag | |
506 *pfpsf |= INVALID_EXCEPTION; | |
507 // return quiet (x) | |
508 res.w[1] = x.w[1] & 0xfc003fffffffffffull; | |
509 // clear out also G[6]-G[16] | |
510 res.w[0] = x.w[0]; | |
511 } else { // x is QNaN | |
512 // return x | |
513 res.w[1] = x.w[1] & 0xfc003fffffffffffull; | |
514 // clear out G[6]-G[16] | |
515 res.w[0] = x.w[0]; | |
516 // if y = SNaN signal invalid exception | |
517 if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { | |
518 // set invalid flag | |
519 *pfpsf |= INVALID_EXCEPTION; | |
520 } | |
521 } | |
522 BID_SWAP128 (res); | |
523 BID_RETURN (res); | |
524 } else if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN | |
525 // check first for non-canonical NaN payload | |
526 if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) || | |
527 (((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull) | |
528 && (y.w[0] > 0x38c15b09ffffffffull))) { | |
529 y.w[1] = y.w[1] & 0xffffc00000000000ull; | |
530 y.w[0] = 0x0ull; | |
531 } | |
532 if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN | |
533 // set invalid flag | |
534 *pfpsf |= INVALID_EXCEPTION; | |
535 // return quiet (y) | |
536 res.w[1] = y.w[1] & 0xfc003fffffffffffull; | |
537 // clear out also G[6]-G[16] | |
538 res.w[0] = y.w[0]; | |
539 } else { // y is QNaN | |
540 // return y | |
541 res.w[1] = y.w[1] & 0xfc003fffffffffffull; | |
542 // clear out G[6]-G[16] | |
543 res.w[0] = y.w[0]; | |
544 } | |
545 BID_SWAP128 (res); | |
546 BID_RETURN (res); | |
547 } else { // neither x not y is NaN; at least one is infinity | |
548 if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x is infinity | |
549 if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y is infinity | |
550 // if same sign, return either of them | |
551 if ((x.w[1] & MASK_SIGN) == (y.w[1] & MASK_SIGN)) { | |
552 res.w[1] = x_sign | MASK_INF; | |
553 res.w[0] = 0x0ull; | |
554 } else { // x and y are infinities of opposite signs | |
555 // set invalid flag | |
556 *pfpsf |= INVALID_EXCEPTION; | |
557 // return QNaN Indefinite | |
558 res.w[1] = 0x7c00000000000000ull; | |
559 res.w[0] = 0x0000000000000000ull; | |
560 } | |
561 } else { // y is 0 or finite | |
562 // return x | |
563 res.w[1] = x_sign | MASK_INF; | |
564 res.w[0] = 0x0ull; | |
565 } | |
566 } else { // x is not NaN or infinity, so y must be infinity | |
567 res.w[1] = y_sign | MASK_INF; | |
568 res.w[0] = 0x0ull; | |
569 } | |
570 BID_SWAP128 (res); | |
571 BID_RETURN (res); | |
572 } | |
573 } | |
574 // unpack the arguments | |
575 | |
576 // unpack x | |
577 C1_hi = x.w[1] & MASK_COEFF; | |
578 C1_lo = x.w[0]; | |
579 // test for non-canonical values: | |
580 // - values whose encoding begins with x00, x01, or x10 and whose | |
581 // coefficient is larger than 10^34 -1, or | |
582 // - values whose encoding begins with x1100, x1101, x1110 (if NaNs | |
583 // and infinitis were eliminated already this test is reduced to | |
584 // checking for x10x) | |
585 | |
586 // x is not infinity; check for non-canonical values - treated as zero | |
587 if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { | |
588 // G0_G1=11; non-canonical | |
589 x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits | |
590 C1_hi = 0; // significand high | |
591 C1_lo = 0; // significand low | |
592 } else { // G0_G1 != 11 | |
593 x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits | |
594 if (C1_hi > 0x0001ed09bead87c0ull || | |
595 (C1_hi == 0x0001ed09bead87c0ull | |
596 && C1_lo > 0x378d8e63ffffffffull)) { | |
597 // x is non-canonical if coefficient is larger than 10^34 -1 | |
598 C1_hi = 0; | |
599 C1_lo = 0; | |
600 } else { // canonical | |
601 ; | |
602 } | |
603 } | |
604 | |
605 // unpack y | |
606 C2_hi = y.w[1] & MASK_COEFF; | |
607 C2_lo = y.w[0]; | |
608 // y is not infinity; check for non-canonical values - treated as zero | |
609 if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) { | |
610 // G0_G1=11; non-canonical | |
611 y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits | |
612 C2_hi = 0; // significand high | |
613 C2_lo = 0; // significand low | |
614 } else { // G0_G1 != 11 | |
615 y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits | |
616 if (C2_hi > 0x0001ed09bead87c0ull || | |
617 (C2_hi == 0x0001ed09bead87c0ull | |
618 && C2_lo > 0x378d8e63ffffffffull)) { | |
619 // y is non-canonical if coefficient is larger than 10^34 -1 | |
620 C2_hi = 0; | |
621 C2_lo = 0; | |
622 } else { // canonical | |
623 ; | |
624 } | |
625 } | |
626 | |
627 if ((C1_hi == 0x0ull) && (C1_lo == 0x0ull)) { | |
628 // x is 0 and y is not special | |
629 // if y is 0 return 0 with the smaller exponent | |
630 if ((C2_hi == 0x0ull) && (C2_lo == 0x0ull)) { | |
631 if (x_exp < y_exp) | |
632 res.w[1] = x_exp; | |
633 else | |
634 res.w[1] = y_exp; | |
635 if (x_sign && y_sign) | |
636 res.w[1] = res.w[1] | x_sign; // both negative | |
637 else if (rnd_mode == ROUNDING_DOWN && x_sign != y_sign) | |
638 res.w[1] = res.w[1] | 0x8000000000000000ull; // -0 | |
639 // else; // res = +0 | |
640 res.w[0] = 0; | |
641 } else { | |
642 // for 0 + y return y, with the preferred exponent | |
643 if (y_exp <= x_exp) { | |
644 res.w[1] = y.w[1]; | |
645 res.w[0] = y.w[0]; | |
646 } else { // if y_exp > x_exp | |
647 // return (C2 * 10^scale) * 10^(y_exp - scale) | |
648 // where scale = min (P34-q2, y_exp-x_exp) | |
649 // determine q2 = nr. of decimal digits in y | |
650 // determine first the nr. of bits in y (y_nr_bits) | |
651 | |
652 if (C2_hi == 0) { // y_bits is the nr. of bits in C2_lo | |
653 if (C2_lo >= 0x0020000000000000ull) { // y >= 2^53 | |
654 // split the 64-bit value in two 32-bit halves to avoid | |
655 // rounding errors | |
656 if (C2_lo >= 0x0000000100000000ull) { // y >= 2^32 | |
657 tmp2.d = (double) (C2_lo >> 32); // exact conversion | |
658 y_nr_bits = | |
659 32 + | |
660 ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
661 } else { // y < 2^32 | |
662 tmp2.d = (double) (C2_lo); // exact conversion | |
663 y_nr_bits = | |
664 ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
665 } | |
666 } else { // if y < 2^53 | |
667 tmp2.d = (double) C2_lo; // exact conversion | |
668 y_nr_bits = | |
669 ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
670 } | |
671 } else { // C2_hi != 0 => nr. bits = 64 + nr_bits (C2_hi) | |
672 tmp2.d = (double) C2_hi; // exact conversion | |
673 y_nr_bits = | |
674 64 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
675 } | |
676 q2 = nr_digits[y_nr_bits].digits; | |
677 if (q2 == 0) { | |
678 q2 = nr_digits[y_nr_bits].digits1; | |
679 if (C2_hi > nr_digits[y_nr_bits].threshold_hi || | |
680 (C2_hi == nr_digits[y_nr_bits].threshold_hi && | |
681 C2_lo >= nr_digits[y_nr_bits].threshold_lo)) | |
682 q2++; | |
683 } | |
684 // return (C2 * 10^scale) * 10^(y_exp - scale) | |
685 // where scale = min (P34-q2, y_exp-x_exp) | |
686 scale = P34 - q2; | |
687 ind = (y_exp - x_exp) >> 49; | |
688 if (ind < scale) | |
689 scale = ind; | |
690 if (scale == 0) { | |
691 res.w[1] = y.w[1]; | |
692 res.w[0] = y.w[0]; | |
693 } else if (q2 <= 19) { // y fits in 64 bits | |
694 if (scale <= 19) { // 10^scale fits in 64 bits | |
695 // 64 x 64 C2_lo * ten2k64[scale] | |
696 __mul_64x64_to_128MACH (res, C2_lo, ten2k64[scale]); | |
697 } else { // 10^scale fits in 128 bits | |
698 // 64 x 128 C2_lo * ten2k128[scale - 20] | |
699 __mul_128x64_to_128 (res, C2_lo, ten2k128[scale - 20]); | |
700 } | |
701 } else { // y fits in 128 bits, but 10^scale must fit in 64 bits | |
702 // 64 x 128 ten2k64[scale] * C2 | |
703 C2.w[1] = C2_hi; | |
704 C2.w[0] = C2_lo; | |
705 __mul_128x64_to_128 (res, ten2k64[scale], C2); | |
706 } | |
707 // subtract scale from the exponent | |
708 y_exp = y_exp - ((UINT64) scale << 49); | |
709 res.w[1] = res.w[1] | y_sign | y_exp; | |
710 } | |
711 } | |
712 BID_SWAP128 (res); | |
713 BID_RETURN (res); | |
714 } else if ((C2_hi == 0x0ull) && (C2_lo == 0x0ull)) { | |
715 // y is 0 and x is not special, and not zero | |
716 // for x + 0 return x, with the preferred exponent | |
717 if (x_exp <= y_exp) { | |
718 res.w[1] = x.w[1]; | |
719 res.w[0] = x.w[0]; | |
720 } else { // if x_exp > y_exp | |
721 // return (C1 * 10^scale) * 10^(x_exp - scale) | |
722 // where scale = min (P34-q1, x_exp-y_exp) | |
723 // determine q1 = nr. of decimal digits in x | |
724 // determine first the nr. of bits in x | |
725 if (C1_hi == 0) { // x_bits is the nr. of bits in C1_lo | |
726 if (C1_lo >= 0x0020000000000000ull) { // x >= 2^53 | |
727 // split the 64-bit value in two 32-bit halves to avoid | |
728 // rounding errors | |
729 if (C1_lo >= 0x0000000100000000ull) { // x >= 2^32 | |
730 tmp1.d = (double) (C1_lo >> 32); // exact conversion | |
731 x_nr_bits = | |
732 32 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - | |
733 0x3ff); | |
734 } else { // x < 2^32 | |
735 tmp1.d = (double) (C1_lo); // exact conversion | |
736 x_nr_bits = | |
737 ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
738 } | |
739 } else { // if x < 2^53 | |
740 tmp1.d = (double) C1_lo; // exact conversion | |
741 x_nr_bits = | |
742 ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
743 } | |
744 } else { // C1_hi != 0 => nr. bits = 64 + nr_bits (C1_hi) | |
745 tmp1.d = (double) C1_hi; // exact conversion | |
746 x_nr_bits = | |
747 64 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
748 } | |
749 q1 = nr_digits[x_nr_bits].digits; | |
750 if (q1 == 0) { | |
751 q1 = nr_digits[x_nr_bits].digits1; | |
752 if (C1_hi > nr_digits[x_nr_bits].threshold_hi || | |
753 (C1_hi == nr_digits[x_nr_bits].threshold_hi && | |
754 C1_lo >= nr_digits[x_nr_bits].threshold_lo)) | |
755 q1++; | |
756 } | |
757 // return (C1 * 10^scale) * 10^(x_exp - scale) | |
758 // where scale = min (P34-q1, x_exp-y_exp) | |
759 scale = P34 - q1; | |
760 ind = (x_exp - y_exp) >> 49; | |
761 if (ind < scale) | |
762 scale = ind; | |
763 if (scale == 0) { | |
764 res.w[1] = x.w[1]; | |
765 res.w[0] = x.w[0]; | |
766 } else if (q1 <= 19) { // x fits in 64 bits | |
767 if (scale <= 19) { // 10^scale fits in 64 bits | |
768 // 64 x 64 C1_lo * ten2k64[scale] | |
769 __mul_64x64_to_128MACH (res, C1_lo, ten2k64[scale]); | |
770 } else { // 10^scale fits in 128 bits | |
771 // 64 x 128 C1_lo * ten2k128[scale - 20] | |
772 __mul_128x64_to_128 (res, C1_lo, ten2k128[scale - 20]); | |
773 } | |
774 } else { // x fits in 128 bits, but 10^scale must fit in 64 bits | |
775 // 64 x 128 ten2k64[scale] * C1 | |
776 C1.w[1] = C1_hi; | |
777 C1.w[0] = C1_lo; | |
778 __mul_128x64_to_128 (res, ten2k64[scale], C1); | |
779 } | |
780 // subtract scale from the exponent | |
781 x_exp = x_exp - ((UINT64) scale << 49); | |
782 res.w[1] = res.w[1] | x_sign | x_exp; | |
783 } | |
784 BID_SWAP128 (res); | |
785 BID_RETURN (res); | |
786 } else { // x and y are not canonical, not special, and are not zero | |
787 // note that the result may still be zero, and then it has to have the | |
788 // preferred exponent | |
789 if (x_exp < y_exp) { // if exp_x < exp_y then swap x and y | |
790 tmp_sign = x_sign; | |
791 tmp_exp = x_exp; | |
792 tmp_signif_hi = C1_hi; | |
793 tmp_signif_lo = C1_lo; | |
794 x_sign = y_sign; | |
795 x_exp = y_exp; | |
796 C1_hi = C2_hi; | |
797 C1_lo = C2_lo; | |
798 y_sign = tmp_sign; | |
799 y_exp = tmp_exp; | |
800 C2_hi = tmp_signif_hi; | |
801 C2_lo = tmp_signif_lo; | |
802 } | |
803 // q1 = nr. of decimal digits in x | |
804 // determine first the nr. of bits in x | |
805 if (C1_hi == 0) { // x_bits is the nr. of bits in C1_lo | |
806 if (C1_lo >= 0x0020000000000000ull) { // x >= 2^53 | |
807 //split the 64-bit value in two 32-bit halves to avoid rounding errors | |
808 if (C1_lo >= 0x0000000100000000ull) { // x >= 2^32 | |
809 tmp1.d = (double) (C1_lo >> 32); // exact conversion | |
810 x_nr_bits = | |
811 32 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
812 } else { // x < 2^32 | |
813 tmp1.d = (double) (C1_lo); // exact conversion | |
814 x_nr_bits = | |
815 ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
816 } | |
817 } else { // if x < 2^53 | |
818 tmp1.d = (double) C1_lo; // exact conversion | |
819 x_nr_bits = | |
820 ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
821 } | |
822 } else { // C1_hi != 0 => nr. bits = 64 + nr_bits (C1_hi) | |
823 tmp1.d = (double) C1_hi; // exact conversion | |
824 x_nr_bits = | |
825 64 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
826 } | |
827 | |
828 q1 = nr_digits[x_nr_bits].digits; | |
829 if (q1 == 0) { | |
830 q1 = nr_digits[x_nr_bits].digits1; | |
831 if (C1_hi > nr_digits[x_nr_bits].threshold_hi || | |
832 (C1_hi == nr_digits[x_nr_bits].threshold_hi && | |
833 C1_lo >= nr_digits[x_nr_bits].threshold_lo)) | |
834 q1++; | |
835 } | |
836 // q2 = nr. of decimal digits in y | |
837 // determine first the nr. of bits in y (y_nr_bits) | |
838 if (C2_hi == 0) { // y_bits is the nr. of bits in C2_lo | |
839 if (C2_lo >= 0x0020000000000000ull) { // y >= 2^53 | |
840 //split the 64-bit value in two 32-bit halves to avoid rounding errors | |
841 if (C2_lo >= 0x0000000100000000ull) { // y >= 2^32 | |
842 tmp2.d = (double) (C2_lo >> 32); // exact conversion | |
843 y_nr_bits = | |
844 32 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
845 } else { // y < 2^32 | |
846 tmp2.d = (double) (C2_lo); // exact conversion | |
847 y_nr_bits = | |
848 ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
849 } | |
850 } else { // if y < 2^53 | |
851 tmp2.d = (double) C2_lo; // exact conversion | |
852 y_nr_bits = | |
853 ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
854 } | |
855 } else { // C2_hi != 0 => nr. bits = 64 + nr_bits (C2_hi) | |
856 tmp2.d = (double) C2_hi; // exact conversion | |
857 y_nr_bits = | |
858 64 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff); | |
859 } | |
860 | |
861 q2 = nr_digits[y_nr_bits].digits; | |
862 if (q2 == 0) { | |
863 q2 = nr_digits[y_nr_bits].digits1; | |
864 if (C2_hi > nr_digits[y_nr_bits].threshold_hi || | |
865 (C2_hi == nr_digits[y_nr_bits].threshold_hi && | |
866 C2_lo >= nr_digits[y_nr_bits].threshold_lo)) | |
867 q2++; | |
868 } | |
869 | |
870 delta = q1 + (int) (x_exp >> 49) - q2 - (int) (y_exp >> 49); | |
871 | |
872 if (delta >= P34) { | |
873 // round the result directly because 0 < C2 < ulp (C1 * 10^(x_exp-e2)) | |
874 // n = C1 * 10^e1 or n = C1 +/- 10^(q1-P34)) * 10^e1 | |
875 // the result is inexact; the preferred exponent is the least possible | |
876 | |
877 if (delta >= P34 + 1) { | |
878 // for RN the result is the operand with the larger magnitude, | |
879 // possibly scaled up by 10^(P34-q1) | |
880 // an overflow cannot occur in this case (rounding to nearest) | |
881 if (q1 < P34) { // scale C1 up by 10^(P34-q1) | |
882 // Note: because delta >= P34+1 it is certain that | |
883 // x_exp - ((UINT64)scale << 49) will stay above e_min | |
884 scale = P34 - q1; | |
885 if (q1 <= 19) { // C1 fits in 64 bits | |
886 // 1 <= q1 <= 19 => 15 <= scale <= 33 | |
887 if (scale <= 19) { // 10^scale fits in 64 bits | |
888 __mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo); | |
889 } else { // if 20 <= scale <= 33 | |
890 // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where | |
891 // (C1 * 10^(scale-19)) fits in 64 bits | |
892 C1_lo = C1_lo * ten2k64[scale - 19]; | |
893 __mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo); | |
894 } | |
895 } else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits | |
896 // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits | |
897 C1.w[1] = C1_hi; | |
898 C1.w[0] = C1_lo; | |
899 // C1 = ten2k64[P34 - q1] * C1 | |
900 __mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1); | |
901 } | |
902 x_exp = x_exp - ((UINT64) scale << 49); | |
903 C1_hi = C1.w[1]; | |
904 C1_lo = C1.w[0]; | |
905 } | |
906 // some special cases arise: if delta = P34 + 1 and C1 = 10^(P34-1) | |
907 // (after scaling) and x_sign != y_sign and C2 > 5*10^(q2-1) => | |
908 // subtract 1 ulp | |
909 // Note: do this only for rounding to nearest; for other rounding | |
910 // modes the correction will be applied next | |
911 if ((rnd_mode == ROUNDING_TO_NEAREST | |
912 || rnd_mode == ROUNDING_TIES_AWAY) && delta == (P34 + 1) | |
913 && C1_hi == 0x0000314dc6448d93ull | |
914 && C1_lo == 0x38c15b0a00000000ull && x_sign != y_sign | |
915 && ((q2 <= 19 && C2_lo > midpoint64[q2 - 1]) || (q2 >= 20 | |
916 && (C2_hi > | |
917 midpoint128 | |
918 [q2 - | |
919 20]. | |
920 w[1] | |
921 || | |
922 (C2_hi | |
923 == | |
924 midpoint128 | |
925 [q2 - | |
926 20]. | |
927 w[1] | |
928 && | |
929 C2_lo | |
930 > | |
931 midpoint128 | |
932 [q2 - | |
933 20]. | |
934 w | |
935 [0]))))) | |
936 { | |
937 // C1 = 10^34 - 1 and decrement x_exp by 1 (no underflow possible) | |
938 C1_hi = 0x0001ed09bead87c0ull; | |
939 C1_lo = 0x378d8e63ffffffffull; | |
940 x_exp = x_exp - EXP_P1; | |
941 } | |
942 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
943 if ((rnd_mode == ROUNDING_DOWN && x_sign && y_sign) || | |
944 (rnd_mode == ROUNDING_UP && !x_sign && !y_sign)) { | |
945 // add 1 ulp and then check for overflow | |
946 C1_lo = C1_lo + 1; | |
947 if (C1_lo == 0) { // rounding overflow in the low 64 bits | |
948 C1_hi = C1_hi + 1; | |
949 } | |
950 if (C1_hi == 0x0001ed09bead87c0ull | |
951 && C1_lo == 0x378d8e6400000000ull) { | |
952 // C1 = 10^34 => rounding overflow | |
953 C1_hi = 0x0000314dc6448d93ull; | |
954 C1_lo = 0x38c15b0a00000000ull; // 10^33 | |
955 x_exp = x_exp + EXP_P1; | |
956 if (x_exp == EXP_MAX_P1) { // overflow | |
957 C1_hi = 0x7800000000000000ull; // +inf | |
958 C1_lo = 0x0ull; | |
959 x_exp = 0; // x_sign is preserved | |
960 // set overflow flag (the inexact flag was set too) | |
961 *pfpsf |= OVERFLOW_EXCEPTION; | |
962 } | |
963 } | |
964 } else if ((rnd_mode == ROUNDING_DOWN && !x_sign && y_sign) || | |
965 (rnd_mode == ROUNDING_UP && x_sign && !y_sign) || | |
966 (rnd_mode == ROUNDING_TO_ZERO | |
967 && x_sign != y_sign)) { | |
968 // subtract 1 ulp from C1 | |
969 // Note: because delta >= P34 + 1 the result cannot be zero | |
970 C1_lo = C1_lo - 1; | |
971 if (C1_lo == 0xffffffffffffffffull) | |
972 C1_hi = C1_hi - 1; | |
973 // if the coefficient is 10^33 - 1 then make it 10^34 - 1 and | |
974 // decrease the exponent by 1 (because delta >= P34 + 1 the | |
975 // exponent will not become less than e_min) | |
976 // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff | |
977 // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff | |
978 if (C1_hi == 0x0000314dc6448d93ull | |
979 && C1_lo == 0x38c15b09ffffffffull) { | |
980 // make C1 = 10^34 - 1 | |
981 C1_hi = 0x0001ed09bead87c0ull; | |
982 C1_lo = 0x378d8e63ffffffffull; | |
983 x_exp = x_exp - EXP_P1; | |
984 } | |
985 } else { | |
986 ; // the result is already correct | |
987 } | |
988 } | |
989 // set the inexact flag | |
990 *pfpsf |= INEXACT_EXCEPTION; | |
991 // assemble the result | |
992 res.w[1] = x_sign | x_exp | C1_hi; | |
993 res.w[0] = C1_lo; | |
994 } else { // delta = P34 | |
995 // in most cases, the smaller operand may be < or = or > 1/2 ulp of the | |
996 // larger operand | |
997 // however, the case C1 = 10^(q1-1) and x_sign != y_sign is special due | |
998 // to accuracy loss after subtraction, and will be treated separately | |
999 if (x_sign == y_sign || (q1 <= 20 | |
1000 && (C1_hi != 0 | |
1001 || C1_lo != ten2k64[q1 - 1])) | |
1002 || (q1 >= 21 && (C1_hi != ten2k128[q1 - 21].w[1] | |
1003 || C1_lo != ten2k128[q1 - 21].w[0]))) { | |
1004 // if x_sign == y_sign or C1 != 10^(q1-1) | |
1005 // compare C2 with 1/2 ulp = 5 * 10^(q2-1), the latter read from table | |
1006 // Note: cases q1<=19 and q1>=20 can be coalesced at some latency cost | |
1007 if (q2 <= 19) { // C2 and 5*10^(q2-1) both fit in 64 bits | |
1008 halfulp64 = midpoint64[q2 - 1]; // 5 * 10^(q2-1) | |
1009 if (C2_lo < halfulp64) { // n2 < 1/2 ulp (n1) | |
1010 // for RN the result is the operand with the larger magnitude, | |
1011 // possibly scaled up by 10^(P34-q1) | |
1012 // an overflow cannot occur in this case (rounding to nearest) | |
1013 if (q1 < P34) { // scale C1 up by 10^(P34-q1) | |
1014 // Note: because delta = P34 it is certain that | |
1015 // x_exp - ((UINT64)scale << 49) will stay above e_min | |
1016 scale = P34 - q1; | |
1017 if (q1 <= 19) { // C1 fits in 64 bits | |
1018 // 1 <= q1 <= 19 => 15 <= scale <= 33 | |
1019 if (scale <= 19) { // 10^scale fits in 64 bits | |
1020 __mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo); | |
1021 } else { // if 20 <= scale <= 33 | |
1022 // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where | |
1023 // (C1 * 10^(scale-19)) fits in 64 bits | |
1024 C1_lo = C1_lo * ten2k64[scale - 19]; | |
1025 __mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo); | |
1026 } | |
1027 } else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits | |
1028 // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits | |
1029 C1.w[1] = C1_hi; | |
1030 C1.w[0] = C1_lo; | |
1031 // C1 = ten2k64[P34 - q1] * C1 | |
1032 __mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1); | |
1033 } | |
1034 x_exp = x_exp - ((UINT64) scale << 49); | |
1035 C1_hi = C1.w[1]; | |
1036 C1_lo = C1.w[0]; | |
1037 } | |
1038 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
1039 if ((rnd_mode == ROUNDING_DOWN && x_sign && y_sign) || | |
1040 (rnd_mode == ROUNDING_UP && !x_sign && !y_sign)) { | |
1041 // add 1 ulp and then check for overflow | |
1042 C1_lo = C1_lo + 1; | |
1043 if (C1_lo == 0) { // rounding overflow in the low 64 bits | |
1044 C1_hi = C1_hi + 1; | |
1045 } | |
1046 if (C1_hi == 0x0001ed09bead87c0ull | |
1047 && C1_lo == 0x378d8e6400000000ull) { | |
1048 // C1 = 10^34 => rounding overflow | |
1049 C1_hi = 0x0000314dc6448d93ull; | |
1050 C1_lo = 0x38c15b0a00000000ull; // 10^33 | |
1051 x_exp = x_exp + EXP_P1; | |
1052 if (x_exp == EXP_MAX_P1) { // overflow | |
1053 C1_hi = 0x7800000000000000ull; // +inf | |
1054 C1_lo = 0x0ull; | |
1055 x_exp = 0; // x_sign is preserved | |
1056 // set overflow flag (the inexact flag was set too) | |
1057 *pfpsf |= OVERFLOW_EXCEPTION; | |
1058 } | |
1059 } | |
1060 } else | |
1061 if ((rnd_mode == ROUNDING_DOWN && !x_sign && y_sign) | |
1062 || (rnd_mode == ROUNDING_UP && x_sign && !y_sign) | |
1063 || (rnd_mode == ROUNDING_TO_ZERO | |
1064 && x_sign != y_sign)) { | |
1065 // subtract 1 ulp from C1 | |
1066 // Note: because delta >= P34 + 1 the result cannot be zero | |
1067 C1_lo = C1_lo - 1; | |
1068 if (C1_lo == 0xffffffffffffffffull) | |
1069 C1_hi = C1_hi - 1; | |
1070 // if the coefficient is 10^33-1 then make it 10^34-1 and | |
1071 // decrease the exponent by 1 (because delta >= P34 + 1 the | |
1072 // exponent will not become less than e_min) | |
1073 // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff | |
1074 // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff | |
1075 if (C1_hi == 0x0000314dc6448d93ull | |
1076 && C1_lo == 0x38c15b09ffffffffull) { | |
1077 // make C1 = 10^34 - 1 | |
1078 C1_hi = 0x0001ed09bead87c0ull; | |
1079 C1_lo = 0x378d8e63ffffffffull; | |
1080 x_exp = x_exp - EXP_P1; | |
1081 } | |
1082 } else { | |
1083 ; // the result is already correct | |
1084 } | |
1085 } | |
1086 // set the inexact flag | |
1087 *pfpsf |= INEXACT_EXCEPTION; | |
1088 // assemble the result | |
1089 res.w[1] = x_sign | x_exp | C1_hi; | |
1090 res.w[0] = C1_lo; | |
1091 } else if ((C2_lo == halfulp64) | |
1092 && (q1 < P34 || ((C1_lo & 0x1) == 0))) { | |
1093 // n2 = 1/2 ulp (n1) and C1 is even | |
1094 // the result is the operand with the larger magnitude, | |
1095 // possibly scaled up by 10^(P34-q1) | |
1096 // an overflow cannot occur in this case (rounding to nearest) | |
1097 if (q1 < P34) { // scale C1 up by 10^(P34-q1) | |
1098 // Note: because delta = P34 it is certain that | |
1099 // x_exp - ((UINT64)scale << 49) will stay above e_min | |
1100 scale = P34 - q1; | |
1101 if (q1 <= 19) { // C1 fits in 64 bits | |
1102 // 1 <= q1 <= 19 => 15 <= scale <= 33 | |
1103 if (scale <= 19) { // 10^scale fits in 64 bits | |
1104 __mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo); | |
1105 } else { // if 20 <= scale <= 33 | |
1106 // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where | |
1107 // (C1 * 10^(scale-19)) fits in 64 bits | |
1108 C1_lo = C1_lo * ten2k64[scale - 19]; | |
1109 __mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo); | |
1110 } | |
1111 } else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits | |
1112 // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits | |
1113 C1.w[1] = C1_hi; | |
1114 C1.w[0] = C1_lo; | |
1115 // C1 = ten2k64[P34 - q1] * C1 | |
1116 __mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1); | |
1117 } | |
1118 x_exp = x_exp - ((UINT64) scale << 49); | |
1119 C1_hi = C1.w[1]; | |
1120 C1_lo = C1.w[0]; | |
1121 } | |
1122 if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign == y_sign | |
1123 && (C1_lo & 0x01)) || (rnd_mode == ROUNDING_TIES_AWAY | |
1124 && x_sign == y_sign) | |
1125 || (rnd_mode == ROUNDING_UP && !x_sign && !y_sign) | |
1126 || (rnd_mode == ROUNDING_DOWN && x_sign && y_sign)) { | |
1127 // add 1 ulp and then check for overflow | |
1128 C1_lo = C1_lo + 1; | |
1129 if (C1_lo == 0) { // rounding overflow in the low 64 bits | |
1130 C1_hi = C1_hi + 1; | |
1131 } | |
1132 if (C1_hi == 0x0001ed09bead87c0ull | |
1133 && C1_lo == 0x378d8e6400000000ull) { | |
1134 // C1 = 10^34 => rounding overflow | |
1135 C1_hi = 0x0000314dc6448d93ull; | |
1136 C1_lo = 0x38c15b0a00000000ull; // 10^33 | |
1137 x_exp = x_exp + EXP_P1; | |
1138 if (x_exp == EXP_MAX_P1) { // overflow | |
1139 C1_hi = 0x7800000000000000ull; // +inf | |
1140 C1_lo = 0x0ull; | |
1141 x_exp = 0; // x_sign is preserved | |
1142 // set overflow flag (the inexact flag was set too) | |
1143 *pfpsf |= OVERFLOW_EXCEPTION; | |
1144 } | |
1145 } | |
1146 } else | |
1147 if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign != y_sign | |
1148 && (C1_lo & 0x01)) || (rnd_mode == ROUNDING_DOWN | |
1149 && !x_sign && y_sign) | |
1150 || (rnd_mode == ROUNDING_UP && x_sign && !y_sign) | |
1151 || (rnd_mode == ROUNDING_TO_ZERO | |
1152 && x_sign != y_sign)) { | |
1153 // subtract 1 ulp from C1 | |
1154 // Note: because delta >= P34 + 1 the result cannot be zero | |
1155 C1_lo = C1_lo - 1; | |
1156 if (C1_lo == 0xffffffffffffffffull) | |
1157 C1_hi = C1_hi - 1; | |
1158 // if the coefficient is 10^33 - 1 then make it 10^34 - 1 | |
1159 // and decrease the exponent by 1 (because delta >= P34 + 1 | |
1160 // the exponent will not become less than e_min) | |
1161 // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff | |
1162 // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff | |
1163 if (C1_hi == 0x0000314dc6448d93ull | |
1164 && C1_lo == 0x38c15b09ffffffffull) { | |
1165 // make C1 = 10^34 - 1 | |
1166 C1_hi = 0x0001ed09bead87c0ull; | |
1167 C1_lo = 0x378d8e63ffffffffull; | |
1168 x_exp = x_exp - EXP_P1; | |
1169 } | |
1170 } else { | |
1171 ; // the result is already correct | |
1172 } | |
1173 // set the inexact flag | |
1174 *pfpsf |= INEXACT_EXCEPTION; | |
1175 // assemble the result | |
1176 res.w[1] = x_sign | x_exp | C1_hi; | |
1177 res.w[0] = C1_lo; | |
1178 } else { // if C2_lo > halfulp64 || | |
1179 // (C2_lo == halfulp64 && q1 == P34 && ((C1_lo & 0x1) == 1)), i.e. | |
1180 // 1/2 ulp(n1) < n2 < 1 ulp(n1) or n2 = 1/2 ulp(n1) and C1 odd | |
1181 // res = x+1 ulp if n1*n2 > 0 and res = x-1 ulp if n1*n2 < 0 | |
1182 if (q1 < P34) { // then 1 ulp = 10^(e1+q1-P34) < 10^e1 | |
1183 // Note: if (q1 == P34) then 1 ulp = 10^(e1+q1-P34) = 10^e1 | |
1184 // because q1 < P34 we must first replace C1 by | |
1185 // C1 * 10^(P34-q1), and must decrease the exponent by | |
1186 // (P34-q1) (it will still be at least e_min) | |
1187 scale = P34 - q1; | |
1188 if (q1 <= 19) { // C1 fits in 64 bits | |
1189 // 1 <= q1 <= 19 => 15 <= scale <= 33 | |
1190 if (scale <= 19) { // 10^scale fits in 64 bits | |
1191 __mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo); | |
1192 } else { // if 20 <= scale <= 33 | |
1193 // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where | |
1194 // (C1 * 10^(scale-19)) fits in 64 bits | |
1195 C1_lo = C1_lo * ten2k64[scale - 19]; | |
1196 __mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo); | |
1197 } | |
1198 } else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits | |
1199 // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits | |
1200 C1.w[1] = C1_hi; | |
1201 C1.w[0] = C1_lo; | |
1202 // C1 = ten2k64[P34 - q1] * C1 | |
1203 __mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1); | |
1204 } | |
1205 x_exp = x_exp - ((UINT64) scale << 49); | |
1206 C1_hi = C1.w[1]; | |
1207 C1_lo = C1.w[0]; | |
1208 // check for rounding overflow | |
1209 if (C1_hi == 0x0001ed09bead87c0ull | |
1210 && C1_lo == 0x378d8e6400000000ull) { | |
1211 // C1 = 10^34 => rounding overflow | |
1212 C1_hi = 0x0000314dc6448d93ull; | |
1213 C1_lo = 0x38c15b0a00000000ull; // 10^33 | |
1214 x_exp = x_exp + EXP_P1; | |
1215 } | |
1216 } | |
1217 if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign != y_sign) | |
1218 || (rnd_mode == ROUNDING_TIES_AWAY && x_sign != y_sign | |
1219 && C2_lo != halfulp64) | |
1220 || (rnd_mode == ROUNDING_DOWN && !x_sign && y_sign) | |
1221 || (rnd_mode == ROUNDING_UP && x_sign && !y_sign) | |
1222 || (rnd_mode == ROUNDING_TO_ZERO | |
1223 && x_sign != y_sign)) { | |
1224 // the result is x - 1 | |
1225 // for RN n1 * n2 < 0; underflow not possible | |
1226 C1_lo = C1_lo - 1; | |
1227 if (C1_lo == 0xffffffffffffffffull) | |
1228 C1_hi--; | |
1229 // check if we crossed into the lower decade | |
1230 if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1 | |
1231 C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
1232 C1_lo = 0x378d8e63ffffffffull; | |
1233 x_exp = x_exp - EXP_P1; // no underflow, because n1 >> n2 | |
1234 } | |
1235 } else | |
1236 if ((rnd_mode == ROUNDING_TO_NEAREST | |
1237 && x_sign == y_sign) | |
1238 || (rnd_mode == ROUNDING_TIES_AWAY | |
1239 && x_sign == y_sign) | |
1240 || (rnd_mode == ROUNDING_DOWN && x_sign && y_sign) | |
1241 || (rnd_mode == ROUNDING_UP && !x_sign | |
1242 && !y_sign)) { | |
1243 // the result is x + 1 | |
1244 // for RN x_sign = y_sign, i.e. n1*n2 > 0 | |
1245 C1_lo = C1_lo + 1; | |
1246 if (C1_lo == 0) { // rounding overflow in the low 64 bits | |
1247 C1_hi = C1_hi + 1; | |
1248 } | |
1249 if (C1_hi == 0x0001ed09bead87c0ull | |
1250 && C1_lo == 0x378d8e6400000000ull) { | |
1251 // C1 = 10^34 => rounding overflow | |
1252 C1_hi = 0x0000314dc6448d93ull; | |
1253 C1_lo = 0x38c15b0a00000000ull; // 10^33 | |
1254 x_exp = x_exp + EXP_P1; | |
1255 if (x_exp == EXP_MAX_P1) { // overflow | |
1256 C1_hi = 0x7800000000000000ull; // +inf | |
1257 C1_lo = 0x0ull; | |
1258 x_exp = 0; // x_sign is preserved | |
1259 // set the overflow flag | |
1260 *pfpsf |= OVERFLOW_EXCEPTION; | |
1261 } | |
1262 } | |
1263 } else { | |
1264 ; // the result is x | |
1265 } | |
1266 // set the inexact flag | |
1267 *pfpsf |= INEXACT_EXCEPTION; | |
1268 // assemble the result | |
1269 res.w[1] = x_sign | x_exp | C1_hi; | |
1270 res.w[0] = C1_lo; | |
1271 } | |
1272 } else { // if q2 >= 20 then 5*10^(q2-1) and C2 (the latter in | |
1273 // most cases) fit only in more than 64 bits | |
1274 halfulp128 = midpoint128[q2 - 20]; // 5 * 10^(q2-1) | |
1275 if ((C2_hi < halfulp128.w[1]) | |
1276 || (C2_hi == halfulp128.w[1] | |
1277 && C2_lo < halfulp128.w[0])) { | |
1278 // n2 < 1/2 ulp (n1) | |
1279 // the result is the operand with the larger magnitude, | |
1280 // possibly scaled up by 10^(P34-q1) | |
1281 // an overflow cannot occur in this case (rounding to nearest) | |
1282 if (q1 < P34) { // scale C1 up by 10^(P34-q1) | |
1283 // Note: because delta = P34 it is certain that | |
1284 // x_exp - ((UINT64)scale << 49) will stay above e_min | |
1285 scale = P34 - q1; | |
1286 if (q1 <= 19) { // C1 fits in 64 bits | |
1287 // 1 <= q1 <= 19 => 15 <= scale <= 33 | |
1288 if (scale <= 19) { // 10^scale fits in 64 bits | |
1289 __mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo); | |
1290 } else { // if 20 <= scale <= 33 | |
1291 // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where | |
1292 // (C1 * 10^(scale-19)) fits in 64 bits | |
1293 C1_lo = C1_lo * ten2k64[scale - 19]; | |
1294 __mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo); | |
1295 } | |
1296 } else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits | |
1297 // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits | |
1298 C1.w[1] = C1_hi; | |
1299 C1.w[0] = C1_lo; | |
1300 // C1 = ten2k64[P34 - q1] * C1 | |
1301 __mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1); | |
1302 } | |
1303 C1_hi = C1.w[1]; | |
1304 C1_lo = C1.w[0]; | |
1305 x_exp = x_exp - ((UINT64) scale << 49); | |
1306 } | |
1307 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
1308 if ((rnd_mode == ROUNDING_DOWN && x_sign && y_sign) || | |
1309 (rnd_mode == ROUNDING_UP && !x_sign && !y_sign)) { | |
1310 // add 1 ulp and then check for overflow | |
1311 C1_lo = C1_lo + 1; | |
1312 if (C1_lo == 0) { // rounding overflow in the low 64 bits | |
1313 C1_hi = C1_hi + 1; | |
1314 } | |
1315 if (C1_hi == 0x0001ed09bead87c0ull | |
1316 && C1_lo == 0x378d8e6400000000ull) { | |
1317 // C1 = 10^34 => rounding overflow | |
1318 C1_hi = 0x0000314dc6448d93ull; | |
1319 C1_lo = 0x38c15b0a00000000ull; // 10^33 | |
1320 x_exp = x_exp + EXP_P1; | |
1321 if (x_exp == EXP_MAX_P1) { // overflow | |
1322 C1_hi = 0x7800000000000000ull; // +inf | |
1323 C1_lo = 0x0ull; | |
1324 x_exp = 0; // x_sign is preserved | |
1325 // set overflow flag (the inexact flag was set too) | |
1326 *pfpsf |= OVERFLOW_EXCEPTION; | |
1327 } | |
1328 } | |
1329 } else | |
1330 if ((rnd_mode == ROUNDING_DOWN && !x_sign && y_sign) | |
1331 || (rnd_mode == ROUNDING_UP && x_sign && !y_sign) | |
1332 || (rnd_mode == ROUNDING_TO_ZERO | |
1333 && x_sign != y_sign)) { | |
1334 // subtract 1 ulp from C1 | |
1335 // Note: because delta >= P34 + 1 the result cannot be zero | |
1336 C1_lo = C1_lo - 1; | |
1337 if (C1_lo == 0xffffffffffffffffull) | |
1338 C1_hi = C1_hi - 1; | |
1339 // if the coefficient is 10^33-1 then make it 10^34-1 and | |
1340 // decrease the exponent by 1 (because delta >= P34 + 1 the | |
1341 // exponent will not become less than e_min) | |
1342 // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff | |
1343 // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff | |
1344 if (C1_hi == 0x0000314dc6448d93ull | |
1345 && C1_lo == 0x38c15b09ffffffffull) { | |
1346 // make C1 = 10^34 - 1 | |
1347 C1_hi = 0x0001ed09bead87c0ull; | |
1348 C1_lo = 0x378d8e63ffffffffull; | |
1349 x_exp = x_exp - EXP_P1; | |
1350 } | |
1351 } else { | |
1352 ; // the result is already correct | |
1353 } | |
1354 } | |
1355 // set the inexact flag | |
1356 *pfpsf |= INEXACT_EXCEPTION; | |
1357 // assemble the result | |
1358 res.w[1] = x_sign | x_exp | C1_hi; | |
1359 res.w[0] = C1_lo; | |
1360 } else if ((C2_hi == halfulp128.w[1] | |
1361 && C2_lo == halfulp128.w[0]) | |
1362 && (q1 < P34 || ((C1_lo & 0x1) == 0))) { | |
1363 // midpoint & lsb in C1 is 0 | |
1364 // n2 = 1/2 ulp (n1) and C1 is even | |
1365 // the result is the operand with the larger magnitude, | |
1366 // possibly scaled up by 10^(P34-q1) | |
1367 // an overflow cannot occur in this case (rounding to nearest) | |
1368 if (q1 < P34) { // scale C1 up by 10^(P34-q1) | |
1369 // Note: because delta = P34 it is certain that | |
1370 // x_exp - ((UINT64)scale << 49) will stay above e_min | |
1371 scale = P34 - q1; | |
1372 if (q1 <= 19) { // C1 fits in 64 bits | |
1373 // 1 <= q1 <= 19 => 15 <= scale <= 33 | |
1374 if (scale <= 19) { // 10^scale fits in 64 bits | |
1375 __mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo); | |
1376 } else { // if 20 <= scale <= 33 | |
1377 // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where | |
1378 // (C1 * 10^(scale-19)) fits in 64 bits | |
1379 C1_lo = C1_lo * ten2k64[scale - 19]; | |
1380 __mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo); | |
1381 } | |
1382 } else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits | |
1383 // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits | |
1384 C1.w[1] = C1_hi; | |
1385 C1.w[0] = C1_lo; | |
1386 // C1 = ten2k64[P34 - q1] * C1 | |
1387 __mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1); | |
1388 } | |
1389 x_exp = x_exp - ((UINT64) scale << 49); | |
1390 C1_hi = C1.w[1]; | |
1391 C1_lo = C1.w[0]; | |
1392 } | |
1393 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
1394 if ((rnd_mode == ROUNDING_TIES_AWAY && x_sign == y_sign) | |
1395 || (rnd_mode == ROUNDING_UP && !y_sign)) { | |
1396 // add 1 ulp and then check for overflow | |
1397 C1_lo = C1_lo + 1; | |
1398 if (C1_lo == 0) { // rounding overflow in the low 64 bits | |
1399 C1_hi = C1_hi + 1; | |
1400 } | |
1401 if (C1_hi == 0x0001ed09bead87c0ull | |
1402 && C1_lo == 0x378d8e6400000000ull) { | |
1403 // C1 = 10^34 => rounding overflow | |
1404 C1_hi = 0x0000314dc6448d93ull; | |
1405 C1_lo = 0x38c15b0a00000000ull; // 10^33 | |
1406 x_exp = x_exp + EXP_P1; | |
1407 if (x_exp == EXP_MAX_P1) { // overflow | |
1408 C1_hi = 0x7800000000000000ull; // +inf | |
1409 C1_lo = 0x0ull; | |
1410 x_exp = 0; // x_sign is preserved | |
1411 // set overflow flag (the inexact flag was set too) | |
1412 *pfpsf |= OVERFLOW_EXCEPTION; | |
1413 } | |
1414 } | |
1415 } else if ((rnd_mode == ROUNDING_DOWN && y_sign) | |
1416 || (rnd_mode == ROUNDING_TO_ZERO | |
1417 && x_sign != y_sign)) { | |
1418 // subtract 1 ulp from C1 | |
1419 // Note: because delta >= P34 + 1 the result cannot be zero | |
1420 C1_lo = C1_lo - 1; | |
1421 if (C1_lo == 0xffffffffffffffffull) | |
1422 C1_hi = C1_hi - 1; | |
1423 // if the coefficient is 10^33 - 1 then make it 10^34 - 1 | |
1424 // and decrease the exponent by 1 (because delta >= P34 + 1 | |
1425 // the exponent will not become less than e_min) | |
1426 // 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff | |
1427 // 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff | |
1428 if (C1_hi == 0x0000314dc6448d93ull | |
1429 && C1_lo == 0x38c15b09ffffffffull) { | |
1430 // make C1 = 10^34 - 1 | |
1431 C1_hi = 0x0001ed09bead87c0ull; | |
1432 C1_lo = 0x378d8e63ffffffffull; | |
1433 x_exp = x_exp - EXP_P1; | |
1434 } | |
1435 } else { | |
1436 ; // the result is already correct | |
1437 } | |
1438 } | |
1439 // set the inexact flag | |
1440 *pfpsf |= INEXACT_EXCEPTION; | |
1441 // assemble the result | |
1442 res.w[1] = x_sign | x_exp | C1_hi; | |
1443 res.w[0] = C1_lo; | |
1444 } else { // if C2 > halfulp128 || | |
1445 // (C2 == halfulp128 && q1 == P34 && ((C1 & 0x1) == 1)), i.e. | |
1446 // 1/2 ulp(n1) < n2 < 1 ulp(n1) or n2 = 1/2 ulp(n1) and C1 odd | |
1447 // res = x+1 ulp if n1*n2 > 0 and res = x-1 ulp if n1*n2 < 0 | |
1448 if (q1 < P34) { // then 1 ulp = 10^(e1+q1-P34) < 10^e1 | |
1449 // Note: if (q1 == P34) then 1 ulp = 10^(e1+q1-P34) = 10^e1 | |
1450 // because q1 < P34 we must first replace C1 by C1*10^(P34-q1), | |
1451 // and must decrease the exponent by (P34-q1) (it will still be | |
1452 // at least e_min) | |
1453 scale = P34 - q1; | |
1454 if (q1 <= 19) { // C1 fits in 64 bits | |
1455 // 1 <= q1 <= 19 => 15 <= scale <= 33 | |
1456 if (scale <= 19) { // 10^scale fits in 64 bits | |
1457 __mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo); | |
1458 } else { // if 20 <= scale <= 33 | |
1459 // C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where | |
1460 // (C1 * 10^(scale-19)) fits in 64 bits | |
1461 C1_lo = C1_lo * ten2k64[scale - 19]; | |
1462 __mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo); | |
1463 } | |
1464 } else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits | |
1465 // => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits | |
1466 C1.w[1] = C1_hi; | |
1467 C1.w[0] = C1_lo; | |
1468 // C1 = ten2k64[P34 - q1] * C1 | |
1469 __mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1); | |
1470 } | |
1471 C1_hi = C1.w[1]; | |
1472 C1_lo = C1.w[0]; | |
1473 x_exp = x_exp - ((UINT64) scale << 49); | |
1474 } | |
1475 if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign != y_sign) | |
1476 || (rnd_mode == ROUNDING_TIES_AWAY && x_sign != y_sign | |
1477 && (C2_hi != halfulp128.w[1] | |
1478 || C2_lo != halfulp128.w[0])) | |
1479 || (rnd_mode == ROUNDING_DOWN && !x_sign && y_sign) | |
1480 || (rnd_mode == ROUNDING_UP && x_sign && !y_sign) | |
1481 || (rnd_mode == ROUNDING_TO_ZERO | |
1482 && x_sign != y_sign)) { | |
1483 // the result is x - 1 | |
1484 // for RN n1 * n2 < 0; underflow not possible | |
1485 C1_lo = C1_lo - 1; | |
1486 if (C1_lo == 0xffffffffffffffffull) | |
1487 C1_hi--; | |
1488 // check if we crossed into the lower decade | |
1489 if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1 | |
1490 C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
1491 C1_lo = 0x378d8e63ffffffffull; | |
1492 x_exp = x_exp - EXP_P1; // no underflow, because n1 >> n2 | |
1493 } | |
1494 } else | |
1495 if ((rnd_mode == ROUNDING_TO_NEAREST | |
1496 && x_sign == y_sign) | |
1497 || (rnd_mode == ROUNDING_TIES_AWAY | |
1498 && x_sign == y_sign) | |
1499 || (rnd_mode == ROUNDING_DOWN && x_sign && y_sign) | |
1500 || (rnd_mode == ROUNDING_UP && !x_sign | |
1501 && !y_sign)) { | |
1502 // the result is x + 1 | |
1503 // for RN x_sign = y_sign, i.e. n1*n2 > 0 | |
1504 C1_lo = C1_lo + 1; | |
1505 if (C1_lo == 0) { // rounding overflow in the low 64 bits | |
1506 C1_hi = C1_hi + 1; | |
1507 } | |
1508 if (C1_hi == 0x0001ed09bead87c0ull | |
1509 && C1_lo == 0x378d8e6400000000ull) { | |
1510 // C1 = 10^34 => rounding overflow | |
1511 C1_hi = 0x0000314dc6448d93ull; | |
1512 C1_lo = 0x38c15b0a00000000ull; // 10^33 | |
1513 x_exp = x_exp + EXP_P1; | |
1514 if (x_exp == EXP_MAX_P1) { // overflow | |
1515 C1_hi = 0x7800000000000000ull; // +inf | |
1516 C1_lo = 0x0ull; | |
1517 x_exp = 0; // x_sign is preserved | |
1518 // set the overflow flag | |
1519 *pfpsf |= OVERFLOW_EXCEPTION; | |
1520 } | |
1521 } | |
1522 } else { | |
1523 ; // the result is x | |
1524 } | |
1525 // set the inexact flag | |
1526 *pfpsf |= INEXACT_EXCEPTION; | |
1527 // assemble the result | |
1528 res.w[1] = x_sign | x_exp | C1_hi; | |
1529 res.w[0] = C1_lo; | |
1530 } | |
1531 } // end q1 >= 20 | |
1532 // end case where C1 != 10^(q1-1) | |
1533 } else { // C1 = 10^(q1-1) and x_sign != y_sign | |
1534 // instead of C' = (C1 * 10^(e1-e2) + C2)rnd,P34 | |
1535 // calculate C' = C1 * 10^(e1-e2-x1) + (C2 * 10^(-x1))rnd,P34 | |
1536 // where x1 = q2 - 1, 0 <= x1 <= P34 - 1 | |
1537 // Because C1 = 10^(q1-1) and x_sign != y_sign, C' will have P34 | |
1538 // digits and n = C' * 10^(e2+x1) | |
1539 // If the result has P34+1 digits, redo the steps above with x1+1 | |
1540 // If the result has P34-1 digits or less, redo the steps above with | |
1541 // x1-1 but only if initially x1 >= 1 | |
1542 // NOTE: these two steps can be improved, e.g we could guess if | |
1543 // P34+1 or P34-1 digits will be obtained by adding/subtracting | |
1544 // just the top 64 bits of the two operands | |
1545 // The result cannot be zero, and it cannot overflow | |
1546 x1 = q2 - 1; // 0 <= x1 <= P34-1 | |
1547 // Calculate C1 * 10^(e1-e2-x1) where 1 <= e1-e2-x1 <= P34 | |
1548 // scale = (int)(e1 >> 49) - (int)(e2 >> 49) - x1; 0 <= scale <= P34-1 | |
1549 scale = P34 - q1 + 1; // scale=e1-e2-x1 = P34+1-q1; 1<=scale<=P34 | |
1550 // either C1 or 10^(e1-e2-x1) may not fit is 64 bits, | |
1551 // but their product fits with certainty in 128 bits | |
1552 if (scale >= 20) { //10^(e1-e2-x1) doesn't fit in 64 bits, but C1 does | |
1553 __mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]); | |
1554 } else { // if (scale >= 1 | |
1555 // if 1 <= scale <= 19 then 10^(e1-e2-x1) fits in 64 bits | |
1556 if (q1 <= 19) { // C1 fits in 64 bits | |
1557 __mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]); | |
1558 } else { // q1 >= 20 | |
1559 C1.w[1] = C1_hi; | |
1560 C1.w[0] = C1_lo; | |
1561 __mul_128x64_to_128 (C1, ten2k64[scale], C1); | |
1562 } | |
1563 } | |
1564 tmp64 = C1.w[0]; // C1.w[1], C1.w[0] contains C1 * 10^(e1-e2-x1) | |
1565 | |
1566 // now round C2 to q2-x1 = 1 decimal digit | |
1567 // C2' = C2 + 1/2 * 10^x1 = C2 + 5 * 10^(x1-1) | |
1568 ind = x1 - 1; // -1 <= ind <= P34 - 2 | |
1569 if (ind >= 0) { // if (x1 >= 1) | |
1570 C2.w[0] = C2_lo; | |
1571 C2.w[1] = C2_hi; | |
1572 if (ind <= 18) { | |
1573 C2.w[0] = C2.w[0] + midpoint64[ind]; | |
1574 if (C2.w[0] < C2_lo) | |
1575 C2.w[1]++; | |
1576 } else { // 19 <= ind <= 32 | |
1577 C2.w[0] = C2.w[0] + midpoint128[ind - 19].w[0]; | |
1578 C2.w[1] = C2.w[1] + midpoint128[ind - 19].w[1]; | |
1579 if (C2.w[0] < C2_lo) | |
1580 C2.w[1]++; | |
1581 } | |
1582 // the approximation of 10^(-x1) was rounded up to 118 bits | |
1583 __mul_128x128_to_256 (R256, C2, ten2mk128[ind]); // R256 = C2*, f2* | |
1584 // calculate C2* and f2* | |
1585 // C2* is actually floor(C2*) in this case | |
1586 // C2* and f2* need shifting and masking, as shown by | |
1587 // shiftright128[] and maskhigh128[] | |
1588 // the top Ex bits of 10^(-x1) are T* = ten2mk128trunc[ind], e.g. | |
1589 // if x1=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999 | |
1590 // if (0 < f2* < 10^(-x1)) then | |
1591 // if floor(C1+C2*) is even then C2* = floor(C2*) - logical right | |
1592 // shift; C2* has p decimal digits, correct by Prop. 1) | |
1593 // else if floor(C1+C2*) is odd C2* = floor(C2*)-1 (logical right | |
1594 // shift; C2* has p decimal digits, correct by Pr. 1) | |
1595 // else | |
1596 // C2* = floor(C2*) (logical right shift; C has p decimal digits, | |
1597 // correct by Property 1) | |
1598 // n = C2* * 10^(e2+x1) | |
1599 | |
1600 if (ind <= 2) { | |
1601 highf2star.w[1] = 0x0; | |
1602 highf2star.w[0] = 0x0; // low f2* ok | |
1603 } else if (ind <= 21) { | |
1604 highf2star.w[1] = 0x0; | |
1605 highf2star.w[0] = R256.w[2] & maskhigh128[ind]; // low f2* ok | |
1606 } else { | |
1607 highf2star.w[1] = R256.w[3] & maskhigh128[ind]; | |
1608 highf2star.w[0] = R256.w[2]; // low f2* is ok | |
1609 } | |
1610 // shift right C2* by Ex-128 = shiftright128[ind] | |
1611 if (ind >= 3) { | |
1612 shift = shiftright128[ind]; | |
1613 if (shift < 64) { // 3 <= shift <= 63 | |
1614 R256.w[2] = | |
1615 (R256.w[2] >> shift) | (R256.w[3] << (64 - shift)); | |
1616 R256.w[3] = (R256.w[3] >> shift); | |
1617 } else { // 66 <= shift <= 102 | |
1618 R256.w[2] = (R256.w[3] >> (shift - 64)); | |
1619 R256.w[3] = 0x0ULL; | |
1620 } | |
1621 } | |
1622 // redundant | |
1623 is_inexact_lt_midpoint = 0; | |
1624 is_inexact_gt_midpoint = 0; | |
1625 is_midpoint_lt_even = 0; | |
1626 is_midpoint_gt_even = 0; | |
1627 // determine inexactness of the rounding of C2* | |
1628 // (cannot be followed by a second rounding) | |
1629 // if (0 < f2* - 1/2 < 10^(-x1)) then | |
1630 // the result is exact | |
1631 // else (if f2* - 1/2 > T* then) | |
1632 // the result of is inexact | |
1633 if (ind <= 2) { | |
1634 if (R256.w[1] > 0x8000000000000000ull || | |
1635 (R256.w[1] == 0x8000000000000000ull | |
1636 && R256.w[0] > 0x0ull)) { | |
1637 // f2* > 1/2 and the result may be exact | |
1638 tmp64A = R256.w[1] - 0x8000000000000000ull; // f* - 1/2 | |
1639 if ((tmp64A > ten2mk128trunc[ind].w[1] | |
1640 || (tmp64A == ten2mk128trunc[ind].w[1] | |
1641 && R256.w[0] >= ten2mk128trunc[ind].w[0]))) { | |
1642 // set the inexact flag | |
1643 *pfpsf |= INEXACT_EXCEPTION; | |
1644 // this rounding is applied to C2 only! | |
1645 // x_sign != y_sign | |
1646 is_inexact_gt_midpoint = 1; | |
1647 } // else the result is exact | |
1648 // rounding down, unless a midpoint in [ODD, EVEN] | |
1649 } else { // the result is inexact; f2* <= 1/2 | |
1650 // set the inexact flag | |
1651 *pfpsf |= INEXACT_EXCEPTION; | |
1652 // this rounding is applied to C2 only! | |
1653 // x_sign != y_sign | |
1654 is_inexact_lt_midpoint = 1; | |
1655 } | |
1656 } else if (ind <= 21) { // if 3 <= ind <= 21 | |
1657 if (highf2star.w[1] > 0x0 || (highf2star.w[1] == 0x0 | |
1658 && highf2star.w[0] > | |
1659 onehalf128[ind]) | |
1660 || (highf2star.w[1] == 0x0 | |
1661 && highf2star.w[0] == onehalf128[ind] | |
1662 && (R256.w[1] || R256.w[0]))) { | |
1663 // f2* > 1/2 and the result may be exact | |
1664 // Calculate f2* - 1/2 | |
1665 tmp64A = highf2star.w[0] - onehalf128[ind]; | |
1666 tmp64B = highf2star.w[1]; | |
1667 if (tmp64A > highf2star.w[0]) | |
1668 tmp64B--; | |
1669 if (tmp64B || tmp64A | |
1670 || R256.w[1] > ten2mk128trunc[ind].w[1] | |
1671 || (R256.w[1] == ten2mk128trunc[ind].w[1] | |
1672 && R256.w[0] > ten2mk128trunc[ind].w[0])) { | |
1673 // set the inexact flag | |
1674 *pfpsf |= INEXACT_EXCEPTION; | |
1675 // this rounding is applied to C2 only! | |
1676 // x_sign != y_sign | |
1677 is_inexact_gt_midpoint = 1; | |
1678 } // else the result is exact | |
1679 } else { // the result is inexact; f2* <= 1/2 | |
1680 // set the inexact flag | |
1681 *pfpsf |= INEXACT_EXCEPTION; | |
1682 // this rounding is applied to C2 only! | |
1683 // x_sign != y_sign | |
1684 is_inexact_lt_midpoint = 1; | |
1685 } | |
1686 } else { // if 22 <= ind <= 33 | |
1687 if (highf2star.w[1] > onehalf128[ind] | |
1688 || (highf2star.w[1] == onehalf128[ind] | |
1689 && (highf2star.w[0] || R256.w[1] | |
1690 || R256.w[0]))) { | |
1691 // f2* > 1/2 and the result may be exact | |
1692 // Calculate f2* - 1/2 | |
1693 // tmp64A = highf2star.w[0]; | |
1694 tmp64B = highf2star.w[1] - onehalf128[ind]; | |
1695 if (tmp64B || highf2star.w[0] | |
1696 || R256.w[1] > ten2mk128trunc[ind].w[1] | |
1697 || (R256.w[1] == ten2mk128trunc[ind].w[1] | |
1698 && R256.w[0] > ten2mk128trunc[ind].w[0])) { | |
1699 // set the inexact flag | |
1700 *pfpsf |= INEXACT_EXCEPTION; | |
1701 // this rounding is applied to C2 only! | |
1702 // x_sign != y_sign | |
1703 is_inexact_gt_midpoint = 1; | |
1704 } // else the result is exact | |
1705 } else { // the result is inexact; f2* <= 1/2 | |
1706 // set the inexact flag | |
1707 *pfpsf |= INEXACT_EXCEPTION; | |
1708 // this rounding is applied to C2 only! | |
1709 // x_sign != y_sign | |
1710 is_inexact_lt_midpoint = 1; | |
1711 } | |
1712 } | |
1713 // check for midpoints after determining inexactness | |
1714 if ((R256.w[1] || R256.w[0]) && (highf2star.w[1] == 0) | |
1715 && (highf2star.w[0] == 0) | |
1716 && (R256.w[1] < ten2mk128trunc[ind].w[1] | |
1717 || (R256.w[1] == ten2mk128trunc[ind].w[1] | |
1718 && R256.w[0] <= ten2mk128trunc[ind].w[0]))) { | |
1719 // the result is a midpoint | |
1720 if ((tmp64 + R256.w[2]) & 0x01) { // MP in [EVEN, ODD] | |
1721 // if floor(C2*) is odd C = floor(C2*) - 1; the result may be 0 | |
1722 R256.w[2]--; | |
1723 if (R256.w[2] == 0xffffffffffffffffull) | |
1724 R256.w[3]--; | |
1725 // this rounding is applied to C2 only! | |
1726 // x_sign != y_sign | |
1727 is_midpoint_lt_even = 1; | |
1728 is_inexact_lt_midpoint = 0; | |
1729 is_inexact_gt_midpoint = 0; | |
1730 } else { | |
1731 // else MP in [ODD, EVEN] | |
1732 // this rounding is applied to C2 only! | |
1733 // x_sign != y_sign | |
1734 is_midpoint_gt_even = 1; | |
1735 is_inexact_lt_midpoint = 0; | |
1736 is_inexact_gt_midpoint = 0; | |
1737 } | |
1738 } | |
1739 } else { // if (ind == -1) only when x1 = 0 | |
1740 R256.w[2] = C2_lo; | |
1741 R256.w[3] = C2_hi; | |
1742 is_midpoint_lt_even = 0; | |
1743 is_midpoint_gt_even = 0; | |
1744 is_inexact_lt_midpoint = 0; | |
1745 is_inexact_gt_midpoint = 0; | |
1746 } | |
1747 // and now subtract C1 * 10^(e1-e2-x1) - (C2 * 10^(-x1))rnd,P34 | |
1748 // because x_sign != y_sign this last operation is exact | |
1749 C1.w[0] = C1.w[0] - R256.w[2]; | |
1750 C1.w[1] = C1.w[1] - R256.w[3]; | |
1751 if (C1.w[0] > tmp64) | |
1752 C1.w[1]--; // borrow | |
1753 if (C1.w[1] >= 0x8000000000000000ull) { // negative coefficient! | |
1754 C1.w[0] = ~C1.w[0]; | |
1755 C1.w[0]++; | |
1756 C1.w[1] = ~C1.w[1]; | |
1757 if (C1.w[0] == 0x0) | |
1758 C1.w[1]++; | |
1759 tmp_sign = y_sign; // the result will have the sign of y | |
1760 } else { | |
1761 tmp_sign = x_sign; | |
1762 } | |
1763 // the difference has exactly P34 digits | |
1764 x_sign = tmp_sign; | |
1765 if (x1 >= 1) | |
1766 y_exp = y_exp + ((UINT64) x1 << 49); | |
1767 C1_hi = C1.w[1]; | |
1768 C1_lo = C1.w[0]; | |
1769 // general correction from RN to RA, RM, RP, RZ; result uses y_exp | |
1770 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
1771 if ((!x_sign | |
1772 && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) | |
1773 || | |
1774 ((rnd_mode == ROUNDING_TIES_AWAY | |
1775 || rnd_mode == ROUNDING_UP) | |
1776 && is_midpoint_gt_even))) || (x_sign | |
1777 && | |
1778 ((rnd_mode == | |
1779 ROUNDING_DOWN | |
1780 && | |
1781 is_inexact_lt_midpoint) | |
1782 || | |
1783 ((rnd_mode == | |
1784 ROUNDING_TIES_AWAY | |
1785 || rnd_mode == | |
1786 ROUNDING_DOWN) | |
1787 && | |
1788 is_midpoint_gt_even)))) | |
1789 { | |
1790 // C1 = C1 + 1 | |
1791 C1_lo = C1_lo + 1; | |
1792 if (C1_lo == 0) { // rounding overflow in the low 64 bits | |
1793 C1_hi = C1_hi + 1; | |
1794 } | |
1795 if (C1_hi == 0x0001ed09bead87c0ull | |
1796 && C1_lo == 0x378d8e6400000000ull) { | |
1797 // C1 = 10^34 => rounding overflow | |
1798 C1_hi = 0x0000314dc6448d93ull; | |
1799 C1_lo = 0x38c15b0a00000000ull; // 10^33 | |
1800 y_exp = y_exp + EXP_P1; | |
1801 } | |
1802 } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) | |
1803 && | |
1804 ((x_sign | |
1805 && (rnd_mode == ROUNDING_UP | |
1806 || rnd_mode == ROUNDING_TO_ZERO)) | |
1807 || (!x_sign | |
1808 && (rnd_mode == ROUNDING_DOWN | |
1809 || rnd_mode == ROUNDING_TO_ZERO)))) { | |
1810 // C1 = C1 - 1 | |
1811 C1_lo = C1_lo - 1; | |
1812 if (C1_lo == 0xffffffffffffffffull) | |
1813 C1_hi--; | |
1814 // check if we crossed into the lower decade | |
1815 if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1 | |
1816 C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
1817 C1_lo = 0x378d8e63ffffffffull; | |
1818 y_exp = y_exp - EXP_P1; | |
1819 // no underflow, because delta + q2 >= P34 + 1 | |
1820 } | |
1821 } else { | |
1822 ; // exact, the result is already correct | |
1823 } | |
1824 } | |
1825 // assemble the result | |
1826 res.w[1] = x_sign | y_exp | C1_hi; | |
1827 res.w[0] = C1_lo; | |
1828 } | |
1829 } // end delta = P34 | |
1830 } else { // if (|delta| <= P34 - 1) | |
1831 if (delta >= 0) { // if (0 <= delta <= P34 - 1) | |
1832 if (delta <= P34 - 1 - q2) { | |
1833 // calculate C' directly; the result is exact | |
1834 // in this case 1<=q1<=P34-1, 1<=q2<=P34-1 and 0 <= e1-e2 <= P34-2 | |
1835 // The coefficient of the result is C1 * 10^(e1-e2) + C2 and the | |
1836 // exponent is e2; either C1 or 10^(e1-e2) may not fit is 64 bits, | |
1837 // but their product fits with certainty in 128 bits (actually in 113) | |
1838 scale = delta - q1 + q2; // scale = (int)(e1 >> 49) - (int)(e2 >> 49) | |
1839 | |
1840 if (scale >= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does | |
1841 __mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]); | |
1842 C1_hi = C1.w[1]; | |
1843 C1_lo = C1.w[0]; | |
1844 } else if (scale >= 1) { | |
1845 // if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits | |
1846 if (q1 <= 19) { // C1 fits in 64 bits | |
1847 __mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]); | |
1848 } else { // q1 >= 20 | |
1849 C1.w[1] = C1_hi; | |
1850 C1.w[0] = C1_lo; | |
1851 __mul_128x64_to_128 (C1, ten2k64[scale], C1); | |
1852 } | |
1853 C1_hi = C1.w[1]; | |
1854 C1_lo = C1.w[0]; | |
1855 } else { // if (scale == 0) C1 is unchanged | |
1856 C1.w[0] = C1_lo; // C1.w[1] = C1_hi; | |
1857 } | |
1858 // now add C2 | |
1859 if (x_sign == y_sign) { | |
1860 // the result cannot overflow | |
1861 C1_lo = C1_lo + C2_lo; | |
1862 C1_hi = C1_hi + C2_hi; | |
1863 if (C1_lo < C1.w[0]) | |
1864 C1_hi++; | |
1865 } else { // if x_sign != y_sign | |
1866 C1_lo = C1_lo - C2_lo; | |
1867 C1_hi = C1_hi - C2_hi; | |
1868 if (C1_lo > C1.w[0]) | |
1869 C1_hi--; | |
1870 // the result can be zero, but it cannot overflow | |
1871 if (C1_lo == 0 && C1_hi == 0) { | |
1872 // assemble the result | |
1873 if (x_exp < y_exp) | |
1874 res.w[1] = x_exp; | |
1875 else | |
1876 res.w[1] = y_exp; | |
1877 res.w[0] = 0; | |
1878 if (rnd_mode == ROUNDING_DOWN) { | |
1879 res.w[1] |= 0x8000000000000000ull; | |
1880 } | |
1881 BID_SWAP128 (res); | |
1882 BID_RETURN (res); | |
1883 } | |
1884 if (C1_hi >= 0x8000000000000000ull) { // negative coefficient! | |
1885 C1_lo = ~C1_lo; | |
1886 C1_lo++; | |
1887 C1_hi = ~C1_hi; | |
1888 if (C1_lo == 0x0) | |
1889 C1_hi++; | |
1890 x_sign = y_sign; // the result will have the sign of y | |
1891 } | |
1892 } | |
1893 // assemble the result | |
1894 res.w[1] = x_sign | y_exp | C1_hi; | |
1895 res.w[0] = C1_lo; | |
1896 } else if (delta == P34 - q2) { | |
1897 // calculate C' directly; the result may be inexact if it requires | |
1898 // P34+1 decimal digits; in this case the 'cutoff' point for addition | |
1899 // is at the position of the lsb of C2, so 0 <= e1-e2 <= P34-1 | |
1900 // The coefficient of the result is C1 * 10^(e1-e2) + C2 and the | |
1901 // exponent is e2; either C1 or 10^(e1-e2) may not fit is 64 bits, | |
1902 // but their product fits with certainty in 128 bits (actually in 113) | |
1903 scale = delta - q1 + q2; // scale = (int)(e1 >> 49) - (int)(e2 >> 49) | |
1904 if (scale >= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does | |
1905 __mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]); | |
1906 } else if (scale >= 1) { | |
1907 // if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits | |
1908 if (q1 <= 19) { // C1 fits in 64 bits | |
1909 __mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]); | |
1910 } else { // q1 >= 20 | |
1911 C1.w[1] = C1_hi; | |
1912 C1.w[0] = C1_lo; | |
1913 __mul_128x64_to_128 (C1, ten2k64[scale], C1); | |
1914 } | |
1915 } else { // if (scale == 0) C1 is unchanged | |
1916 C1.w[1] = C1_hi; | |
1917 C1.w[0] = C1_lo; // only the low part is necessary | |
1918 } | |
1919 C1_hi = C1.w[1]; | |
1920 C1_lo = C1.w[0]; | |
1921 // now add C2 | |
1922 if (x_sign == y_sign) { | |
1923 // the result can overflow! | |
1924 C1_lo = C1_lo + C2_lo; | |
1925 C1_hi = C1_hi + C2_hi; | |
1926 if (C1_lo < C1.w[0]) | |
1927 C1_hi++; | |
1928 // test for overflow, possible only when C1 >= 10^34 | |
1929 if (C1_hi > 0x0001ed09bead87c0ull || (C1_hi == 0x0001ed09bead87c0ull && C1_lo >= 0x378d8e6400000000ull)) { // C1 >= 10^34 | |
1930 // in this case q = P34 + 1 and x = q - P34 = 1, so multiply | |
1931 // C'' = C'+ 5 = C1 + 5 by k1 ~ 10^(-1) calculated for P34 + 1 | |
1932 // decimal digits | |
1933 // Calculate C'' = C' + 1/2 * 10^x | |
1934 if (C1_lo >= 0xfffffffffffffffbull) { // low half add has carry | |
1935 C1_lo = C1_lo + 5; | |
1936 C1_hi = C1_hi + 1; | |
1937 } else { | |
1938 C1_lo = C1_lo + 5; | |
1939 } | |
1940 // the approximation of 10^(-1) was rounded up to 118 bits | |
1941 // 10^(-1) =~ 33333333333333333333333333333400 * 2^-129 | |
1942 // 10^(-1) =~ 19999999999999999999999999999a00 * 2^-128 | |
1943 C1.w[1] = C1_hi; | |
1944 C1.w[0] = C1_lo; // C'' | |
1945 ten2m1.w[1] = 0x1999999999999999ull; | |
1946 ten2m1.w[0] = 0x9999999999999a00ull; | |
1947 __mul_128x128_to_256 (P256, C1, ten2m1); // P256 = C*, f* | |
1948 // C* is actually floor(C*) in this case | |
1949 // the top Ex = 128 bits of 10^(-1) are | |
1950 // T* = 0x00199999999999999999999999999999 | |
1951 // if (0 < f* < 10^(-x)) then | |
1952 // if floor(C*) is even then C = floor(C*) - logical right | |
1953 // shift; C has p decimal digits, correct by Prop. 1) | |
1954 // else if floor(C*) is odd C = floor(C*) - 1 (logical right | |
1955 // shift; C has p decimal digits, correct by Pr. 1) | |
1956 // else | |
1957 // C = floor(C*) (logical right shift; C has p decimal digits, | |
1958 // correct by Property 1) | |
1959 // n = C * 10^(e2+x) | |
1960 if ((P256.w[1] || P256.w[0]) | |
1961 && (P256.w[1] < 0x1999999999999999ull | |
1962 || (P256.w[1] == 0x1999999999999999ull | |
1963 && P256.w[0] <= 0x9999999999999999ull))) { | |
1964 // the result is a midpoint | |
1965 if (P256.w[2] & 0x01) { | |
1966 is_midpoint_gt_even = 1; | |
1967 // if floor(C*) is odd C = floor(C*) - 1; the result is not 0 | |
1968 P256.w[2]--; | |
1969 if (P256.w[2] == 0xffffffffffffffffull) | |
1970 P256.w[3]--; | |
1971 } else { | |
1972 is_midpoint_lt_even = 1; | |
1973 } | |
1974 } | |
1975 // n = Cstar * 10^(e2+1) | |
1976 y_exp = y_exp + EXP_P1; | |
1977 // C* != 10^P because C* has P34 digits | |
1978 // check for overflow | |
1979 if (y_exp == EXP_MAX_P1 | |
1980 && (rnd_mode == ROUNDING_TO_NEAREST | |
1981 || rnd_mode == ROUNDING_TIES_AWAY)) { | |
1982 // overflow for RN | |
1983 res.w[1] = x_sign | 0x7800000000000000ull; // +/-inf | |
1984 res.w[0] = 0x0ull; | |
1985 // set the inexact flag | |
1986 *pfpsf |= INEXACT_EXCEPTION; | |
1987 // set the overflow flag | |
1988 *pfpsf |= OVERFLOW_EXCEPTION; | |
1989 BID_SWAP128 (res); | |
1990 BID_RETURN (res); | |
1991 } | |
1992 // if (0 < f* - 1/2 < 10^(-x)) then | |
1993 // the result of the addition is exact | |
1994 // else | |
1995 // the result of the addition is inexact | |
1996 if (P256.w[1] > 0x8000000000000000ull || (P256.w[1] == 0x8000000000000000ull && P256.w[0] > 0x0ull)) { // the result may be exact | |
1997 tmp64 = P256.w[1] - 0x8000000000000000ull; // f* - 1/2 | |
1998 if ((tmp64 > 0x1999999999999999ull | |
1999 || (tmp64 == 0x1999999999999999ull | |
2000 && P256.w[0] >= 0x9999999999999999ull))) { | |
2001 // set the inexact flag | |
2002 *pfpsf |= INEXACT_EXCEPTION; | |
2003 is_inexact = 1; | |
2004 } // else the result is exact | |
2005 } else { // the result is inexact | |
2006 // set the inexact flag | |
2007 *pfpsf |= INEXACT_EXCEPTION; | |
2008 is_inexact = 1; | |
2009 } | |
2010 C1_hi = P256.w[3]; | |
2011 C1_lo = P256.w[2]; | |
2012 if (!is_midpoint_gt_even && !is_midpoint_lt_even) { | |
2013 is_inexact_lt_midpoint = is_inexact | |
2014 && (P256.w[1] & 0x8000000000000000ull); | |
2015 is_inexact_gt_midpoint = is_inexact | |
2016 && !(P256.w[1] & 0x8000000000000000ull); | |
2017 } | |
2018 // general correction from RN to RA, RM, RP, RZ; | |
2019 // result uses y_exp | |
2020 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
2021 if ((!x_sign | |
2022 && | |
2023 ((rnd_mode == ROUNDING_UP | |
2024 && is_inexact_lt_midpoint) | |
2025 || | |
2026 ((rnd_mode == ROUNDING_TIES_AWAY | |
2027 || rnd_mode == ROUNDING_UP) | |
2028 && is_midpoint_gt_even))) || (x_sign | |
2029 && | |
2030 ((rnd_mode == | |
2031 ROUNDING_DOWN | |
2032 && | |
2033 is_inexact_lt_midpoint) | |
2034 || | |
2035 ((rnd_mode == | |
2036 ROUNDING_TIES_AWAY | |
2037 || rnd_mode == | |
2038 ROUNDING_DOWN) | |
2039 && | |
2040 is_midpoint_gt_even)))) | |
2041 { | |
2042 // C1 = C1 + 1 | |
2043 C1_lo = C1_lo + 1; | |
2044 if (C1_lo == 0) { // rounding overflow in the low 64 bits | |
2045 C1_hi = C1_hi + 1; | |
2046 } | |
2047 if (C1_hi == 0x0001ed09bead87c0ull | |
2048 && C1_lo == 0x378d8e6400000000ull) { | |
2049 // C1 = 10^34 => rounding overflow | |
2050 C1_hi = 0x0000314dc6448d93ull; | |
2051 C1_lo = 0x38c15b0a00000000ull; // 10^33 | |
2052 y_exp = y_exp + EXP_P1; | |
2053 } | |
2054 } else | |
2055 if ((is_midpoint_lt_even || is_inexact_gt_midpoint) | |
2056 && | |
2057 ((x_sign | |
2058 && (rnd_mode == ROUNDING_UP | |
2059 || rnd_mode == ROUNDING_TO_ZERO)) | |
2060 || (!x_sign | |
2061 && (rnd_mode == ROUNDING_DOWN | |
2062 || rnd_mode == ROUNDING_TO_ZERO)))) { | |
2063 // C1 = C1 - 1 | |
2064 C1_lo = C1_lo - 1; | |
2065 if (C1_lo == 0xffffffffffffffffull) | |
2066 C1_hi--; | |
2067 // check if we crossed into the lower decade | |
2068 if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1 | |
2069 C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
2070 C1_lo = 0x378d8e63ffffffffull; | |
2071 y_exp = y_exp - EXP_P1; | |
2072 // no underflow, because delta + q2 >= P34 + 1 | |
2073 } | |
2074 } else { | |
2075 ; // exact, the result is already correct | |
2076 } | |
2077 // in all cases check for overflow (RN and RA solved already) | |
2078 if (y_exp == EXP_MAX_P1) { // overflow | |
2079 if ((rnd_mode == ROUNDING_DOWN && x_sign) || // RM and res < 0 | |
2080 (rnd_mode == ROUNDING_UP && !x_sign)) { // RP and res > 0 | |
2081 C1_hi = 0x7800000000000000ull; // +inf | |
2082 C1_lo = 0x0ull; | |
2083 } else { // RM and res > 0, RP and res < 0, or RZ | |
2084 C1_hi = 0x5fffed09bead87c0ull; | |
2085 C1_lo = 0x378d8e63ffffffffull; | |
2086 } | |
2087 y_exp = 0; // x_sign is preserved | |
2088 // set the inexact flag (in case the exact addition was exact) | |
2089 *pfpsf |= INEXACT_EXCEPTION; | |
2090 // set the overflow flag | |
2091 *pfpsf |= OVERFLOW_EXCEPTION; | |
2092 } | |
2093 } | |
2094 } // else if (C1 < 10^34) then C1 is the coeff.; the result is exact | |
2095 } else { // if x_sign != y_sign the result is exact | |
2096 C1_lo = C1_lo - C2_lo; | |
2097 C1_hi = C1_hi - C2_hi; | |
2098 if (C1_lo > C1.w[0]) | |
2099 C1_hi--; | |
2100 // the result can be zero, but it cannot overflow | |
2101 if (C1_lo == 0 && C1_hi == 0) { | |
2102 // assemble the result | |
2103 if (x_exp < y_exp) | |
2104 res.w[1] = x_exp; | |
2105 else | |
2106 res.w[1] = y_exp; | |
2107 res.w[0] = 0; | |
2108 if (rnd_mode == ROUNDING_DOWN) { | |
2109 res.w[1] |= 0x8000000000000000ull; | |
2110 } | |
2111 BID_SWAP128 (res); | |
2112 BID_RETURN (res); | |
2113 } | |
2114 if (C1_hi >= 0x8000000000000000ull) { // negative coefficient! | |
2115 C1_lo = ~C1_lo; | |
2116 C1_lo++; | |
2117 C1_hi = ~C1_hi; | |
2118 if (C1_lo == 0x0) | |
2119 C1_hi++; | |
2120 x_sign = y_sign; // the result will have the sign of y | |
2121 } | |
2122 } | |
2123 // assemble the result | |
2124 res.w[1] = x_sign | y_exp | C1_hi; | |
2125 res.w[0] = C1_lo; | |
2126 } else { // if (delta >= P34 + 1 - q2) | |
2127 // instead of C' = (C1 * 10^(e1-e2) + C2)rnd,P34 | |
2128 // calculate C' = C1 * 10^(e1-e2-x1) + (C2 * 10^(-x1))rnd,P34 | |
2129 // where x1 = q1 + e1 - e2 - P34, 1 <= x1 <= P34 - 1 | |
2130 // In most cases C' will have P34 digits, and n = C' * 10^(e2+x1) | |
2131 // If the result has P34+1 digits, redo the steps above with x1+1 | |
2132 // If the result has P34-1 digits or less, redo the steps above with | |
2133 // x1-1 but only if initially x1 >= 1 | |
2134 // NOTE: these two steps can be improved, e.g we could guess if | |
2135 // P34+1 or P34-1 digits will be obtained by adding/subtracting just | |
2136 // the top 64 bits of the two operands | |
2137 // The result cannot be zero, but it can overflow | |
2138 x1 = delta + q2 - P34; // 1 <= x1 <= P34-1 | |
2139 roundC2: | |
2140 // Calculate C1 * 10^(e1-e2-x1) where 0 <= e1-e2-x1 <= P34 - 1 | |
2141 // scale = (int)(e1 >> 49) - (int)(e2 >> 49) - x1; 0 <= scale <= P34-1 | |
2142 scale = delta - q1 + q2 - x1; // scale = e1 - e2 - x1 = P34 - q1 | |
2143 // either C1 or 10^(e1-e2-x1) may not fit is 64 bits, | |
2144 // but their product fits with certainty in 128 bits (actually in 113) | |
2145 if (scale >= 20) { //10^(e1-e2-x1) doesn't fit in 64 bits, but C1 does | |
2146 __mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]); | |
2147 } else if (scale >= 1) { | |
2148 // if 1 <= scale <= 19 then 10^(e1-e2-x1) fits in 64 bits | |
2149 if (q1 <= 19) { // C1 fits in 64 bits | |
2150 __mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]); | |
2151 } else { // q1 >= 20 | |
2152 C1.w[1] = C1_hi; | |
2153 C1.w[0] = C1_lo; | |
2154 __mul_128x64_to_128 (C1, ten2k64[scale], C1); | |
2155 } | |
2156 } else { // if (scale == 0) C1 is unchanged | |
2157 C1.w[1] = C1_hi; | |
2158 C1.w[0] = C1_lo; | |
2159 } | |
2160 tmp64 = C1.w[0]; // C1.w[1], C1.w[0] contains C1 * 10^(e1-e2-x1) | |
2161 | |
2162 // now round C2 to q2-x1 decimal digits, where 1<=x1<=q2-1<=P34-1 | |
2163 // (but if we got here a second time after x1 = x1 - 1, then | |
2164 // x1 >= 0; note that for x1 = 0 C2 is unchanged) | |
2165 // C2' = C2 + 1/2 * 10^x1 = C2 + 5 * 10^(x1-1) | |
2166 ind = x1 - 1; // 0 <= ind <= q2-2<=P34-2=32; but note that if x1 = 0 | |
2167 // during a second pass, then ind = -1 | |
2168 if (ind >= 0) { // if (x1 >= 1) | |
2169 C2.w[0] = C2_lo; | |
2170 C2.w[1] = C2_hi; | |
2171 if (ind <= 18) { | |
2172 C2.w[0] = C2.w[0] + midpoint64[ind]; | |
2173 if (C2.w[0] < C2_lo) | |
2174 C2.w[1]++; | |
2175 } else { // 19 <= ind <= 32 | |
2176 C2.w[0] = C2.w[0] + midpoint128[ind - 19].w[0]; | |
2177 C2.w[1] = C2.w[1] + midpoint128[ind - 19].w[1]; | |
2178 if (C2.w[0] < C2_lo) | |
2179 C2.w[1]++; | |
2180 } | |
2181 // the approximation of 10^(-x1) was rounded up to 118 bits | |
2182 __mul_128x128_to_256 (R256, C2, ten2mk128[ind]); // R256 = C2*, f2* | |
2183 // calculate C2* and f2* | |
2184 // C2* is actually floor(C2*) in this case | |
2185 // C2* and f2* need shifting and masking, as shown by | |
2186 // shiftright128[] and maskhigh128[] | |
2187 // the top Ex bits of 10^(-x1) are T* = ten2mk128trunc[ind], e.g. | |
2188 // if x1=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999 | |
2189 // if (0 < f2* < 10^(-x1)) then | |
2190 // if floor(C1+C2*) is even then C2* = floor(C2*) - logical right | |
2191 // shift; C2* has p decimal digits, correct by Prop. 1) | |
2192 // else if floor(C1+C2*) is odd C2* = floor(C2*)-1 (logical right | |
2193 // shift; C2* has p decimal digits, correct by Pr. 1) | |
2194 // else | |
2195 // C2* = floor(C2*) (logical right shift; C has p decimal digits, | |
2196 // correct by Property 1) | |
2197 // n = C2* * 10^(e2+x1) | |
2198 | |
2199 if (ind <= 2) { | |
2200 highf2star.w[1] = 0x0; | |
2201 highf2star.w[0] = 0x0; // low f2* ok | |
2202 } else if (ind <= 21) { | |
2203 highf2star.w[1] = 0x0; | |
2204 highf2star.w[0] = R256.w[2] & maskhigh128[ind]; // low f2* ok | |
2205 } else { | |
2206 highf2star.w[1] = R256.w[3] & maskhigh128[ind]; | |
2207 highf2star.w[0] = R256.w[2]; // low f2* is ok | |
2208 } | |
2209 // shift right C2* by Ex-128 = shiftright128[ind] | |
2210 if (ind >= 3) { | |
2211 shift = shiftright128[ind]; | |
2212 if (shift < 64) { // 3 <= shift <= 63 | |
2213 R256.w[2] = | |
2214 (R256.w[2] >> shift) | (R256.w[3] << (64 - shift)); | |
2215 R256.w[3] = (R256.w[3] >> shift); | |
2216 } else { // 66 <= shift <= 102 | |
2217 R256.w[2] = (R256.w[3] >> (shift - 64)); | |
2218 R256.w[3] = 0x0ULL; | |
2219 } | |
2220 } | |
2221 if (second_pass) { | |
2222 is_inexact_lt_midpoint = 0; | |
2223 is_inexact_gt_midpoint = 0; | |
2224 is_midpoint_lt_even = 0; | |
2225 is_midpoint_gt_even = 0; | |
2226 } | |
2227 // determine inexactness of the rounding of C2* (this may be | |
2228 // followed by a second rounding only if we get P34+1 | |
2229 // decimal digits) | |
2230 // if (0 < f2* - 1/2 < 10^(-x1)) then | |
2231 // the result is exact | |
2232 // else (if f2* - 1/2 > T* then) | |
2233 // the result of is inexact | |
2234 if (ind <= 2) { | |
2235 if (R256.w[1] > 0x8000000000000000ull || | |
2236 (R256.w[1] == 0x8000000000000000ull | |
2237 && R256.w[0] > 0x0ull)) { | |
2238 // f2* > 1/2 and the result may be exact | |
2239 tmp64A = R256.w[1] - 0x8000000000000000ull; // f* - 1/2 | |
2240 if ((tmp64A > ten2mk128trunc[ind].w[1] | |
2241 || (tmp64A == ten2mk128trunc[ind].w[1] | |
2242 && R256.w[0] >= ten2mk128trunc[ind].w[0]))) { | |
2243 // set the inexact flag | |
2244 // *pfpsf |= INEXACT_EXCEPTION; | |
2245 tmp_inexact = 1; // may be set again during a second pass | |
2246 // this rounding is applied to C2 only! | |
2247 if (x_sign == y_sign) | |
2248 is_inexact_lt_midpoint = 1; | |
2249 else // if (x_sign != y_sign) | |
2250 is_inexact_gt_midpoint = 1; | |
2251 } // else the result is exact | |
2252 // rounding down, unless a midpoint in [ODD, EVEN] | |
2253 } else { // the result is inexact; f2* <= 1/2 | |
2254 // set the inexact flag | |
2255 // *pfpsf |= INEXACT_EXCEPTION; | |
2256 tmp_inexact = 1; // just in case we will round a second time | |
2257 // rounding up, unless a midpoint in [EVEN, ODD] | |
2258 // this rounding is applied to C2 only! | |
2259 if (x_sign == y_sign) | |
2260 is_inexact_gt_midpoint = 1; | |
2261 else // if (x_sign != y_sign) | |
2262 is_inexact_lt_midpoint = 1; | |
2263 } | |
2264 } else if (ind <= 21) { // if 3 <= ind <= 21 | |
2265 if (highf2star.w[1] > 0x0 || (highf2star.w[1] == 0x0 | |
2266 && highf2star.w[0] > | |
2267 onehalf128[ind]) | |
2268 || (highf2star.w[1] == 0x0 | |
2269 && highf2star.w[0] == onehalf128[ind] | |
2270 && (R256.w[1] || R256.w[0]))) { | |
2271 // f2* > 1/2 and the result may be exact | |
2272 // Calculate f2* - 1/2 | |
2273 tmp64A = highf2star.w[0] - onehalf128[ind]; | |
2274 tmp64B = highf2star.w[1]; | |
2275 if (tmp64A > highf2star.w[0]) | |
2276 tmp64B--; | |
2277 if (tmp64B || tmp64A | |
2278 || R256.w[1] > ten2mk128trunc[ind].w[1] | |
2279 || (R256.w[1] == ten2mk128trunc[ind].w[1] | |
2280 && R256.w[0] > ten2mk128trunc[ind].w[0])) { | |
2281 // set the inexact flag | |
2282 // *pfpsf |= INEXACT_EXCEPTION; | |
2283 tmp_inexact = 1; // may be set again during a second pass | |
2284 // this rounding is applied to C2 only! | |
2285 if (x_sign == y_sign) | |
2286 is_inexact_lt_midpoint = 1; | |
2287 else // if (x_sign != y_sign) | |
2288 is_inexact_gt_midpoint = 1; | |
2289 } // else the result is exact | |
2290 } else { // the result is inexact; f2* <= 1/2 | |
2291 // set the inexact flag | |
2292 // *pfpsf |= INEXACT_EXCEPTION; | |
2293 tmp_inexact = 1; // may be set again during a second pass | |
2294 // rounding up, unless a midpoint in [EVEN, ODD] | |
2295 // this rounding is applied to C2 only! | |
2296 if (x_sign == y_sign) | |
2297 is_inexact_gt_midpoint = 1; | |
2298 else // if (x_sign != y_sign) | |
2299 is_inexact_lt_midpoint = 1; | |
2300 } | |
2301 } else { // if 22 <= ind <= 33 | |
2302 if (highf2star.w[1] > onehalf128[ind] | |
2303 || (highf2star.w[1] == onehalf128[ind] | |
2304 && (highf2star.w[0] || R256.w[1] | |
2305 || R256.w[0]))) { | |
2306 // f2* > 1/2 and the result may be exact | |
2307 // Calculate f2* - 1/2 | |
2308 // tmp64A = highf2star.w[0]; | |
2309 tmp64B = highf2star.w[1] - onehalf128[ind]; | |
2310 if (tmp64B || highf2star.w[0] | |
2311 || R256.w[1] > ten2mk128trunc[ind].w[1] | |
2312 || (R256.w[1] == ten2mk128trunc[ind].w[1] | |
2313 && R256.w[0] > ten2mk128trunc[ind].w[0])) { | |
2314 // set the inexact flag | |
2315 // *pfpsf |= INEXACT_EXCEPTION; | |
2316 tmp_inexact = 1; // may be set again during a second pass | |
2317 // this rounding is applied to C2 only! | |
2318 if (x_sign == y_sign) | |
2319 is_inexact_lt_midpoint = 1; | |
2320 else // if (x_sign != y_sign) | |
2321 is_inexact_gt_midpoint = 1; | |
2322 } // else the result is exact | |
2323 } else { // the result is inexact; f2* <= 1/2 | |
2324 // set the inexact flag | |
2325 // *pfpsf |= INEXACT_EXCEPTION; | |
2326 tmp_inexact = 1; // may be set again during a second pass | |
2327 // rounding up, unless a midpoint in [EVEN, ODD] | |
2328 // this rounding is applied to C2 only! | |
2329 if (x_sign == y_sign) | |
2330 is_inexact_gt_midpoint = 1; | |
2331 else // if (x_sign != y_sign) | |
2332 is_inexact_lt_midpoint = 1; | |
2333 } | |
2334 } | |
2335 // check for midpoints | |
2336 if ((R256.w[1] || R256.w[0]) && (highf2star.w[1] == 0) | |
2337 && (highf2star.w[0] == 0) | |
2338 && (R256.w[1] < ten2mk128trunc[ind].w[1] | |
2339 || (R256.w[1] == ten2mk128trunc[ind].w[1] | |
2340 && R256.w[0] <= ten2mk128trunc[ind].w[0]))) { | |
2341 // the result is a midpoint | |
2342 if ((tmp64 + R256.w[2]) & 0x01) { // MP in [EVEN, ODD] | |
2343 // if floor(C2*) is odd C = floor(C2*) - 1; the result may be 0 | |
2344 R256.w[2]--; | |
2345 if (R256.w[2] == 0xffffffffffffffffull) | |
2346 R256.w[3]--; | |
2347 // this rounding is applied to C2 only! | |
2348 if (x_sign == y_sign) | |
2349 is_midpoint_gt_even = 1; | |
2350 else // if (x_sign != y_sign) | |
2351 is_midpoint_lt_even = 1; | |
2352 is_inexact_lt_midpoint = 0; | |
2353 is_inexact_gt_midpoint = 0; | |
2354 } else { | |
2355 // else MP in [ODD, EVEN] | |
2356 // this rounding is applied to C2 only! | |
2357 if (x_sign == y_sign) | |
2358 is_midpoint_lt_even = 1; | |
2359 else // if (x_sign != y_sign) | |
2360 is_midpoint_gt_even = 1; | |
2361 is_inexact_lt_midpoint = 0; | |
2362 is_inexact_gt_midpoint = 0; | |
2363 } | |
2364 } | |
2365 // end if (ind >= 0) | |
2366 } else { // if (ind == -1); only during a 2nd pass, and when x1 = 0 | |
2367 R256.w[2] = C2_lo; | |
2368 R256.w[3] = C2_hi; | |
2369 tmp_inexact = 0; | |
2370 // to correct a possible setting to 1 from 1st pass | |
2371 if (second_pass) { | |
2372 is_midpoint_lt_even = 0; | |
2373 is_midpoint_gt_even = 0; | |
2374 is_inexact_lt_midpoint = 0; | |
2375 is_inexact_gt_midpoint = 0; | |
2376 } | |
2377 } | |
2378 // and now add/subtract C1 * 10^(e1-e2-x1) +/- (C2 * 10^(-x1))rnd,P34 | |
2379 if (x_sign == y_sign) { // addition; could overflow | |
2380 // no second pass is possible this way (only for x_sign != y_sign) | |
2381 C1.w[0] = C1.w[0] + R256.w[2]; | |
2382 C1.w[1] = C1.w[1] + R256.w[3]; | |
2383 if (C1.w[0] < tmp64) | |
2384 C1.w[1]++; // carry | |
2385 // if the sum has P34+1 digits, i.e. C1>=10^34 redo the calculation | |
2386 // with x1=x1+1 | |
2387 if (C1.w[1] > 0x0001ed09bead87c0ull || (C1.w[1] == 0x0001ed09bead87c0ull && C1.w[0] >= 0x378d8e6400000000ull)) { // C1 >= 10^34 | |
2388 // chop off one more digit from the sum, but make sure there is | |
2389 // no double-rounding error (see table - double rounding logic) | |
2390 // now round C1 from P34+1 to P34 decimal digits | |
2391 // C1' = C1 + 1/2 * 10 = C1 + 5 | |
2392 if (C1.w[0] >= 0xfffffffffffffffbull) { // low half add has carry | |
2393 C1.w[0] = C1.w[0] + 5; | |
2394 C1.w[1] = C1.w[1] + 1; | |
2395 } else { | |
2396 C1.w[0] = C1.w[0] + 5; | |
2397 } | |
2398 // the approximation of 10^(-1) was rounded up to 118 bits | |
2399 __mul_128x128_to_256 (Q256, C1, ten2mk128[0]); // Q256 = C1*, f1* | |
2400 // C1* is actually floor(C1*) in this case | |
2401 // the top 128 bits of 10^(-1) are | |
2402 // T* = ten2mk128trunc[0]=0x19999999999999999999999999999999 | |
2403 // if (0 < f1* < 10^(-1)) then | |
2404 // if floor(C1*) is even then C1* = floor(C1*) - logical right | |
2405 // shift; C1* has p decimal digits, correct by Prop. 1) | |
2406 // else if floor(C1*) is odd C1* = floor(C1*) - 1 (logical right | |
2407 // shift; C1* has p decimal digits, correct by Pr. 1) | |
2408 // else | |
2409 // C1* = floor(C1*) (logical right shift; C has p decimal digits | |
2410 // correct by Property 1) | |
2411 // n = C1* * 10^(e2+x1+1) | |
2412 if ((Q256.w[1] || Q256.w[0]) | |
2413 && (Q256.w[1] < ten2mk128trunc[0].w[1] | |
2414 || (Q256.w[1] == ten2mk128trunc[0].w[1] | |
2415 && Q256.w[0] <= ten2mk128trunc[0].w[0]))) { | |
2416 // the result is a midpoint | |
2417 if (is_inexact_lt_midpoint) { // for the 1st rounding | |
2418 is_inexact_gt_midpoint = 1; | |
2419 is_inexact_lt_midpoint = 0; | |
2420 is_midpoint_gt_even = 0; | |
2421 is_midpoint_lt_even = 0; | |
2422 } else if (is_inexact_gt_midpoint) { // for the 1st rounding | |
2423 Q256.w[2]--; | |
2424 if (Q256.w[2] == 0xffffffffffffffffull) | |
2425 Q256.w[3]--; | |
2426 is_inexact_gt_midpoint = 0; | |
2427 is_inexact_lt_midpoint = 1; | |
2428 is_midpoint_gt_even = 0; | |
2429 is_midpoint_lt_even = 0; | |
2430 } else if (is_midpoint_gt_even) { // for the 1st rounding | |
2431 // Note: cannot have is_midpoint_lt_even | |
2432 is_inexact_gt_midpoint = 0; | |
2433 is_inexact_lt_midpoint = 1; | |
2434 is_midpoint_gt_even = 0; | |
2435 is_midpoint_lt_even = 0; | |
2436 } else { // the first rounding must have been exact | |
2437 if (Q256.w[2] & 0x01) { // MP in [EVEN, ODD] | |
2438 // the truncated result is correct | |
2439 Q256.w[2]--; | |
2440 if (Q256.w[2] == 0xffffffffffffffffull) | |
2441 Q256.w[3]--; | |
2442 is_inexact_gt_midpoint = 0; | |
2443 is_inexact_lt_midpoint = 0; | |
2444 is_midpoint_gt_even = 1; | |
2445 is_midpoint_lt_even = 0; | |
2446 } else { // MP in [ODD, EVEN] | |
2447 is_inexact_gt_midpoint = 0; | |
2448 is_inexact_lt_midpoint = 0; | |
2449 is_midpoint_gt_even = 0; | |
2450 is_midpoint_lt_even = 1; | |
2451 } | |
2452 } | |
2453 tmp_inexact = 1; // in all cases | |
2454 } else { // the result is not a midpoint | |
2455 // determine inexactness of the rounding of C1 (the sum C1+C2*) | |
2456 // if (0 < f1* - 1/2 < 10^(-1)) then | |
2457 // the result is exact | |
2458 // else (if f1* - 1/2 > T* then) | |
2459 // the result is inexact | |
2460 // ind = 0 | |
2461 if (Q256.w[1] > 0x8000000000000000ull | |
2462 || (Q256.w[1] == 0x8000000000000000ull | |
2463 && Q256.w[0] > 0x0ull)) { | |
2464 // f1* > 1/2 and the result may be exact | |
2465 Q256.w[1] = Q256.w[1] - 0x8000000000000000ull; // f1* - 1/2 | |
2466 if ((Q256.w[1] > ten2mk128trunc[0].w[1] | |
2467 || (Q256.w[1] == ten2mk128trunc[0].w[1] | |
2468 && Q256.w[0] > ten2mk128trunc[0].w[0]))) { | |
2469 is_inexact_gt_midpoint = 0; | |
2470 is_inexact_lt_midpoint = 1; | |
2471 is_midpoint_gt_even = 0; | |
2472 is_midpoint_lt_even = 0; | |
2473 // set the inexact flag | |
2474 tmp_inexact = 1; | |
2475 // *pfpsf |= INEXACT_EXCEPTION; | |
2476 } else { // else the result is exact for the 2nd rounding | |
2477 if (tmp_inexact) { // if the previous rounding was inexact | |
2478 if (is_midpoint_lt_even) { | |
2479 is_inexact_gt_midpoint = 1; | |
2480 is_midpoint_lt_even = 0; | |
2481 } else if (is_midpoint_gt_even) { | |
2482 is_inexact_lt_midpoint = 1; | |
2483 is_midpoint_gt_even = 0; | |
2484 } else { | |
2485 ; // no change | |
2486 } | |
2487 } | |
2488 } | |
2489 // rounding down, unless a midpoint in [ODD, EVEN] | |
2490 } else { // the result is inexact; f1* <= 1/2 | |
2491 is_inexact_gt_midpoint = 1; | |
2492 is_inexact_lt_midpoint = 0; | |
2493 is_midpoint_gt_even = 0; | |
2494 is_midpoint_lt_even = 0; | |
2495 // set the inexact flag | |
2496 tmp_inexact = 1; | |
2497 // *pfpsf |= INEXACT_EXCEPTION; | |
2498 } | |
2499 } // end 'the result is not a midpoint' | |
2500 // n = C1 * 10^(e2+x1) | |
2501 C1.w[1] = Q256.w[3]; | |
2502 C1.w[0] = Q256.w[2]; | |
2503 y_exp = y_exp + ((UINT64) (x1 + 1) << 49); | |
2504 } else { // C1 < 10^34 | |
2505 // C1.w[1] and C1.w[0] already set | |
2506 // n = C1 * 10^(e2+x1) | |
2507 y_exp = y_exp + ((UINT64) x1 << 49); | |
2508 } | |
2509 // check for overflow | |
2510 if (y_exp == EXP_MAX_P1 | |
2511 && (rnd_mode == ROUNDING_TO_NEAREST | |
2512 || rnd_mode == ROUNDING_TIES_AWAY)) { | |
2513 res.w[1] = 0x7800000000000000ull | x_sign; // +/-inf | |
2514 res.w[0] = 0x0ull; | |
2515 // set the inexact flag | |
2516 *pfpsf |= INEXACT_EXCEPTION; | |
2517 // set the overflow flag | |
2518 *pfpsf |= OVERFLOW_EXCEPTION; | |
2519 BID_SWAP128 (res); | |
2520 BID_RETURN (res); | |
2521 } // else no overflow | |
2522 } else { // if x_sign != y_sign the result of this subtract. is exact | |
2523 C1.w[0] = C1.w[0] - R256.w[2]; | |
2524 C1.w[1] = C1.w[1] - R256.w[3]; | |
2525 if (C1.w[0] > tmp64) | |
2526 C1.w[1]--; // borrow | |
2527 if (C1.w[1] >= 0x8000000000000000ull) { // negative coefficient! | |
2528 C1.w[0] = ~C1.w[0]; | |
2529 C1.w[0]++; | |
2530 C1.w[1] = ~C1.w[1]; | |
2531 if (C1.w[0] == 0x0) | |
2532 C1.w[1]++; | |
2533 tmp_sign = y_sign; | |
2534 // the result will have the sign of y if last rnd | |
2535 } else { | |
2536 tmp_sign = x_sign; | |
2537 } | |
2538 // if the difference has P34-1 digits or less, i.e. C1 < 10^33 then | |
2539 // redo the calculation with x1=x1-1; | |
2540 // redo the calculation also if C1 = 10^33 and | |
2541 // (is_inexact_gt_midpoint or is_midpoint_lt_even); | |
2542 // (the last part should have really been | |
2543 // (is_inexact_lt_midpoint or is_midpoint_gt_even) from | |
2544 // the rounding of C2, but the position flags have been reversed) | |
2545 // 10^33 = 0x0000314dc6448d93 0x38c15b0a00000000 | |
2546 if ((C1.w[1] < 0x0000314dc6448d93ull || (C1.w[1] == 0x0000314dc6448d93ull && C1.w[0] < 0x38c15b0a00000000ull)) || (C1.w[1] == 0x0000314dc6448d93ull && C1.w[0] == 0x38c15b0a00000000ull && (is_inexact_gt_midpoint || is_midpoint_lt_even))) { // C1=10^33 | |
2547 x1 = x1 - 1; // x1 >= 0 | |
2548 if (x1 >= 0) { | |
2549 // clear position flags and tmp_inexact | |
2550 is_midpoint_lt_even = 0; | |
2551 is_midpoint_gt_even = 0; | |
2552 is_inexact_lt_midpoint = 0; | |
2553 is_inexact_gt_midpoint = 0; | |
2554 tmp_inexact = 0; | |
2555 second_pass = 1; | |
2556 goto roundC2; // else result has less than P34 digits | |
2557 } | |
2558 } | |
2559 // if the coefficient of the result is 10^34 it means that this | |
2560 // must be the second pass, and we are done | |
2561 if (C1.w[1] == 0x0001ed09bead87c0ull && C1.w[0] == 0x378d8e6400000000ull) { // if C1 = 10^34 | |
2562 C1.w[1] = 0x0000314dc6448d93ull; // C1 = 10^33 | |
2563 C1.w[0] = 0x38c15b0a00000000ull; | |
2564 y_exp = y_exp + ((UINT64) 1 << 49); | |
2565 } | |
2566 x_sign = tmp_sign; | |
2567 if (x1 >= 1) | |
2568 y_exp = y_exp + ((UINT64) x1 << 49); | |
2569 // x1 = -1 is possible at the end of a second pass when the | |
2570 // first pass started with x1 = 1 | |
2571 } | |
2572 C1_hi = C1.w[1]; | |
2573 C1_lo = C1.w[0]; | |
2574 // general correction from RN to RA, RM, RP, RZ; result uses y_exp | |
2575 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
2576 if ((!x_sign | |
2577 && ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint) | |
2578 || | |
2579 ((rnd_mode == ROUNDING_TIES_AWAY | |
2580 || rnd_mode == ROUNDING_UP) | |
2581 && is_midpoint_gt_even))) || (x_sign | |
2582 && | |
2583 ((rnd_mode == | |
2584 ROUNDING_DOWN | |
2585 && | |
2586 is_inexact_lt_midpoint) | |
2587 || | |
2588 ((rnd_mode == | |
2589 ROUNDING_TIES_AWAY | |
2590 || rnd_mode == | |
2591 ROUNDING_DOWN) | |
2592 && | |
2593 is_midpoint_gt_even)))) | |
2594 { | |
2595 // C1 = C1 + 1 | |
2596 C1_lo = C1_lo + 1; | |
2597 if (C1_lo == 0) { // rounding overflow in the low 64 bits | |
2598 C1_hi = C1_hi + 1; | |
2599 } | |
2600 if (C1_hi == 0x0001ed09bead87c0ull | |
2601 && C1_lo == 0x378d8e6400000000ull) { | |
2602 // C1 = 10^34 => rounding overflow | |
2603 C1_hi = 0x0000314dc6448d93ull; | |
2604 C1_lo = 0x38c15b0a00000000ull; // 10^33 | |
2605 y_exp = y_exp + EXP_P1; | |
2606 } | |
2607 } else if ((is_midpoint_lt_even || is_inexact_gt_midpoint) | |
2608 && | |
2609 ((x_sign | |
2610 && (rnd_mode == ROUNDING_UP | |
2611 || rnd_mode == ROUNDING_TO_ZERO)) | |
2612 || (!x_sign | |
2613 && (rnd_mode == ROUNDING_DOWN | |
2614 || rnd_mode == ROUNDING_TO_ZERO)))) { | |
2615 // C1 = C1 - 1 | |
2616 C1_lo = C1_lo - 1; | |
2617 if (C1_lo == 0xffffffffffffffffull) | |
2618 C1_hi--; | |
2619 // check if we crossed into the lower decade | |
2620 if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1 | |
2621 C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
2622 C1_lo = 0x378d8e63ffffffffull; | |
2623 y_exp = y_exp - EXP_P1; | |
2624 // no underflow, because delta + q2 >= P34 + 1 | |
2625 } | |
2626 } else { | |
2627 ; // exact, the result is already correct | |
2628 } | |
2629 // in all cases check for overflow (RN and RA solved already) | |
2630 if (y_exp == EXP_MAX_P1) { // overflow | |
2631 if ((rnd_mode == ROUNDING_DOWN && x_sign) || // RM and res < 0 | |
2632 (rnd_mode == ROUNDING_UP && !x_sign)) { // RP and res > 0 | |
2633 C1_hi = 0x7800000000000000ull; // +inf | |
2634 C1_lo = 0x0ull; | |
2635 } else { // RM and res > 0, RP and res < 0, or RZ | |
2636 C1_hi = 0x5fffed09bead87c0ull; | |
2637 C1_lo = 0x378d8e63ffffffffull; | |
2638 } | |
2639 y_exp = 0; // x_sign is preserved | |
2640 // set the inexact flag (in case the exact addition was exact) | |
2641 *pfpsf |= INEXACT_EXCEPTION; | |
2642 // set the overflow flag | |
2643 *pfpsf |= OVERFLOW_EXCEPTION; | |
2644 } | |
2645 } | |
2646 // assemble the result | |
2647 res.w[1] = x_sign | y_exp | C1_hi; | |
2648 res.w[0] = C1_lo; | |
2649 if (tmp_inexact) | |
2650 *pfpsf |= INEXACT_EXCEPTION; | |
2651 } | |
2652 } else { // if (-P34 + 1 <= delta <= -1) <=> 1 <= -delta <= P34 - 1 | |
2653 // NOTE: the following, up to "} else { // if x_sign != y_sign | |
2654 // the result is exact" is identical to "else if (delta == P34 - q2) {" | |
2655 // from above; also, the code is not symmetric: a+b and b+a may take | |
2656 // different paths (need to unify eventually!) | |
2657 // calculate C' = C2 + C1 * 10^(e1-e2) directly; the result may be | |
2658 // inexact if it requires P34 + 1 decimal digits; in either case the | |
2659 // 'cutoff' point for addition is at the position of the lsb of C2 | |
2660 // The coefficient of the result is C1 * 10^(e1-e2) + C2 and the | |
2661 // exponent is e2; either C1 or 10^(e1-e2) may not fit in 64 bits, | |
2662 // but their product fits with certainty in 128 bits (actually in 113) | |
2663 // Note that 0 <= e1 - e2 <= P34 - 2 | |
2664 // -P34 + 1 <= delta <= -1 <=> -P34 + 1 <= delta <= -1 <=> | |
2665 // -P34 + 1 <= q1 + e1 - q2 - e2 <= -1 <=> | |
2666 // q2 - q1 - P34 + 1 <= e1 - e2 <= q2 - q1 - 1 <=> | |
2667 // 1 - P34 - P34 + 1 <= e1-e2 <= P34 - 1 - 1 => 0 <= e1-e2 <= P34 - 2 | |
2668 scale = delta - q1 + q2; // scale = (int)(e1 >> 49) - (int)(e2 >> 49) | |
2669 if (scale >= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does | |
2670 __mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]); | |
2671 } else if (scale >= 1) { | |
2672 // if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits | |
2673 if (q1 <= 19) { // C1 fits in 64 bits | |
2674 __mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]); | |
2675 } else { // q1 >= 20 | |
2676 C1.w[1] = C1_hi; | |
2677 C1.w[0] = C1_lo; | |
2678 __mul_128x64_to_128 (C1, ten2k64[scale], C1); | |
2679 } | |
2680 } else { // if (scale == 0) C1 is unchanged | |
2681 C1.w[1] = C1_hi; | |
2682 C1.w[0] = C1_lo; // only the low part is necessary | |
2683 } | |
2684 C1_hi = C1.w[1]; | |
2685 C1_lo = C1.w[0]; | |
2686 // now add C2 | |
2687 if (x_sign == y_sign) { | |
2688 // the result can overflow! | |
2689 C1_lo = C1_lo + C2_lo; | |
2690 C1_hi = C1_hi + C2_hi; | |
2691 if (C1_lo < C1.w[0]) | |
2692 C1_hi++; | |
2693 // test for overflow, possible only when C1 >= 10^34 | |
2694 if (C1_hi > 0x0001ed09bead87c0ull || (C1_hi == 0x0001ed09bead87c0ull && C1_lo >= 0x378d8e6400000000ull)) { // C1 >= 10^34 | |
2695 // in this case q = P34 + 1 and x = q - P34 = 1, so multiply | |
2696 // C'' = C'+ 5 = C1 + 5 by k1 ~ 10^(-1) calculated for P34 + 1 | |
2697 // decimal digits | |
2698 // Calculate C'' = C' + 1/2 * 10^x | |
2699 if (C1_lo >= 0xfffffffffffffffbull) { // low half add has carry | |
2700 C1_lo = C1_lo + 5; | |
2701 C1_hi = C1_hi + 1; | |
2702 } else { | |
2703 C1_lo = C1_lo + 5; | |
2704 } | |
2705 // the approximation of 10^(-1) was rounded up to 118 bits | |
2706 // 10^(-1) =~ 33333333333333333333333333333400 * 2^-129 | |
2707 // 10^(-1) =~ 19999999999999999999999999999a00 * 2^-128 | |
2708 C1.w[1] = C1_hi; | |
2709 C1.w[0] = C1_lo; // C'' | |
2710 ten2m1.w[1] = 0x1999999999999999ull; | |
2711 ten2m1.w[0] = 0x9999999999999a00ull; | |
2712 __mul_128x128_to_256 (P256, C1, ten2m1); // P256 = C*, f* | |
2713 // C* is actually floor(C*) in this case | |
2714 // the top Ex = 128 bits of 10^(-1) are | |
2715 // T* = 0x00199999999999999999999999999999 | |
2716 // if (0 < f* < 10^(-x)) then | |
2717 // if floor(C*) is even then C = floor(C*) - logical right | |
2718 // shift; C has p decimal digits, correct by Prop. 1) | |
2719 // else if floor(C*) is odd C = floor(C*) - 1 (logical right | |
2720 // shift; C has p decimal digits, correct by Pr. 1) | |
2721 // else | |
2722 // C = floor(C*) (logical right shift; C has p decimal digits, | |
2723 // correct by Property 1) | |
2724 // n = C * 10^(e2+x) | |
2725 if ((P256.w[1] || P256.w[0]) | |
2726 && (P256.w[1] < 0x1999999999999999ull | |
2727 || (P256.w[1] == 0x1999999999999999ull | |
2728 && P256.w[0] <= 0x9999999999999999ull))) { | |
2729 // the result is a midpoint | |
2730 if (P256.w[2] & 0x01) { | |
2731 is_midpoint_gt_even = 1; | |
2732 // if floor(C*) is odd C = floor(C*) - 1; the result is not 0 | |
2733 P256.w[2]--; | |
2734 if (P256.w[2] == 0xffffffffffffffffull) | |
2735 P256.w[3]--; | |
2736 } else { | |
2737 is_midpoint_lt_even = 1; | |
2738 } | |
2739 } | |
2740 // n = Cstar * 10^(e2+1) | |
2741 y_exp = y_exp + EXP_P1; | |
2742 // C* != 10^P34 because C* has P34 digits | |
2743 // check for overflow | |
2744 if (y_exp == EXP_MAX_P1 | |
2745 && (rnd_mode == ROUNDING_TO_NEAREST | |
2746 || rnd_mode == ROUNDING_TIES_AWAY)) { | |
2747 // overflow for RN | |
2748 res.w[1] = x_sign | 0x7800000000000000ull; // +/-inf | |
2749 res.w[0] = 0x0ull; | |
2750 // set the inexact flag | |
2751 *pfpsf |= INEXACT_EXCEPTION; | |
2752 // set the overflow flag | |
2753 *pfpsf |= OVERFLOW_EXCEPTION; | |
2754 BID_SWAP128 (res); | |
2755 BID_RETURN (res); | |
2756 } | |
2757 // if (0 < f* - 1/2 < 10^(-x)) then | |
2758 // the result of the addition is exact | |
2759 // else | |
2760 // the result of the addition is inexact | |
2761 if (P256.w[1] > 0x8000000000000000ull || (P256.w[1] == 0x8000000000000000ull && P256.w[0] > 0x0ull)) { // the result may be exact | |
2762 tmp64 = P256.w[1] - 0x8000000000000000ull; // f* - 1/2 | |
2763 if ((tmp64 > 0x1999999999999999ull | |
2764 || (tmp64 == 0x1999999999999999ull | |
2765 && P256.w[0] >= 0x9999999999999999ull))) { | |
2766 // set the inexact flag | |
2767 *pfpsf |= INEXACT_EXCEPTION; | |
2768 is_inexact = 1; | |
2769 } // else the result is exact | |
2770 } else { // the result is inexact | |
2771 // set the inexact flag | |
2772 *pfpsf |= INEXACT_EXCEPTION; | |
2773 is_inexact = 1; | |
2774 } | |
2775 C1_hi = P256.w[3]; | |
2776 C1_lo = P256.w[2]; | |
2777 if (!is_midpoint_gt_even && !is_midpoint_lt_even) { | |
2778 is_inexact_lt_midpoint = is_inexact | |
2779 && (P256.w[1] & 0x8000000000000000ull); | |
2780 is_inexact_gt_midpoint = is_inexact | |
2781 && !(P256.w[1] & 0x8000000000000000ull); | |
2782 } | |
2783 // general correction from RN to RA, RM, RP, RZ; result uses y_exp | |
2784 if (rnd_mode != ROUNDING_TO_NEAREST) { | |
2785 if ((!x_sign | |
2786 && ((rnd_mode == ROUNDING_UP | |
2787 && is_inexact_lt_midpoint) | |
2788 || ((rnd_mode == ROUNDING_TIES_AWAY | |
2789 || rnd_mode == ROUNDING_UP) | |
2790 && is_midpoint_gt_even))) || (x_sign | |
2791 && | |
2792 ((rnd_mode == | |
2793 ROUNDING_DOWN | |
2794 && | |
2795 is_inexact_lt_midpoint) | |
2796 || | |
2797 ((rnd_mode == | |
2798 ROUNDING_TIES_AWAY | |
2799 || rnd_mode | |
2800 == | |
2801 ROUNDING_DOWN) | |
2802 && | |
2803 is_midpoint_gt_even)))) | |
2804 { | |
2805 // C1 = C1 + 1 | |
2806 C1_lo = C1_lo + 1; | |
2807 if (C1_lo == 0) { // rounding overflow in the low 64 bits | |
2808 C1_hi = C1_hi + 1; | |
2809 } | |
2810 if (C1_hi == 0x0001ed09bead87c0ull | |
2811 && C1_lo == 0x378d8e6400000000ull) { | |
2812 // C1 = 10^34 => rounding overflow | |
2813 C1_hi = 0x0000314dc6448d93ull; | |
2814 C1_lo = 0x38c15b0a00000000ull; // 10^33 | |
2815 y_exp = y_exp + EXP_P1; | |
2816 } | |
2817 } else | |
2818 if ((is_midpoint_lt_even || is_inexact_gt_midpoint) && | |
2819 ((x_sign && (rnd_mode == ROUNDING_UP || | |
2820 rnd_mode == ROUNDING_TO_ZERO)) || | |
2821 (!x_sign && (rnd_mode == ROUNDING_DOWN || | |
2822 rnd_mode == ROUNDING_TO_ZERO)))) { | |
2823 // C1 = C1 - 1 | |
2824 C1_lo = C1_lo - 1; | |
2825 if (C1_lo == 0xffffffffffffffffull) | |
2826 C1_hi--; | |
2827 // check if we crossed into the lower decade | |
2828 if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1 | |
2829 C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1 | |
2830 C1_lo = 0x378d8e63ffffffffull; | |
2831 y_exp = y_exp - EXP_P1; | |
2832 // no underflow, because delta + q2 >= P34 + 1 | |
2833 } | |
2834 } else { | |
2835 ; // exact, the result is already correct | |
2836 } | |
2837 // in all cases check for overflow (RN and RA solved already) | |
2838 if (y_exp == EXP_MAX_P1) { // overflow | |
2839 if ((rnd_mode == ROUNDING_DOWN && x_sign) || // RM and res < 0 | |
2840 (rnd_mode == ROUNDING_UP && !x_sign)) { // RP and res > 0 | |
2841 C1_hi = 0x7800000000000000ull; // +inf | |
2842 C1_lo = 0x0ull; | |
2843 } else { // RM and res > 0, RP and res < 0, or RZ | |
2844 C1_hi = 0x5fffed09bead87c0ull; | |
2845 C1_lo = 0x378d8e63ffffffffull; | |
2846 } | |
2847 y_exp = 0; // x_sign is preserved | |
2848 // set the inexact flag (in case the exact addition was exact) | |
2849 *pfpsf |= INEXACT_EXCEPTION; | |
2850 // set the overflow flag | |
2851 *pfpsf |= OVERFLOW_EXCEPTION; | |
2852 } | |
2853 } | |
2854 } // else if (C1 < 10^34) then C1 is the coeff.; the result is exact | |
2855 // assemble the result | |
2856 res.w[1] = x_sign | y_exp | C1_hi; | |
2857 res.w[0] = C1_lo; | |
2858 } else { // if x_sign != y_sign the result is exact | |
2859 C1_lo = C2_lo - C1_lo; | |
2860 C1_hi = C2_hi - C1_hi; | |
2861 if (C1_lo > C2_lo) | |
2862 C1_hi--; | |
2863 if (C1_hi >= 0x8000000000000000ull) { // negative coefficient! | |
2864 C1_lo = ~C1_lo; | |
2865 C1_lo++; | |
2866 C1_hi = ~C1_hi; | |
2867 if (C1_lo == 0x0) | |
2868 C1_hi++; | |
2869 x_sign = y_sign; // the result will have the sign of y | |
2870 } | |
2871 // the result can be zero, but it cannot overflow | |
2872 if (C1_lo == 0 && C1_hi == 0) { | |
2873 // assemble the result | |
2874 if (x_exp < y_exp) | |
2875 res.w[1] = x_exp; | |
2876 else | |
2877 res.w[1] = y_exp; | |
2878 res.w[0] = 0; | |
2879 if (rnd_mode == ROUNDING_DOWN) { | |
2880 res.w[1] |= 0x8000000000000000ull; | |
2881 } | |
2882 BID_SWAP128 (res); | |
2883 BID_RETURN (res); | |
2884 } | |
2885 // assemble the result | |
2886 res.w[1] = y_sign | y_exp | C1_hi; | |
2887 res.w[0] = C1_lo; | |
2888 } | |
2889 } | |
2890 } | |
2891 BID_SWAP128 (res); | |
2892 BID_RETURN (res) | |
2893 } | |
2894 } | |
2895 | |
2896 | |
2897 | |
2898 // bid128_sub stands for bid128qq_sub | |
2899 | |
2900 /***************************************************************************** | |
2901 * BID128 sub | |
2902 ****************************************************************************/ | |
2903 | |
2904 #if DECIMAL_CALL_BY_REFERENCE | |
2905 void | |
2906 bid128_sub (UINT128 * pres, UINT128 * px, UINT128 * py | |
2907 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
2908 _EXC_INFO_PARAM) { | |
2909 UINT128 x = *px, y = *py; | |
2910 #if !DECIMAL_GLOBAL_ROUNDING | |
2911 unsigned int rnd_mode = *prnd_mode; | |
2912 #endif | |
2913 #else | |
2914 UINT128 | |
2915 bid128_sub (UINT128 x, UINT128 y | |
2916 _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
2917 _EXC_INFO_PARAM) { | |
2918 #endif | |
2919 | |
2920 UINT128 res; | |
2921 UINT64 y_sign; | |
2922 | |
2923 if ((y.w[HIGH_128W] & MASK_NAN) != MASK_NAN) { // y is not NAN | |
2924 // change its sign | |
2925 y_sign = y.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative | |
2926 if (y_sign) | |
2927 y.w[HIGH_128W] = y.w[HIGH_128W] & 0x7fffffffffffffffull; | |
2928 else | |
2929 y.w[HIGH_128W] = y.w[HIGH_128W] | 0x8000000000000000ull; | |
2930 } | |
2931 #if DECIMAL_CALL_BY_REFERENCE | |
2932 bid128_add (&res, &x, &y | |
2933 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
2934 _EXC_INFO_ARG); | |
2935 #else | |
2936 res = bid128_add (x, y | |
2937 _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
2938 _EXC_INFO_ARG); | |
2939 #endif | |
2940 BID_RETURN (res); | |
2941 } |