comparison libgcc/config/libbid/bid128_sqrt.c @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 04ced10e8804
comparison
equal deleted inserted replaced
-1:000000000000 0:a06113de4d67
1 /* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free
7 Software Foundation; either version 3, or (at your option) any later
8 version.
9
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #define BID_128RES
25 #include "bid_internal.h"
26 #include "bid_sqrt_macros.h"
27 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
28 #include <fenv.h>
29
30 #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT
31 #endif
32
33 BID128_FUNCTION_ARG1 (bid128_sqrt, x)
34
35 UINT256 M256, C256, C4, C8;
36 UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
37 UINT64 sign_x, Carry;
38 SINT64 D;
39 int_float fx, f64;
40 int exponent_x, bin_expon_cx;
41 int digits, scale, exponent_q;
42 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
43 fexcept_t binaryflags = 0;
44 #endif
45
46 // unpack arguments, check for NaN or Infinity
47 if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
48 res.w[1] = CX.w[1];
49 res.w[0] = CX.w[0];
50 // NaN ?
51 if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
52 #ifdef SET_STATUS_FLAGS
53 if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN
54 __set_status_flags (pfpsf, INVALID_EXCEPTION);
55 #endif
56 res.w[1] = CX.w[1] & QUIET_MASK64;
57 BID_RETURN (res);
58 }
59 // x is Infinity?
60 if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
61 res.w[1] = CX.w[1];
62 if (sign_x) {
63 // -Inf, return NaN
64 res.w[1] = 0x7c00000000000000ull;
65 #ifdef SET_STATUS_FLAGS
66 __set_status_flags (pfpsf, INVALID_EXCEPTION);
67 #endif
68 }
69 BID_RETURN (res);
70 }
71 // x is 0 otherwise
72
73 res.w[1] =
74 sign_x |
75 ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) << 49);
76 res.w[0] = 0;
77 BID_RETURN (res);
78 }
79 if (sign_x) {
80 res.w[1] = 0x7c00000000000000ull;
81 res.w[0] = 0;
82 #ifdef SET_STATUS_FLAGS
83 __set_status_flags (pfpsf, INVALID_EXCEPTION);
84 #endif
85 BID_RETURN (res);
86 }
87 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
88 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
89 #endif
90 // 2^64
91 f64.i = 0x5f800000;
92
93 // fx ~ CX
94 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
95 bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
96 digits = estimate_decimal_digits[bin_expon_cx];
97
98 A10 = CX;
99 if (exponent_x & 1) {
100 A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
101 A10.w[0] = CX.w[0] << 3;
102 CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
103 CX2.w[0] = CX.w[0] << 1;
104 __add_128_128 (A10, A10, CX2);
105 }
106
107 CS.w[0] = short_sqrt128 (A10);
108 CS.w[1] = 0;
109 // check for exact result
110 if (CS.w[0] * CS.w[0] == A10.w[0]) {
111 __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
112 if (S2.w[1] == A10.w[1]) // && S2.w[0]==A10.w[0])
113 {
114 get_BID128_very_fast (&res, 0,
115 (exponent_x +
116 DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
117 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
118 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
119 #endif
120 BID_RETURN (res);
121 }
122 }
123 // get number of digits in CX
124 D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1];
125 if (D > 0
126 || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0]))
127 digits++;
128
129 // if exponent is odd, scale coefficient by 10
130 scale = 67 - digits;
131 exponent_q = exponent_x - scale;
132 scale += (exponent_q & 1); // exp. bias is even
133
134 if (scale > 38) {
135 T128 = power10_table_128[scale - 37];
136 __mul_128x128_low (CX1, CX, T128);
137
138 TP128 = power10_table_128[37];
139 __mul_128x128_to_256 (C256, CX1, TP128);
140 } else {
141 T128 = power10_table_128[scale];
142 __mul_128x128_to_256 (C256, CX, T128);
143 }
144
145
146 // 4*C256
147 C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
148 C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
149 C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
150 C4.w[0] = C256.w[0] << 2;
151
152 long_sqrt128 (&CS, C256);
153
154 #ifndef IEEE_ROUND_NEAREST
155 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
156 if (!((rnd_mode) & 3)) {
157 #endif
158 #endif
159 // compare to midpoints
160 CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
161 CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
162 // CSM^2
163 //__mul_128x128_to_256(M256, CSM, CSM);
164 __sqr128_to_256 (M256, CSM);
165
166 if (C4.w[3] > M256.w[3]
167 || (C4.w[3] == M256.w[3]
168 && (C4.w[2] > M256.w[2]
169 || (C4.w[2] == M256.w[2]
170 && (C4.w[1] > M256.w[1]
171 || (C4.w[1] == M256.w[1]
172 && C4.w[0] > M256.w[0])))))) {
173 // round up
174 CS.w[0]++;
175 if (!CS.w[0])
176 CS.w[1]++;
177 } else {
178 C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
179 C8.w[0] = CS.w[0] << 3;
180 // M256 - 8*CSM
181 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
182 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
183 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
184 M256.w[3] = M256.w[3] - Carry;
185
186 // if CSM' > C256, round up
187 if (M256.w[3] > C4.w[3]
188 || (M256.w[3] == C4.w[3]
189 && (M256.w[2] > C4.w[2]
190 || (M256.w[2] == C4.w[2]
191 && (M256.w[1] > C4.w[1]
192 || (M256.w[1] == C4.w[1]
193 && M256.w[0] > C4.w[0])))))) {
194 // round down
195 if (!CS.w[0])
196 CS.w[1]--;
197 CS.w[0]--;
198 }
199 }
200 #ifndef IEEE_ROUND_NEAREST
201 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
202 } else {
203 __sqr128_to_256 (M256, CS);
204 C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
205 C8.w[0] = CS.w[0] << 1;
206 if (M256.w[3] > C256.w[3]
207 || (M256.w[3] == C256.w[3]
208 && (M256.w[2] > C256.w[2]
209 || (M256.w[2] == C256.w[2]
210 && (M256.w[1] > C256.w[1]
211 || (M256.w[1] == C256.w[1]
212 && M256.w[0] > C256.w[0])))))) {
213 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
214 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
215 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
216 M256.w[3] = M256.w[3] - Carry;
217 M256.w[0]++;
218 if (!M256.w[0]) {
219 M256.w[1]++;
220 if (!M256.w[1]) {
221 M256.w[2]++;
222 if (!M256.w[2])
223 M256.w[3]++;
224 }
225 }
226
227 if (!CS.w[0])
228 CS.w[1]--;
229 CS.w[0]--;
230
231 if (M256.w[3] > C256.w[3]
232 || (M256.w[3] == C256.w[3]
233 && (M256.w[2] > C256.w[2]
234 || (M256.w[2] == C256.w[2]
235 && (M256.w[1] > C256.w[1]
236 || (M256.w[1] == C256.w[1]
237 && M256.w[0] > C256.w[0])))))) {
238
239 if (!CS.w[0])
240 CS.w[1]--;
241 CS.w[0]--;
242 }
243 }
244
245 else {
246 __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
247 __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
248 __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
249 M256.w[3] = M256.w[3] + Carry;
250 M256.w[0]++;
251 if (!M256.w[0]) {
252 M256.w[1]++;
253 if (!M256.w[1]) {
254 M256.w[2]++;
255 if (!M256.w[2])
256 M256.w[3]++;
257 }
258 }
259 if (M256.w[3] < C256.w[3]
260 || (M256.w[3] == C256.w[3]
261 && (M256.w[2] < C256.w[2]
262 || (M256.w[2] == C256.w[2]
263 && (M256.w[1] < C256.w[1]
264 || (M256.w[1] == C256.w[1]
265 && M256.w[0] <= C256.w[0])))))) {
266
267 CS.w[0]++;
268 if (!CS.w[0])
269 CS.w[1]++;
270 }
271 }
272 // RU?
273 if ((rnd_mode) == ROUNDING_UP) {
274 CS.w[0]++;
275 if (!CS.w[0])
276 CS.w[1]++;
277 }
278
279 }
280 #endif
281 #endif
282
283 #ifdef SET_STATUS_FLAGS
284 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
285 #endif
286 get_BID128_fast (&res, 0,
287 (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS);
288 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
289 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
290 #endif
291 BID_RETURN (res);
292 }
293
294
295
296 BID128_FUNCTION_ARGTYPE1 (bid128d_sqrt, UINT64, x)
297
298 UINT256 M256, C256, C4, C8;
299 UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res;
300 UINT64 sign_x, Carry;
301 SINT64 D;
302 int_float fx, f64;
303 int exponent_x, bin_expon_cx;
304 int digits, scale, exponent_q;
305 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
306 fexcept_t binaryflags = 0;
307 #endif
308
309 // unpack arguments, check for NaN or Infinity
310 // unpack arguments, check for NaN or Infinity
311 CX.w[1] = 0;
312 if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], x)) {
313 res.w[1] = CX.w[0];
314 res.w[0] = 0;
315 // NaN ?
316 if ((x & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
317 #ifdef SET_STATUS_FLAGS
318 if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN
319 __set_status_flags (pfpsf, INVALID_EXCEPTION);
320 #endif
321 res.w[0] = (CX.w[0] & 0x0003ffffffffffffull);
322 __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]);
323 res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull);
324 BID_RETURN (res);
325 }
326 // x is Infinity?
327 if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) {
328 if (sign_x) {
329 // -Inf, return NaN
330 res.w[1] = 0x7c00000000000000ull;
331 #ifdef SET_STATUS_FLAGS
332 __set_status_flags (pfpsf, INVALID_EXCEPTION);
333 #endif
334 }
335 BID_RETURN (res);
336 }
337 // x is 0 otherwise
338
339 exponent_x =
340 exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128;
341 res.w[1] =
342 sign_x | ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1)
343 << 49);
344 res.w[0] = 0;
345 BID_RETURN (res);
346 }
347 if (sign_x) {
348 res.w[1] = 0x7c00000000000000ull;
349 res.w[0] = 0;
350 #ifdef SET_STATUS_FLAGS
351 __set_status_flags (pfpsf, INVALID_EXCEPTION);
352 #endif
353 BID_RETURN (res);
354 }
355 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
356 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
357 #endif
358 exponent_x =
359 exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128;
360
361 // 2^64
362 f64.i = 0x5f800000;
363
364 // fx ~ CX
365 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
366 bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
367 digits = estimate_decimal_digits[bin_expon_cx];
368
369 A10 = CX;
370 if (exponent_x & 1) {
371 A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
372 A10.w[0] = CX.w[0] << 3;
373 CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
374 CX2.w[0] = CX.w[0] << 1;
375 __add_128_128 (A10, A10, CX2);
376 }
377
378 CS.w[0] = short_sqrt128 (A10);
379 CS.w[1] = 0;
380 // check for exact result
381 if (CS.w[0] * CS.w[0] == A10.w[0]) {
382 __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]);
383 if (S2.w[1] == A10.w[1]) {
384 get_BID128_very_fast (&res, 0,
385 (exponent_x + DECIMAL_EXPONENT_BIAS_128) >> 1,
386 CS);
387 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
388 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
389 #endif
390 BID_RETURN (res);
391 }
392 }
393 // get number of digits in CX
394 D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1];
395 if (D > 0
396 || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0]))
397 digits++;
398
399 // if exponent is odd, scale coefficient by 10
400 scale = 67 - digits;
401 exponent_q = exponent_x - scale;
402 scale += (exponent_q & 1); // exp. bias is even
403
404 if (scale > 38) {
405 T128 = power10_table_128[scale - 37];
406 __mul_128x128_low (CX1, CX, T128);
407
408 TP128 = power10_table_128[37];
409 __mul_128x128_to_256 (C256, CX1, TP128);
410 } else {
411 T128 = power10_table_128[scale];
412 __mul_128x128_to_256 (C256, CX, T128);
413 }
414
415
416 // 4*C256
417 C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
418 C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
419 C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
420 C4.w[0] = C256.w[0] << 2;
421
422 long_sqrt128 (&CS, C256);
423
424 #ifndef IEEE_ROUND_NEAREST
425 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
426 if (!((rnd_mode) & 3)) {
427 #endif
428 #endif
429 // compare to midpoints
430 CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
431 CSM.w[0] = (CS.w[0] + CS.w[0]) | 1;
432 // CSM^2
433 //__mul_128x128_to_256(M256, CSM, CSM);
434 __sqr128_to_256 (M256, CSM);
435
436 if (C4.w[3] > M256.w[3]
437 || (C4.w[3] == M256.w[3]
438 && (C4.w[2] > M256.w[2]
439 || (C4.w[2] == M256.w[2]
440 && (C4.w[1] > M256.w[1]
441 || (C4.w[1] == M256.w[1]
442 && C4.w[0] > M256.w[0])))))) {
443 // round up
444 CS.w[0]++;
445 if (!CS.w[0])
446 CS.w[1]++;
447 } else {
448 C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
449 C8.w[0] = CS.w[0] << 3;
450 // M256 - 8*CSM
451 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
452 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
453 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
454 M256.w[3] = M256.w[3] - Carry;
455
456 // if CSM' > C256, round up
457 if (M256.w[3] > C4.w[3]
458 || (M256.w[3] == C4.w[3]
459 && (M256.w[2] > C4.w[2]
460 || (M256.w[2] == C4.w[2]
461 && (M256.w[1] > C4.w[1]
462 || (M256.w[1] == C4.w[1]
463 && M256.w[0] > C4.w[0])))))) {
464 // round down
465 if (!CS.w[0])
466 CS.w[1]--;
467 CS.w[0]--;
468 }
469 }
470 #ifndef IEEE_ROUND_NEAREST
471 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
472 } else {
473 __sqr128_to_256 (M256, CS);
474 C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
475 C8.w[0] = CS.w[0] << 1;
476 if (M256.w[3] > C256.w[3]
477 || (M256.w[3] == C256.w[3]
478 && (M256.w[2] > C256.w[2]
479 || (M256.w[2] == C256.w[2]
480 && (M256.w[1] > C256.w[1]
481 || (M256.w[1] == C256.w[1]
482 && M256.w[0] > C256.w[0])))))) {
483 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
484 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
485 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
486 M256.w[3] = M256.w[3] - Carry;
487 M256.w[0]++;
488 if (!M256.w[0]) {
489 M256.w[1]++;
490 if (!M256.w[1]) {
491 M256.w[2]++;
492 if (!M256.w[2])
493 M256.w[3]++;
494 }
495 }
496
497 if (!CS.w[0])
498 CS.w[1]--;
499 CS.w[0]--;
500
501 if (M256.w[3] > C256.w[3]
502 || (M256.w[3] == C256.w[3]
503 && (M256.w[2] > C256.w[2]
504 || (M256.w[2] == C256.w[2]
505 && (M256.w[1] > C256.w[1]
506 || (M256.w[1] == C256.w[1]
507 && M256.w[0] > C256.w[0])))))) {
508
509 if (!CS.w[0])
510 CS.w[1]--;
511 CS.w[0]--;
512 }
513 }
514
515 else {
516 __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
517 __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry);
518 __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry);
519 M256.w[3] = M256.w[3] + Carry;
520 M256.w[0]++;
521 if (!M256.w[0]) {
522 M256.w[1]++;
523 if (!M256.w[1]) {
524 M256.w[2]++;
525 if (!M256.w[2])
526 M256.w[3]++;
527 }
528 }
529 if (M256.w[3] < C256.w[3]
530 || (M256.w[3] == C256.w[3]
531 && (M256.w[2] < C256.w[2]
532 || (M256.w[2] == C256.w[2]
533 && (M256.w[1] < C256.w[1]
534 || (M256.w[1] == C256.w[1]
535 && M256.w[0] <= C256.w[0])))))) {
536
537 CS.w[0]++;
538 if (!CS.w[0])
539 CS.w[1]++;
540 }
541 }
542 // RU?
543 if ((rnd_mode) == ROUNDING_UP) {
544 CS.w[0]++;
545 if (!CS.w[0])
546 CS.w[1]++;
547 }
548
549 }
550 #endif
551 #endif
552
553 #ifdef SET_STATUS_FLAGS
554 __set_status_flags (pfpsf, INEXACT_EXCEPTION);
555 #endif
556 get_BID128_fast (&res, 0, (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1,
557 CS);
558 #ifdef UNCHANGED_BINARY_STATUS_FLAGS
559 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
560 #endif
561 BID_RETURN (res);
562
563
564 }