Mercurial > hg > CbC > CbC_gcc
comparison libgcc/config/libbid/bid64_fma.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Copyright (C) 2007, 2009 Free Software Foundation, Inc. | |
2 | |
3 This file is part of GCC. | |
4 | |
5 GCC is free software; you can redistribute it and/or modify it under | |
6 the terms of the GNU General Public License as published by the Free | |
7 Software Foundation; either version 3, or (at your option) any later | |
8 version. | |
9 | |
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 for more details. | |
14 | |
15 Under Section 7 of GPL version 3, you are granted additional | |
16 permissions described in the GCC Runtime Library Exception, version | |
17 3.1, as published by the Free Software Foundation. | |
18 | |
19 You should have received a copy of the GNU General Public License and | |
20 a copy of the GCC Runtime Library Exception along with this program; | |
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
22 <http://www.gnu.org/licenses/>. */ | |
23 | |
24 /***************************************************************************** | |
25 * BID64 fma | |
26 ***************************************************************************** | |
27 * | |
28 * Algorithm description: | |
29 * | |
30 * if multiplication is guranteed exact (short coefficients) | |
31 * call the unpacked arg. equivalent of bid64_add(x*y, z) | |
32 * else | |
33 * get full coefficient_x*coefficient_y product | |
34 * call subroutine to perform addition of 64-bit argument | |
35 * to 128-bit product | |
36 * | |
37 ****************************************************************************/ | |
38 | |
39 #include "bid_inline_add.h" | |
40 | |
41 #if DECIMAL_CALL_BY_REFERENCE | |
42 extern void bid64_mul (UINT64 * pres, UINT64 * px, | |
43 UINT64 * | |
44 py _RND_MODE_PARAM _EXC_FLAGS_PARAM | |
45 _EXC_MASKS_PARAM _EXC_INFO_PARAM); | |
46 #else | |
47 | |
48 extern UINT64 bid64_mul (UINT64 x, | |
49 UINT64 y _RND_MODE_PARAM | |
50 _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
51 _EXC_INFO_PARAM); | |
52 #endif | |
53 | |
54 #if DECIMAL_CALL_BY_REFERENCE | |
55 | |
56 void | |
57 bid64_fma (UINT64 * pres, UINT64 * px, UINT64 * py, | |
58 UINT64 * | |
59 pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM | |
60 _EXC_INFO_PARAM) { | |
61 UINT64 x, y, z; | |
62 #else | |
63 | |
64 UINT64 | |
65 bid64_fma (UINT64 x, UINT64 y, | |
66 UINT64 z _RND_MODE_PARAM _EXC_FLAGS_PARAM | |
67 _EXC_MASKS_PARAM _EXC_INFO_PARAM) { | |
68 #endif | |
69 UINT128 P, PU, CT, CZ; | |
70 UINT64 sign_x, sign_y, coefficient_x, coefficient_y, sign_z, | |
71 coefficient_z; | |
72 UINT64 C64, remainder_y, res; | |
73 UINT64 CYh, CY0L, T, valid_x, valid_y, valid_z; | |
74 int_double tempx, tempy; | |
75 int extra_digits, exponent_x, exponent_y, bin_expon_cx, bin_expon_cy, | |
76 bin_expon_product, rmode; | |
77 int digits_p, bp, final_exponent, exponent_z, digits_z, ez, ey, | |
78 scale_z, uf_status; | |
79 | |
80 #if DECIMAL_CALL_BY_REFERENCE | |
81 #if !DECIMAL_GLOBAL_ROUNDING | |
82 _IDEC_round rnd_mode = *prnd_mode; | |
83 #endif | |
84 x = *px; | |
85 y = *py; | |
86 z = *pz; | |
87 #endif | |
88 | |
89 valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x); | |
90 valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y); | |
91 valid_z = unpack_BID64 (&sign_z, &exponent_z, &coefficient_z, z); | |
92 | |
93 // unpack arguments, check for NaN, Infinity, or 0 | |
94 if (!valid_x || !valid_y || !valid_z) { | |
95 | |
96 if ((y & MASK_NAN) == MASK_NAN) { // y is NAN | |
97 // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y) | |
98 // check first for non-canonical NaN payload | |
99 y = y & 0xfe03ffffffffffffull; // clear G6-G12 | |
100 if ((y & 0x0003ffffffffffffull) > 999999999999999ull) { | |
101 y = y & 0xfe00000000000000ull; // clear G6-G12 and the payload bits | |
102 } | |
103 if ((y & MASK_SNAN) == MASK_SNAN) { // y is SNAN | |
104 // set invalid flag | |
105 *pfpsf |= INVALID_EXCEPTION; | |
106 // return quiet (y) | |
107 res = y & 0xfdffffffffffffffull; | |
108 } else { // y is QNaN | |
109 // return y | |
110 res = y; | |
111 // if z = SNaN or x = SNaN signal invalid exception | |
112 if ((z & MASK_SNAN) == MASK_SNAN | |
113 || (x & MASK_SNAN) == MASK_SNAN) { | |
114 // set invalid flag | |
115 *pfpsf |= INVALID_EXCEPTION; | |
116 } | |
117 } | |
118 BID_RETURN (res) | |
119 } else if ((z & MASK_NAN) == MASK_NAN) { // z is NAN | |
120 // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z) | |
121 // check first for non-canonical NaN payload | |
122 z = z & 0xfe03ffffffffffffull; // clear G6-G12 | |
123 if ((z & 0x0003ffffffffffffull) > 999999999999999ull) { | |
124 z = z & 0xfe00000000000000ull; // clear G6-G12 and the payload bits | |
125 } | |
126 if ((z & MASK_SNAN) == MASK_SNAN) { // z is SNAN | |
127 // set invalid flag | |
128 *pfpsf |= INVALID_EXCEPTION; | |
129 // return quiet (z) | |
130 res = z & 0xfdffffffffffffffull; | |
131 } else { // z is QNaN | |
132 // return z | |
133 res = z; | |
134 // if x = SNaN signal invalid exception | |
135 if ((x & MASK_SNAN) == MASK_SNAN) { | |
136 // set invalid flag | |
137 *pfpsf |= INVALID_EXCEPTION; | |
138 } | |
139 } | |
140 BID_RETURN (res) | |
141 } else if ((x & MASK_NAN) == MASK_NAN) { // x is NAN | |
142 // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x) | |
143 // check first for non-canonical NaN payload | |
144 x = x & 0xfe03ffffffffffffull; // clear G6-G12 | |
145 if ((x & 0x0003ffffffffffffull) > 999999999999999ull) { | |
146 x = x & 0xfe00000000000000ull; // clear G6-G12 and the payload bits | |
147 } | |
148 if ((x & MASK_SNAN) == MASK_SNAN) { // x is SNAN | |
149 // set invalid flag | |
150 *pfpsf |= INVALID_EXCEPTION; | |
151 // return quiet (x) | |
152 res = x & 0xfdffffffffffffffull; | |
153 } else { // x is QNaN | |
154 // return x | |
155 res = x; // clear out G[6]-G[16] | |
156 } | |
157 BID_RETURN (res) | |
158 } | |
159 | |
160 if (!valid_x) { | |
161 // x is Inf. or 0 | |
162 | |
163 // x is Infinity? | |
164 if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
165 // check if y is 0 | |
166 if (!coefficient_y) { | |
167 // y==0, return NaN | |
168 #ifdef SET_STATUS_FLAGS | |
169 if ((z & 0x7e00000000000000ull) != 0x7c00000000000000ull) | |
170 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
171 #endif | |
172 BID_RETURN (0x7c00000000000000ull); | |
173 } | |
174 // test if z is Inf of oposite sign | |
175 if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull) | |
176 && (((x ^ y) ^ z) & 0x8000000000000000ull)) { | |
177 // return NaN | |
178 #ifdef SET_STATUS_FLAGS | |
179 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
180 #endif | |
181 BID_RETURN (0x7c00000000000000ull); | |
182 } | |
183 // otherwise return +/-Inf | |
184 BID_RETURN (((x ^ y) & 0x8000000000000000ull) | | |
185 0x7800000000000000ull); | |
186 } | |
187 // x is 0 | |
188 if (((y & 0x7800000000000000ull) != 0x7800000000000000ull) | |
189 && ((z & 0x7800000000000000ull) != 0x7800000000000000ull)) { | |
190 | |
191 if (coefficient_z) { | |
192 exponent_y = exponent_x - DECIMAL_EXPONENT_BIAS + exponent_y; | |
193 | |
194 sign_z = z & 0x8000000000000000ull; | |
195 | |
196 if (exponent_y >= exponent_z) | |
197 BID_RETURN (z); | |
198 res = | |
199 add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z, | |
200 &rnd_mode, pfpsf); | |
201 BID_RETURN (res); | |
202 } | |
203 } | |
204 } | |
205 if (!valid_y) { | |
206 // y is Inf. or 0 | |
207 | |
208 // y is Infinity? | |
209 if ((y & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
210 // check if x is 0 | |
211 if (!coefficient_x) { | |
212 // y==0, return NaN | |
213 #ifdef SET_STATUS_FLAGS | |
214 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
215 #endif | |
216 BID_RETURN (0x7c00000000000000ull); | |
217 } | |
218 // test if z is Inf of oposite sign | |
219 if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull) | |
220 && (((x ^ y) ^ z) & 0x8000000000000000ull)) { | |
221 #ifdef SET_STATUS_FLAGS | |
222 __set_status_flags (pfpsf, INVALID_EXCEPTION); | |
223 #endif | |
224 // return NaN | |
225 BID_RETURN (0x7c00000000000000ull); | |
226 } | |
227 // otherwise return +/-Inf | |
228 BID_RETURN (((x ^ y) & 0x8000000000000000ull) | | |
229 0x7800000000000000ull); | |
230 } | |
231 // y is 0 | |
232 if (((z & 0x7800000000000000ull) != 0x7800000000000000ull)) { | |
233 | |
234 if (coefficient_z) { | |
235 exponent_y += exponent_x - DECIMAL_EXPONENT_BIAS; | |
236 | |
237 sign_z = z & 0x8000000000000000ull; | |
238 | |
239 if (exponent_y >= exponent_z) | |
240 BID_RETURN (z); | |
241 res = | |
242 add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z, | |
243 &rnd_mode, pfpsf); | |
244 BID_RETURN (res); | |
245 } | |
246 } | |
247 } | |
248 | |
249 if (!valid_z) { | |
250 // y is Inf. or 0 | |
251 | |
252 // test if y is NaN/Inf | |
253 if ((z & 0x7800000000000000ull) == 0x7800000000000000ull) { | |
254 BID_RETURN (coefficient_z & QUIET_MASK64); | |
255 } | |
256 // z is 0, return x*y | |
257 if ((!coefficient_x) || (!coefficient_y)) { | |
258 //0+/-0 | |
259 exponent_x += exponent_y - DECIMAL_EXPONENT_BIAS; | |
260 if (exponent_x > DECIMAL_MAX_EXPON_64) | |
261 exponent_x = DECIMAL_MAX_EXPON_64; | |
262 else if (exponent_x < 0) | |
263 exponent_x = 0; | |
264 if (exponent_x <= exponent_z) | |
265 res = ((UINT64) exponent_x) << 53; | |
266 else | |
267 res = ((UINT64) exponent_z) << 53; | |
268 if ((sign_x ^ sign_y) == sign_z) | |
269 res |= sign_z; | |
270 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY | |
271 #ifndef IEEE_ROUND_NEAREST | |
272 else if (rnd_mode == ROUNDING_DOWN) | |
273 res |= 0x8000000000000000ull; | |
274 #endif | |
275 #endif | |
276 BID_RETURN (res); | |
277 } | |
278 } | |
279 } | |
280 | |
281 /* get binary coefficients of x and y */ | |
282 | |
283 //--- get number of bits in the coefficients of x and y --- | |
284 // version 2 (original) | |
285 tempx.d = (double) coefficient_x; | |
286 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52); | |
287 | |
288 tempy.d = (double) coefficient_y; | |
289 bin_expon_cy = ((tempy.i & MASK_BINARY_EXPONENT) >> 52); | |
290 | |
291 // magnitude estimate for coefficient_x*coefficient_y is | |
292 // 2^(unbiased_bin_expon_cx + unbiased_bin_expon_cx) | |
293 bin_expon_product = bin_expon_cx + bin_expon_cy; | |
294 | |
295 // check if coefficient_x*coefficient_y<2^(10*k+3) | |
296 // equivalent to unbiased_bin_expon_cx + unbiased_bin_expon_cx < 10*k+1 | |
297 if (bin_expon_product < UPPER_EXPON_LIMIT + 2 * BINARY_EXPONENT_BIAS) { | |
298 // easy multiply | |
299 C64 = coefficient_x * coefficient_y; | |
300 final_exponent = exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS; | |
301 if ((final_exponent > 0) || (!coefficient_z)) { | |
302 res = | |
303 get_add64 (sign_x ^ sign_y, | |
304 final_exponent, C64, sign_z, exponent_z, coefficient_z, rnd_mode, pfpsf); | |
305 BID_RETURN (res); | |
306 } else { | |
307 P.w[0] = C64; | |
308 P.w[1] = 0; | |
309 extra_digits = 0; | |
310 } | |
311 } else { | |
312 if (!coefficient_z) { | |
313 #if DECIMAL_CALL_BY_REFERENCE | |
314 bid64_mul (&res, px, | |
315 py _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
316 _EXC_INFO_ARG); | |
317 #else | |
318 res = | |
319 bid64_mul (x, | |
320 y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG | |
321 _EXC_INFO_ARG); | |
322 #endif | |
323 BID_RETURN (res); | |
324 } | |
325 // get 128-bit product: coefficient_x*coefficient_y | |
326 __mul_64x64_to_128 (P, coefficient_x, coefficient_y); | |
327 | |
328 // tighten binary range of P: leading bit is 2^bp | |
329 // unbiased_bin_expon_product <= bp <= unbiased_bin_expon_product+1 | |
330 bin_expon_product -= 2 * BINARY_EXPONENT_BIAS; | |
331 __tight_bin_range_128 (bp, P, bin_expon_product); | |
332 | |
333 // get number of decimal digits in the product | |
334 digits_p = estimate_decimal_digits[bp]; | |
335 if (!(__unsigned_compare_gt_128 (power10_table_128[digits_p], P))) | |
336 digits_p++; // if power10_table_128[digits_p] <= P | |
337 | |
338 // determine number of decimal digits to be rounded out | |
339 extra_digits = digits_p - MAX_FORMAT_DIGITS; | |
340 final_exponent = | |
341 exponent_x + exponent_y + extra_digits - DECIMAL_EXPONENT_BIAS; | |
342 } | |
343 | |
344 if (((unsigned) final_exponent) >= 3 * 256) { | |
345 if (final_exponent < 0) { | |
346 //--- get number of bits in the coefficients of z --- | |
347 tempx.d = (double) coefficient_z; | |
348 bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff; | |
349 // get number of decimal digits in the coeff_x | |
350 digits_z = estimate_decimal_digits[bin_expon_cx]; | |
351 if (coefficient_z >= power10_table_128[digits_z].w[0]) | |
352 digits_z++; | |
353 // underflow | |
354 if ((final_exponent + 16 < 0) | |
355 || (exponent_z + digits_z > 33 + final_exponent)) { | |
356 res = | |
357 BID_normalize (sign_z, exponent_z, coefficient_z, | |
358 sign_x ^ sign_y, 1, rnd_mode, pfpsf); | |
359 BID_RETURN (res); | |
360 } | |
361 | |
362 ez = exponent_z + digits_z - 16; | |
363 if (ez < 0) | |
364 ez = 0; | |
365 scale_z = exponent_z - ez; | |
366 coefficient_z *= power10_table_128[scale_z].w[0]; | |
367 ey = final_exponent - extra_digits; | |
368 extra_digits = ez - ey; | |
369 if (extra_digits > 33) { | |
370 res = | |
371 BID_normalize (sign_z, exponent_z, coefficient_z, | |
372 sign_x ^ sign_y, 1, rnd_mode, pfpsf); | |
373 BID_RETURN (res); | |
374 } | |
375 //else // extra_digits<=32 | |
376 | |
377 if (extra_digits > 17) { | |
378 CYh = __truncate (P, 16); | |
379 // get remainder | |
380 T = power10_table_128[16].w[0]; | |
381 __mul_64x64_to_64 (CY0L, CYh, T); | |
382 remainder_y = P.w[0] - CY0L; | |
383 | |
384 extra_digits -= 16; | |
385 P.w[0] = CYh; | |
386 P.w[1] = 0; | |
387 } else | |
388 remainder_y = 0; | |
389 | |
390 // align coeff_x, CYh | |
391 __mul_64x64_to_128 (CZ, coefficient_z, | |
392 power10_table_128[extra_digits].w[0]); | |
393 | |
394 if (sign_z == (sign_y ^ sign_x)) { | |
395 __add_128_128 (CT, CZ, P); | |
396 if (__unsigned_compare_ge_128 | |
397 (CT, power10_table_128[16 + extra_digits])) { | |
398 extra_digits++; | |
399 ez++; | |
400 } | |
401 } else { | |
402 if (remainder_y && (__unsigned_compare_ge_128 (CZ, P))) { | |
403 P.w[0]++; | |
404 if (!P.w[0]) | |
405 P.w[1]++; | |
406 } | |
407 __sub_128_128 (CT, CZ, P); | |
408 if (((SINT64) CT.w[1]) < 0) { | |
409 sign_z = sign_y ^ sign_x; | |
410 CT.w[0] = 0 - CT.w[0]; | |
411 CT.w[1] = 0 - CT.w[1]; | |
412 if (CT.w[0]) | |
413 CT.w[1]--; | |
414 } else if(!(CT.w[1]|CT.w[0])) | |
415 sign_z = (rnd_mode!=ROUNDING_DOWN)? 0: 0x8000000000000000ull; | |
416 if (ez | |
417 && | |
418 (__unsigned_compare_gt_128 | |
419 (power10_table_128[15 + extra_digits], CT))) { | |
420 extra_digits--; | |
421 ez--; | |
422 } | |
423 } | |
424 | |
425 #ifdef SET_STATUS_FLAGS | |
426 uf_status = 0; | |
427 if ((!ez) | |
428 && | |
429 __unsigned_compare_gt_128 (power10_table_128 | |
430 [extra_digits + 15], CT)) { | |
431 rmode = rnd_mode; | |
432 if (sign_z && (unsigned) (rmode - 1) < 2) | |
433 rmode = 3 - rmode; | |
434 //__add_128_64(PU, CT, round_const_table[rmode][extra_digits]); | |
435 PU = power10_table_128[extra_digits + 15]; | |
436 PU.w[0]--; | |
437 if (__unsigned_compare_gt_128 (PU, CT) | |
438 || (rmode == ROUNDING_DOWN) | |
439 || (rmode == ROUNDING_TO_ZERO)) | |
440 uf_status = UNDERFLOW_EXCEPTION; | |
441 else if (extra_digits < 2) { | |
442 if ((rmode == ROUNDING_UP)) { | |
443 if (!extra_digits) | |
444 uf_status = UNDERFLOW_EXCEPTION; | |
445 else { | |
446 if (remainder_y && (sign_z != (sign_y ^ sign_x))) | |
447 remainder_y = power10_table_128[16].w[0] - remainder_y; | |
448 | |
449 if (power10_table_128[15].w[0] > remainder_y) | |
450 uf_status = UNDERFLOW_EXCEPTION; | |
451 } | |
452 } else // RN or RN_away | |
453 { | |
454 if (remainder_y && (sign_z != (sign_y ^ sign_x))) | |
455 remainder_y = power10_table_128[16].w[0] - remainder_y; | |
456 | |
457 if (!extra_digits) { | |
458 remainder_y += round_const_table[rmode][15]; | |
459 if (remainder_y < power10_table_128[16].w[0]) | |
460 uf_status = UNDERFLOW_EXCEPTION; | |
461 } else { | |
462 if (remainder_y < round_const_table[rmode][16]) | |
463 uf_status = UNDERFLOW_EXCEPTION; | |
464 } | |
465 } | |
466 //__set_status_flags (pfpsf, uf_status); | |
467 } | |
468 } | |
469 #endif | |
470 res = | |
471 __bid_full_round64_remainder (sign_z, ez - extra_digits, CT, | |
472 extra_digits, remainder_y, | |
473 rnd_mode, pfpsf, uf_status); | |
474 BID_RETURN (res); | |
475 | |
476 } else { | |
477 if ((sign_z == (sign_x ^ sign_y)) | |
478 || (final_exponent > 3 * 256 + 15)) { | |
479 res = | |
480 fast_get_BID64_check_OF (sign_x ^ sign_y, final_exponent, | |
481 1000000000000000ull, rnd_mode, | |
482 pfpsf); | |
483 BID_RETURN (res); | |
484 } | |
485 } | |
486 } | |
487 | |
488 | |
489 if (extra_digits > 0) { | |
490 res = | |
491 get_add128 (sign_z, exponent_z, coefficient_z, sign_x ^ sign_y, | |
492 final_exponent, P, extra_digits, rnd_mode, pfpsf); | |
493 BID_RETURN (res); | |
494 } | |
495 // go to convert_format and exit | |
496 else { | |
497 C64 = __low_64 (P); | |
498 | |
499 res = | |
500 get_add64 (sign_x ^ sign_y, | |
501 exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS, C64, | |
502 sign_z, exponent_z, coefficient_z, | |
503 rnd_mode, pfpsf); | |
504 BID_RETURN (res); | |
505 } | |
506 } |