libgcc/config/libbid/bid_div_macros.h @ 0:a06113de4d67

first commit
author   kent <kent@cr.ie.u-ryukyu.ac.jp>
date     Fri, 17 Jul 2009 14:47:48 +0900
children 04ced10e8804
/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#ifndef _DIV_MACROS_H_
#define _DIV_MACROS_H_

#include "bid_internal.h"

#define FENCE __fence
//#define FENCE

//#define DOUBLE_EXTENDED_ON

#if DOUBLE_EXTENDED_ON

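/* __div_128_by_128: divide the 128-bit unsigned integer CX by the
   128-bit unsigned integer CY, returning the quotient in *pCQ and the
   remainder in *pCR.  An 80-bit extended-precision divide supplies an
   initial quotient estimate, which is then corrected with a few
   integer multiply/subtract steps.  */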
__BID_INLINE__ void
__div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX, UINT128 CY) {
  UINT128 CB, CB2, CB4, CB8, CQB, CA;
  int_double d64, dm64, ds;
  int_float t64;
  double dx, dq, dqh;
  BINARY80 lq, lx, ly;
  UINT64 Rh, R, B2, B4, Ph, Ql, Ql2, carry, Qh;

  if (!CY.w[1]) {
    pCR->w[1] = 0;

    if (!CX.w[1]) {
      pCQ->w[0] = CX.w[0] / CY.w[0];
      pCQ->w[1] = 0;
      pCR->w[1] = 0;
      pCR->w[0] = CX.w[0] - pCQ->w[0] * CY.w[0];
    } else {

      // This path works for CX<2^116 only

      // 2^64
      d64.i = 0x43f0000000000000;
      // 2^(-64)
      dm64.i = 0x3bf0000000000000;
      // 1.5*2^(-52)
      ds.i = 0x3cb8000000000000;
      dx = (BINARY80) CX.w[1] * d64.d + (BINARY80) CX.w[0];
      dq = dx / (BINARY80) CY.w[0];
      dq -= dq * (ds.d);
      dqh = dq * dm64.d;
      Qh = (UINT64) dqh;
      Ql = (UINT64) (dq - ((double) Qh) * d64.d);

      Rh = CX.w[0] - Ql * CY.w[0];
      Ql2 = Rh / CY.w[0];
      pCR->w[0] = Rh - Ql2 * CY.w[0];
      __add_carry_out ((pCQ->w[0]), carry, Ql, Ql2);
      pCQ->w[1] = Qh + carry;

    }
    return;
  }
  // now CY.w[1] > 0

  // 2^64
  t64.i = 0x5f800000;
  lx = (BINARY80) CX.w[1] * (BINARY80) t64.d + (BINARY80) CX.w[0];
  ly = (BINARY80) CY.w[1] * (BINARY80) t64.d + (BINARY80) CY.w[0];
  lq = lx / ly;
  pCQ->w[0] = (UINT64) lq;

  pCQ->w[1] = 0;

  if (!pCQ->w[0]) {
    /*if(__unsigned_compare_ge_128(CX,CY))
       {
       pCQ->w[0] = 1;
       __sub_128_128((*pCR), CX, CY);
       }
       else */
    {
      pCR->w[1] = CX.w[1];
      pCR->w[0] = CX.w[0];
    }
    return;
  }

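  /* The truncated floating-point estimate lq can be slightly off.
     When CY is large (CY.w[1] >= 16) or the quotient is small, the
     estimate is lowered by 1 and repaired with at most two additions;
     otherwise it is lowered by 6 and repaired below with shifted
     multiples of CY (a short restoring-division pass).  */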
  if (CY.w[1] >= 16 || pCQ->w[0] <= 0x1000000000000000ull) {
    pCQ->w[0] = (UINT64) lq - 1;
    __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY);
    __sub_128_128 (CA, CX, CQB);
    if (__unsigned_compare_ge_128 (CA, CY)) {
      __sub_128_128 (CA, CA, CY);
      pCQ->w[0]++;
      if (__unsigned_compare_ge_128 (CA, CY)) {
        __sub_128_128 (CA, CA, CY);
        pCQ->w[0]++;
      }
    }
    pCR->w[1] = CA.w[1];
    pCR->w[0] = CA.w[0];
  } else {
    pCQ->w[0] = (UINT64) lq - 6;

    __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY);
    __sub_128_128 (CA, CX, CQB);

    CB8.w[1] = (CY.w[1] << 3) | (CY.w[0] >> 61);
    CB8.w[0] = CY.w[0] << 3;
    CB4.w[1] = (CY.w[1] << 2) | (CY.w[0] >> 62);
    CB4.w[0] = CY.w[0] << 2;
    CB2.w[1] = (CY.w[1] << 1) | (CY.w[0] >> 63);
    CB2.w[0] = CY.w[0] << 1;

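    /* Restoration step: CB8, CB4 and CB2 hold 8*CY, 4*CY and 2*CY.
       Conditionally subtracting them, then CY itself, can add back up
       to 8+4+2+1 = 15 to the quotient, covering the deliberate
       under-estimate of 6 plus the estimate's own error.  */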
    if (__unsigned_compare_ge_128 (CA, CB8)) {
      pCQ->w[0] += 8;
      __sub_128_128 (CA, CA, CB8);
    }
    if (__unsigned_compare_ge_128 (CA, CB4)) {
      pCQ->w[0] += 4;
      __sub_128_128 (CA, CA, CB4);
    }
    if (__unsigned_compare_ge_128 (CA, CB2)) {
      pCQ->w[0] += 2;
      __sub_128_128 (CA, CA, CB2);
    }
    if (__unsigned_compare_ge_128 (CA, CY)) {
      pCQ->w[0] += 1;
      __sub_128_128 (CA, CA, CY);
    }

    pCR->w[1] = CA.w[1];
    pCR->w[0] = CA.w[0];
  }
}


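/* __div_256_by_128 (double-extended version): divide the 256-bit value
   in *pCA4 by the 128-bit value CY, adding the quotient into *pCQ and
   leaving the remainder in the low words of *pCA4.  The quotient is
   estimated in 80-bit arithmetic one 64-bit word at a time, with
   integer corrections after each step.  */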
__BID_INLINE__ void
__div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) {
  UINT256 CQ2Y;
  UINT128 CQ2, CQ3Y;
  UINT64 Q3, carry64;
  int_double d64;
  BINARY80 lx, ly, lq, l64, l128;

  // 2^64
  d64.i = 0x43f0000000000000ull;
  l64 = (BINARY80) d64.d;
  // 2^128
  l128 = l64 * l64;

  lx =
    ((BINARY80) (*pCA4).w[3] * l64 +
     (BINARY80) (*pCA4).w[2]) * l128 +
    (BINARY80) (*pCA4).w[1] * l64 + (BINARY80) (*pCA4).w[0];
  ly = (BINARY80) CY.w[1] * l128 + (BINARY80) CY.w[0] * l64;

  lq = lx / ly;
  CQ2.w[1] = (UINT64) lq;
  lq = (lq - CQ2.w[1]) * l64;
  CQ2.w[0] = (UINT64) lq;

  // CQ2*CY
  __mul_128x128_to_256 (CQ2Y, CY, CQ2);

  // CQ2Y <= (*pCA4) ?
  if (CQ2Y.w[3] < (*pCA4).w[3]
      || (CQ2Y.w[3] == (*pCA4).w[3]
          && (CQ2Y.w[2] < (*pCA4).w[2]
              || (CQ2Y.w[2] == (*pCA4).w[2]
                  && (CQ2Y.w[1] < (*pCA4).w[1]
                      || (CQ2Y.w[1] == (*pCA4).w[1]
                          && (CQ2Y.w[0] <= (*pCA4).w[0]))))))) {

    // (*pCA4) - CQ2Y, guaranteed below 5*2^49*CY < 5*2^(49+128)
    __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ2Y.w[0]);
    __sub_borrow_in_out ((*pCA4).w[1], carry64, (*pCA4).w[1], CQ2Y.w[1],
                         carry64);
    (*pCA4).w[2] = (*pCA4).w[2] - CQ2Y.w[2] - carry64;

    lx = ((BINARY80) (*pCA4).w[2] * l128 +
          ((BINARY80) (*pCA4).w[1] * l64 +
           (BINARY80) (*pCA4).w[0])) * l64;
    lq = lx / ly;
    Q3 = (UINT64) lq;

    if (Q3) {
      Q3--;
      __mul_64x128_short (CQ3Y, Q3, CY);
      __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ3Y.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] - CQ3Y.w[1] - carry64;

      if ((*pCA4).w[1] > CY.w[1]
          || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
        Q3++;
        __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
        (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
        if ((*pCA4).w[1] > CY.w[1]
            || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
          Q3++;
          __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0],
                            CY.w[0]);
          (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
        }
      }
      // add Q3 to Q2
      __add_carry_out (CQ2.w[0], carry64, Q3, CQ2.w[0]);
      CQ2.w[1] += carry64;
    }
  } else {
    // CQ2Y - (*pCA4), guaranteed below 5*2^(49+128)
    __sub_borrow_out ((*pCA4).w[0], carry64, CQ2Y.w[0], (*pCA4).w[0]);
    __sub_borrow_in_out ((*pCA4).w[1], carry64, CQ2Y.w[1], (*pCA4).w[1],
                         carry64);
    (*pCA4).w[2] = CQ2Y.w[2] - (*pCA4).w[2] - carry64;

    lx =
      ((BINARY80) (*pCA4).w[2] * l128 +
       (BINARY80) (*pCA4).w[1] * l64 + (BINARY80) (*pCA4).w[0]) * l64;
    lq = lx / ly;
    Q3 = 1 + (UINT64) lq;

    __mul_64x128_short (CQ3Y, Q3, CY);
    __sub_borrow_out ((*pCA4).w[0], carry64, CQ3Y.w[0], (*pCA4).w[0]);
    (*pCA4).w[1] = CQ3Y.w[1] - (*pCA4).w[1] - carry64;

    if ((SINT64) (*pCA4).w[1] > (SINT64) CY.w[1]
        || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
      Q3--;
      __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
    } else if ((SINT64) (*pCA4).w[1] < 0) {
      Q3++;
      __add_carry_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] + CY.w[1] + carry64;
    }
    // subtract Q3 from Q2
    __sub_borrow_out (CQ2.w[0], carry64, CQ2.w[0], Q3);
    CQ2.w[1] -= carry64;
  }

  // (*pCQ) + CQ2 + carry
  __add_carry_out ((*pCQ).w[0], carry64, CQ2.w[0], (*pCQ).w[0]);
  (*pCQ).w[1] = (*pCQ).w[1] + CQ2.w[1] + carry64;
}

#else

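/* Without BINARY80, the same divisions are done in plain double
   precision.  A double carries only about 53 significant bits, so a
   quotient that may reach 2^113 is extracted in up to three chunks: a
   top chunk scaled by 2^60, a middle chunk scaled by 2^49, and the
   final low part, each followed by an exact integer remainder
   update.  */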
__BID_INLINE__ void
__div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX0, UINT128 CY) {
  UINT128 CY36, CY51, CQ, A2, CX, CQT;
  UINT64 Q;
  int_double t64, d49, d60;
  double lx, ly, lq;

  if (!CX0.w[1] && !CY.w[1]) {
    pCQ->w[0] = CX0.w[0] / CY.w[0];
    pCQ->w[1] = 0;
    pCR->w[1] = 0;
    pCR->w[0] = CX0.w[0] - pCQ->w[0] * CY.w[0];
    return;
  }

  CX.w[1] = CX0.w[1];
  CX.w[0] = CX0.w[0];

  // 2^64
  t64.i = 0x43f0000000000000ull;
  lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
  ly = (double) CY.w[1] * t64.d + (double) CY.w[0];
  lq = lx / ly;

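  /* CY36 = CY << 36.  If CX >= CY * 2^100, the quotient needs more
     than 100 bits and cannot come from a single double estimate, so
     its top chunk is extracted first at scale 2^60.  */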
  CY36.w[1] = CY.w[0] >> (64 - 36);
  CY36.w[0] = CY.w[0] << 36;

  CQ.w[1] = CQ.w[0] = 0;

  // Q >= 2^100 ?
  if (!CY.w[1] && !CY36.w[1] && (CX.w[1] >= CY36.w[0])) {
    // then Q >= 2^100

    // 2^(-60)*CX/CY
    d60.i = 0x3c30000000000000ull;
    lq *= d60.d;
    Q = (UINT64) lq - 4ull;

    // Q*CY
    __mul_64x64_to_128 (A2, Q, CY.w[0]);

    // A2 <<= 60
    A2.w[1] = (A2.w[1] << 60) | (A2.w[0] >> (64 - 60));
    A2.w[0] <<= 60;

    __sub_128_128 (CX, CX, A2);

    lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
    lq = lx / ly;

    CQ.w[1] = Q >> (64 - 60);
    CQ.w[0] = Q << 60;
  }

  CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51));
  CY51.w[0] = CY.w[0] << 51;

  if (CY.w[1] < (UINT64) (1 << (64 - 51))
      && (__unsigned_compare_gt_128 (CX, CY51))) {
    // Q > 2^51

    // 2^(-49)*CX/CY
    d49.i = 0x3ce0000000000000ull;
    lq *= d49.d;

    Q = (UINT64) lq - 1ull;

    // Q*CY
    __mul_64x64_to_128 (A2, Q, CY.w[0]);
    A2.w[1] += Q * CY.w[1];

    // A2 <<= 49
    A2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49));
    A2.w[0] <<= 49;

    __sub_128_128 (CX, CX, A2);

    CQT.w[1] = Q >> (64 - 49);
    CQT.w[0] = Q << 49;
    __add_128_128 (CQ, CQ, CQT);

    lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
    lq = lx / ly;
  }

  Q = (UINT64) lq;

  __mul_64x64_to_128 (A2, Q, CY.w[0]);
  A2.w[1] += Q * CY.w[1];

  __sub_128_128 (CX, CX, A2);
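  /* The final chunk was truncated from an inexact estimate, so the
     remainder in CX may have gone negative (by at most 2*CY) or may
     still be >= CY once; adjust Q and CX to compensate.  */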
  if ((SINT64) CX.w[1] < 0) {
    Q--;
    CX.w[0] += CY.w[0];
    if (CX.w[0] < CY.w[0])
      CX.w[1]++;
    CX.w[1] += CY.w[1];
    if ((SINT64) CX.w[1] < 0) {
      Q--;
      CX.w[0] += CY.w[0];
      if (CX.w[0] < CY.w[0])
        CX.w[1]++;
      CX.w[1] += CY.w[1];
    }
  } else if (__unsigned_compare_ge_128 (CX, CY)) {
    Q++;
    __sub_128_128 (CX, CX, CY);
  }

  __add_128_64 (CQ, CQ, Q);

  pCQ->w[1] = CQ.w[1];
  pCQ->w[0] = CQ.w[0];
  pCR->w[1] = CX.w[1];
  pCR->w[0] = CX.w[0];
  return;
}

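/* The 256-by-128 variant follows the same chunked scheme on the
   four-word dividend in *pCA4, accumulating the quotient into *pCQ.
   The quotient is assumed to fit in 113 bits, as guaranteed by the
   BID128 divide callers.  */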
__BID_INLINE__ void
__div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) {
  UINT256 CA4, CA2, CY51, CY36;
  UINT128 CQ, A2, A2h, CQT;
  UINT64 Q, carry64;
  int_double t64, d49, d60;
  double lx, ly, lq, d128, d192;

  // the quotient is assumed to be at most 113 bits,
  // as needed by BID128 divide routines

  // initial dividend
  CA4.w[3] = (*pCA4).w[3];
  CA4.w[2] = (*pCA4).w[2];
  CA4.w[1] = (*pCA4).w[1];
  CA4.w[0] = (*pCA4).w[0];
  CQ.w[1] = (*pCQ).w[1];
  CQ.w[0] = (*pCQ).w[0];

  // 2^64
  t64.i = 0x43f0000000000000ull;
  d128 = t64.d * t64.d;
  d192 = d128 * t64.d;
  lx = (double) CA4.w[3] * d192 + ((double) CA4.w[2] * d128 +
                                   ((double) CA4.w[1] * t64.d +
                                    (double) CA4.w[0]));
  ly = (double) CY.w[1] * t64.d + (double) CY.w[0];
  lq = lx / ly;
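  /* As in __div_128_by_128 above, the 53-bit double estimate is
     consumed in chunks: one scaled by 2^60 when the quotient reaches
     2^100, one scaled by 2^49 when it exceeds 2^51, then the final
     low part.  */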

  CY36.w[2] = CY.w[1] >> (64 - 36);
  CY36.w[1] = (CY.w[1] << 36) | (CY.w[0] >> (64 - 36));
  CY36.w[0] = CY.w[0] << 36;

  // Q >= 2^100 ?
  if (CA4.w[3] > CY36.w[2]
      || (CA4.w[3] == CY36.w[2]
          && (CA4.w[2] > CY36.w[1]
              || (CA4.w[2] == CY36.w[1] && CA4.w[1] >= CY36.w[0])))) {
    // 2^(-60)*CA4/CY
    d60.i = 0x3c30000000000000ull;
    lq *= d60.d;
    Q = (UINT64) lq - 4ull;

    // Q*CY
    __mul_64x128_to_192 (CA2, Q, CY);

    // CA2 <<= 60
    // CA2.w[3] = CA2.w[2] >> (64-60);
    CA2.w[2] = (CA2.w[2] << 60) | (CA2.w[1] >> (64 - 60));
    CA2.w[1] = (CA2.w[1] << 60) | (CA2.w[0] >> (64 - 60));
    CA2.w[0] <<= 60;

    // CA4 -= CA2
    __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]);
    __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1],
                         carry64);
    CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64;

    lx = ((double) CA4.w[2] * d128 +
          ((double) CA4.w[1] * t64.d + (double) CA4.w[0]));
    lq = lx / ly;

    CQT.w[1] = Q >> (64 - 60);
    CQT.w[0] = Q << 60;
    __add_128_128 (CQ, CQ, CQT);
  }

  CY51.w[2] = CY.w[1] >> (64 - 51);
  CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51));
  CY51.w[0] = CY.w[0] << 51;

  if (CA4.w[2] > CY51.w[2] || ((CA4.w[2] == CY51.w[2])
                               && (__unsigned_compare_gt_128 (CA4, CY51)))) {
    // Q > 2^51

    // 2^(-49)*CA4/CY
    d49.i = 0x3ce0000000000000ull;
    lq *= d49.d;

    Q = (UINT64) lq - 1ull;

    // Q*CY
    __mul_64x64_to_128 (A2, Q, CY.w[0]);
    __mul_64x64_to_128 (A2h, Q, CY.w[1]);
    A2.w[1] += A2h.w[0];
    if (A2.w[1] < A2h.w[0])
      A2h.w[1]++;

    // A2 <<= 49
    CA2.w[2] = (A2h.w[1] << 49) | (A2.w[1] >> (64 - 49));
    CA2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49));
    CA2.w[0] = A2.w[0] << 49;

    __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]);
    __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1],
                         carry64);
    CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64;

    CQT.w[1] = Q >> (64 - 49);
    CQT.w[0] = Q << 49;
    __add_128_128 (CQ, CQ, CQT);

    lx = ((double) CA4.w[2] * d128 +
          ((double) CA4.w[1] * t64.d + (double) CA4.w[0]));
    lq = lx / ly;
  }

  Q = (UINT64) lq;
  __mul_64x64_to_128 (A2, Q, CY.w[0]);
  A2.w[1] += Q * CY.w[1];

  __sub_128_128 (CA4, CA4, A2);
  if ((SINT64) CA4.w[1] < 0) {
    Q--;
    CA4.w[0] += CY.w[0];
    if (CA4.w[0] < CY.w[0])
      CA4.w[1]++;
    CA4.w[1] += CY.w[1];
    if ((SINT64) CA4.w[1] < 0) {
      Q--;
      CA4.w[0] += CY.w[0];
      if (CA4.w[0] < CY.w[0])
        CA4.w[1]++;
      CA4.w[1] += CY.w[1];
    }
  } else if (__unsigned_compare_ge_128 (CA4, CY)) {
    Q++;
    __sub_128_128 (CA4, CA4, CY);
  }

  __add_128_64 (CQ, CQ, Q);

  pCQ->w[1] = CQ.w[1];
  pCQ->w[0] = CQ.w[0];
  pCA4->w[1] = CA4.w[1];
  pCA4->w[0] = CA4.w[0];
  return;
}

#endif
#endif
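
/* Usage sketch (illustrative, not part of the upstream header): the
   BID128 divide routines are the intended callers.  With made-up
   values, a 128-by-128 division would run along these lines:

     UINT128 q, r, x, y;
     x.w[1] = 0;  x.w[0] = 1000000000000000000ull;   // dividend: 10^18
     y.w[1] = 0;  y.w[0] = 3ull;                     // divisor: 3
     __div_128_by_128 (&q, &r, x, y);
     // q.w[0] == 333333333333333333ull, r.w[0] == 1ull

   Only types and functions from this header and bid_internal.h are
   assumed.  */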