0
|
1 /* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
|
|
2
|
|
3 This file is part of GCC.
|
|
4
|
|
5 GCC is free software; you can redistribute it and/or modify it under
|
|
6 the terms of the GNU General Public License as published by the Free
|
|
7 Software Foundation; either version 3, or (at your option) any later
|
|
8 version.
|
|
9
|
|
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
13 for more details.
|
|
14
|
|
15 Under Section 7 of GPL version 3, you are granted additional
|
|
16 permissions described in the GCC Runtime Library Exception, version
|
|
17 3.1, as published by the Free Software Foundation.
|
|
18
|
|
19 You should have received a copy of the GNU General Public License and
|
|
20 a copy of the GCC Runtime Library Exception along with this program;
|
|
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
|
22 <http://www.gnu.org/licenses/>. */
|
|
23
|
|
24 #ifndef _DIV_MACROS_H_
|
|
25 #define _DIV_MACROS_H_
|
|
26
|
|
27 #include "bid_internal.h"
|
|
28
|
|
29 #define FENCE __fence
|
|
30 //#define FENCE
|
|
31
|
|
32 //#define DOUBLE_EXTENDED_ON
|
|
33
|
|
34 #if DOUBLE_EXTENDED_ON
|
|
35
|
|
36
|
|
/* Divide a 128-bit unsigned integer CX by a 128-bit unsigned integer CY.
   On return *pCQ holds the quotient and *pCR the remainder.
   Strategy: form a BINARY80 (double-extended) estimate of the quotient,
   then correct it with exact integer multiply/subtract/compare steps.
   NOTE(review): per the comment below, the CY.w[1]==0 / CX.w[1]!=0 path
   assumes CX < 2^116 — callers must guarantee this.  */
__BID_INLINE__ void
__div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX, UINT128 CY) {
  UINT128 CB, CB2, CB4, CB8, CQB, CA;
  int_double d64, dm64, ds;
  int_float t64;
  double dx, dq, dqh;
  BINARY80 lq, lx, ly;
  /* NOTE(review): R, B2, B4 are declared but never used in this path.  */
  UINT64 Rh, R, B2, B4, Ph, Ql, Ql2, carry, Qh;

  if (!CY.w[1]) {
    /* Divisor fits in 64 bits.  */
    pCR->w[1] = 0;

    if (!CX.w[1]) {
      /* Both operands fit in 64 bits: plain hardware divide.  */
      pCQ->w[0] = CX.w[0] / CY.w[0];
      pCQ->w[1] = 0;
      pCR->w[1] = 0;
      pCR->w[0] = CX.w[0] - pCQ->w[0] * CY.w[0];
    } else {
      /* 128-bit dividend, 64-bit divisor.
         This path works for CX<2^116 only.  */

      /* 2^64 */
      d64.i = 0x43f0000000000000;
      /* 2^(-64)  (exponent field 0x3bf = 1023-64; the original comment
         said "2^64", which contradicts the bit pattern)  */
      dm64.i = 0x3bf0000000000000;
      /* 1.5*2^(-52) — nudge the estimate slightly downward so the
         integer correction below only has to adjust upward.  */
      ds.i = 0x3cb8000000000000;
      dx = (BINARY80) CX.w[1] * d64.d + (BINARY80) CX.w[0];
      dq = dx / (BINARY80) CY.w[0];
      dq -= dq * (ds.d);
      /* Split the ~116-bit quotient estimate into high/low 64-bit words.  */
      dqh = dq * dm64.d;
      Qh = (UINT64) dqh;
      Ql = (UINT64) (dq - ((double) Qh) * d64.d);

      /* Exact fix-up: compute the residual and divide it out.  */
      Rh = CX.w[0] - Ql * CY.w[0];
      Ql2 = Rh / CY.w[0];
      pCR->w[0] = Rh - Ql2 * CY.w[0];
      __add_carry_out ((pCQ->w[0]), carry, Ql, Ql2);
      pCQ->w[1] = Qh + carry;

    }
    return;
  }
  /* now CY.w[1] > 0, so the quotient fits in 64 bits.  */

  /* 2^64 (single-precision constant used to scale the high words)  */
  t64.i = 0x5f800000;
  lx = (BINARY80) CX.w[1] * (BINARY80) t64.d + (BINARY80) CX.w[0];
  ly = (BINARY80) CY.w[1] * (BINARY80) t64.d + (BINARY80) CY.w[0];
  lq = lx / ly;
  pCQ->w[0] = (UINT64) lq;

  pCQ->w[1] = 0;

  if (!pCQ->w[0]) {
    /* Estimated quotient is 0: dividend is (assumed) below divisor,
       remainder is the dividend itself.  */
    /*if(__unsigned_compare_ge_128(CX,CY))
       {
       pCQ->w[0] = 1;
       __sub_128_128((*pCR), CX, CY);
       }
       else */
    {
      pCR->w[1] = CX.w[1];
      pCR->w[0] = CX.w[0];
    }
    return;
  }

  if (CY.w[1] >= 16 || pCQ->w[0] <= 0x1000000000000000ull) {
    /* Estimate error is small (at most 2): start one below the
       estimate and correct upward by at most two subtractions.  */
    pCQ->w[0] = (UINT64) lq - 1;
    __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY);
    __sub_128_128 (CA, CX, CQB);
    if (__unsigned_compare_ge_128 (CA, CY)) {
      __sub_128_128 (CA, CA, CY);
      pCQ->w[0]++;
      if (__unsigned_compare_ge_128 (CA, CY)) {
        __sub_128_128 (CA, CA, CY);
        pCQ->w[0]++;
      }
    }
    pCR->w[1] = CA.w[1];
    pCR->w[0] = CA.w[0];
  } else {
    /* Estimate may be off by up to ~8: start 6 below, then do a
       binary correction against 8*CY, 4*CY, 2*CY, CY.  */
    pCQ->w[0] = (UINT64) lq - 6;

    __mul_64x128_full (Ph, CQB, (pCQ->w[0]), CY);
    __sub_128_128 (CA, CX, CQB);

    /* CB8 = 8*CY, CB4 = 4*CY, CB2 = 2*CY.  */
    CB8.w[1] = (CY.w[1] << 3) | (CY.w[0] >> 61);
    CB8.w[0] = CY.w[0] << 3;
    CB4.w[1] = (CY.w[1] << 2) | (CY.w[0] >> 62);
    CB4.w[0] = CY.w[0] << 2;
    CB2.w[1] = (CY.w[1] << 1) | (CY.w[0] >> 63);
    CB2.w[0] = CY.w[0] << 1;

    if (__unsigned_compare_ge_128 (CA, CB8)) {
      pCQ->w[0] += 8;
      __sub_128_128 (CA, CA, CB8);
    }
    if (__unsigned_compare_ge_128 (CA, CB4)) {
      pCQ->w[0] += 4;
      __sub_128_128 (CA, CA, CB4);
    }
    if (__unsigned_compare_ge_128 (CA, CB2)) {
      pCQ->w[0] += 2;
      __sub_128_128 (CA, CA, CB2);
    }
    if (__unsigned_compare_ge_128 (CA, CY)) {
      pCQ->w[0] += 1;
      __sub_128_128 (CA, CA, CY);
    }

    pCR->w[1] = CA.w[1];
    pCR->w[0] = CA.w[0];
  }
}
|
|
153
|
|
154
|
|
155
|
|
156
|
|
157
|
|
158
|
|
/* Divide the 256-bit value *pCA4 by the 128-bit CY (double-extended path).
   The quotient is accumulated into *pCQ (added to the value already there);
   the remainder replaces the low words of *pCA4.
   Strategy: BINARY80 quotient estimate CQ2, one exact 128x128 multiply,
   then a second BINARY80 estimate Q3 on the residual plus at most two
   integer correction steps.  */
__BID_INLINE__ void
__div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) {
  UINT256 CQ2Y;
  UINT128 CQ2, CQ3Y;
  UINT64 Q3, carry64;
  int_double d64;
  BINARY80 lx, ly, lq, l64, l128;

  /* 2^64 */
  d64.i = 0x43f0000000000000ull;
  l64 = (BINARY80) d64.d;
  /* 2^128 */
  l128 = l64 * l64;

  /* lx ~= (*pCA4)/2^64, ly ~= CY (scaled consistently so lx/ly ~= Q/2^64;
     NOTE(review): ly carries an extra 2^64 factor relative to CY, matching
     the scaling of lx — confirm against the caller's conventions).  */
  lx =
    ((BINARY80) (*pCA4).w[3] * l64 +
     (BINARY80) (*pCA4).w[2]) * l128 +
    (BINARY80) (*pCA4).w[1] * l64 + (BINARY80) (*pCA4).w[0];
  ly = (BINARY80) CY.w[1] * l128 + (BINARY80) CY.w[0] * l64;

  /* Split the 128-bit quotient estimate into CQ2.w[1]:CQ2.w[0].  */
  lq = lx / ly;
  CQ2.w[1] = (UINT64) lq;
  lq = (lq - CQ2.w[1]) * l64;
  CQ2.w[0] = (UINT64) lq;

  /* CQ2*CY */
  __mul_128x128_to_256 (CQ2Y, CY, CQ2);

  /* CQ2Y <= (*pCA4) ?  i.e. was the estimate an under-estimate?  */
  if (CQ2Y.w[3] < (*pCA4).w[3]
      || (CQ2Y.w[3] == (*pCA4).w[3]
	  && (CQ2Y.w[2] < (*pCA4).w[2]
	      || (CQ2Y.w[2] == (*pCA4).w[2]
		  && (CQ2Y.w[1] < (*pCA4).w[1]
		      || (CQ2Y.w[1] == (*pCA4).w[1]
			  && (CQ2Y.w[0] <= (*pCA4).w[0]))))))) {

    /* (*pCA4) -CQ2Y, guaranteed below 5*2^49*CY < 5*2^(49+128) */
    __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ2Y.w[0]);
    __sub_borrow_in_out ((*pCA4).w[1], carry64, (*pCA4).w[1], CQ2Y.w[1],
			 carry64);
    (*pCA4).w[2] = (*pCA4).w[2] - CQ2Y.w[2] - carry64;

    /* Second estimate: quotient of the residual.  */
    lx = ((BINARY80) (*pCA4).w[2] * l128 +
	  ((BINARY80) (*pCA4).w[1] * l64 +
	   (BINARY80) (*pCA4).w[0])) * l64;
    lq = lx / ly;
    Q3 = (UINT64) lq;

    if (Q3) {
      /* Start one below the estimate; correct upward at most twice.  */
      Q3--;
      __mul_64x128_short (CQ3Y, Q3, CY);
      __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CQ3Y.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] - CQ3Y.w[1] - carry64;

      if ((*pCA4).w[1] > CY.w[1]
	  || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
	Q3++;
	__sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
	(*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
	if ((*pCA4).w[1] > CY.w[1]
	    || ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
	  Q3++;
	  __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0],
			    CY.w[0]);
	  (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
	}
      }
      /* add Q3 to Q2 */
      __add_carry_out (CQ2.w[0], carry64, Q3, CQ2.w[0]);
      CQ2.w[1] += carry64;
    }
  } else {
    /* Over-estimate: work with the negated residual CQ2Y - (*pCA4),
       guaranteed below 5*2^(49+128), and subtract the correction.  */
    __sub_borrow_out ((*pCA4).w[0], carry64, CQ2Y.w[0], (*pCA4).w[0]);
    __sub_borrow_in_out ((*pCA4).w[1], carry64, CQ2Y.w[1], (*pCA4).w[1],
			 carry64);
    (*pCA4).w[2] = CQ2Y.w[2] - (*pCA4).w[2] - carry64;

    lx =
      ((BINARY80) (*pCA4).w[2] * l128 +
       (BINARY80) (*pCA4).w[1] * l64 + (BINARY80) (*pCA4).w[0]) * l64;
    lq = lx / ly;
    /* Round the correction up so the remainder ends non-negative.  */
    Q3 = 1 + (UINT64) lq;

    __mul_64x128_short (CQ3Y, Q3, CY);
    __sub_borrow_out ((*pCA4).w[0], carry64, CQ3Y.w[0], (*pCA4).w[0]);
    (*pCA4).w[1] = CQ3Y.w[1] - (*pCA4).w[1] - carry64;

    if ((SINT64) (*pCA4).w[1] > (SINT64) CY.w[1]
	|| ((*pCA4).w[1] == CY.w[1] && (*pCA4).w[0] >= CY.w[0])) {
      /* Remainder still >= CY: one more step.  */
      Q3--;
      __sub_borrow_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] - CY.w[1] - carry64;
    } else if ((SINT64) (*pCA4).w[1] < 0) {
      /* Went negative: add CY back.  */
      Q3++;
      __add_carry_out ((*pCA4).w[0], carry64, (*pCA4).w[0], CY.w[0]);
      (*pCA4).w[1] = (*pCA4).w[1] + CY.w[1] + carry64;
    }
    /* subtract Q3 from Q2 */
    __sub_borrow_out (CQ2.w[0], car64 = carry64, CQ2.w[0], Q3);
    CQ2.w[1] -= carry64;
  }

  /* (*pCQ) + CQ2 + carry : accumulate the quotient into the caller's total.  */
  __add_carry_out ((*pCQ).w[0], carry64, CQ2.w[0], (*pCQ).w[0]);
  (*pCQ).w[1] = (*pCQ).w[1] + CQ2.w[1] + carry64;


}
|
|
269 #else
|
|
270
|
|
/* Divide 128-bit CX0 by 128-bit CY (portable double-precision path, used
   when BINARY80 is not available).  On return *pCQ holds the quotient and
   *pCR the remainder.
   Strategy: because a double holds only ~53 quotient bits at a time, the
   quotient is extracted in up to three chunks — a 2^60-scaled chunk when
   Q >= 2^100, a 2^49-scaled chunk when Q > 2^51, and a final chunk —
   each followed by an exact integer fix-up.  */
__BID_INLINE__ void
__div_128_by_128 (UINT128 * pCQ, UINT128 * pCR, UINT128 CX0, UINT128 CY) {
  UINT128 CY36, CY51, CQ, A2, CX, CQT;
  UINT64 Q;
  int_double t64, d49, d60;
  double lx, ly, lq;

  if (!CX0.w[1] && !CY.w[1]) {
    /* Both operands fit in 64 bits: plain hardware divide.  */
    pCQ->w[0] = CX0.w[0] / CY.w[0];
    pCQ->w[1] = 0;
    /* NOTE(review): the w[0] store here is redundant — it is overwritten
       on the next line.  */
    pCR->w[1] = pCR->w[0] = 0;
    pCR->w[0] = CX0.w[0] - pCQ->w[0] * CY.w[0];
    return;
  }

  /* Working copy of the dividend; reduced in place below.  */
  CX.w[1] = CX0.w[1];
  CX.w[0] = CX0.w[0];

  /* 2^64 */
  t64.i = 0x43f0000000000000ull;
  lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
  ly = (double) CY.w[1] * t64.d + (double) CY.w[0];
  lq = lx / ly;

  /* CY36 = CY << 36, used to test whether the quotient reaches 2^100.  */
  CY36.w[1] = CY.w[0] >> (64 - 36);
  CY36.w[0] = CY.w[0] << 36;

  CQ.w[1] = CQ.w[0] = 0;

  /* Q >= 2^100 ? */
  if (!CY.w[1] && !CY36.w[1] && (CX.w[1] >= CY36.w[0])) {
    /* then Q >= 2^100 */

    /* 2^(-60)*CX/CY : extract the top quotient chunk scaled by 2^60.  */
    d60.i = 0x3c30000000000000ull;
    lq *= d60.d;
    /* -4 biases the estimate low so the later fix-up only adds.  */
    Q = (UINT64) lq - 4ull;

    /* Q*CY */
    __mul_64x64_to_128 (A2, Q, CY.w[0]);

    /* A2 <<= 60 */
    A2.w[1] = (A2.w[1] << 60) | (A2.w[0] >> (64 - 60));
    A2.w[0] <<= 60;

    __sub_128_128 (CX, CX, A2);

    /* Re-estimate the quotient of the reduced dividend.  */
    lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
    lq = lx / ly;

    /* CQ = Q << 60  (partial quotient so far).  */
    CQ.w[1] = Q >> (64 - 60);
    CQ.w[0] = Q << 60;
  }


  /* CY51 = CY << 51, used to test whether the remaining quotient
     exceeds 2^51.  */
  CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51));
  CY51.w[0] = CY.w[0] << 51;

  if (CY.w[1] < (UINT64) (1 << (64 - 51))
      && (__unsigned_compare_gt_128 (CX, CY51))) {
    /* Q > 2^51 */

    /* 2^(-49)*CX/CY : extract the next quotient chunk scaled by 2^49.  */
    d49.i = 0x3ce0000000000000ull;
    lq *= d49.d;

    Q = (UINT64) lq - 1ull;

    /* Q*CY */
    __mul_64x64_to_128 (A2, Q, CY.w[0]);
    A2.w[1] += Q * CY.w[1];

    /* A2 <<= 49 */
    A2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49));
    A2.w[0] <<= 49;

    __sub_128_128 (CX, CX, A2);

    /* Accumulate this chunk: CQ += Q << 49.  */
    CQT.w[1] = Q >> (64 - 49);
    CQT.w[0] = Q << 49;
    __add_128_128 (CQ, CQ, CQT);

    lx = (double) CX.w[1] * t64.d + (double) CX.w[0];
    lq = lx / ly;
  }

  /* Final chunk: quotient now fits in 64 bits.  */
  Q = (UINT64) lq;

  __mul_64x64_to_128 (A2, Q, CY.w[0]);
  A2.w[1] += Q * CY.w[1];

  __sub_128_128 (CX, CX, A2);
  if ((SINT64) CX.w[1] < 0) {
    /* Over-estimated: add CY back, at most twice.  */
    Q--;
    CX.w[0] += CY.w[0];
    if (CX.w[0] < CY.w[0])
      CX.w[1]++;
    CX.w[1] += CY.w[1];
    if ((SINT64) CX.w[1] < 0) {
      Q--;
      CX.w[0] += CY.w[0];
      if (CX.w[0] < CY.w[0])
	CX.w[1]++;
      CX.w[1] += CY.w[1];
    }
  } else if (__unsigned_compare_ge_128 (CX, CY)) {
    /* Under-estimated by one.  */
    Q++;
    __sub_128_128 (CX, CX, CY);
  }

  __add_128_64 (CQ, CQ, Q);


  pCQ->w[1] = CQ.w[1];
  pCQ->w[0] = CQ.w[0];
  pCR->w[1] = CX.w[1];
  pCR->w[0] = CX.w[0];
  return;
}
|
|
390
|
|
391
|
|
/* Divide the 256-bit value *pCA4 by the 128-bit CY (portable
   double-precision path).  The quotient is accumulated into *pCQ and the
   128-bit remainder is written back into the low two words of *pCA4.
   Same chunked-quotient strategy as the double-precision
   __div_128_by_128 above: up to three double-based estimates (scaled by
   2^60, 2^49, and 1) each followed by an exact integer fix-up.  */
__BID_INLINE__ void
__div_256_by_128 (UINT128 * pCQ, UINT256 * pCA4, UINT128 CY) {
  UINT256 CA4, CA2, CY51, CY36;
  UINT128 CQ, A2, A2h, CQT;
  UINT64 Q, carry64;
  int_double t64, d49, d60;
  double lx, ly, lq, d128, d192;

  // the quotient is assumed to be at most 113 bits,
  // as needed by BID128 divide routines

  // initial dividend
  CA4.w[3] = (*pCA4).w[3];
  CA4.w[2] = (*pCA4).w[2];
  CA4.w[1] = (*pCA4).w[1];
  CA4.w[0] = (*pCA4).w[0];
  CQ.w[1] = (*pCQ).w[1];
  CQ.w[0] = (*pCQ).w[0];

  // 2^64
  t64.i = 0x43f0000000000000ull;
  d128 = t64.d * t64.d;   /* 2^128 */
  d192 = d128 * t64.d;    /* 2^192 */
  /* Double-precision image of the 256-bit dividend and 128-bit divisor.  */
  lx = (double) CA4.w[3] * d192 + ((double) CA4.w[2] * d128 +
				   ((double) CA4.w[1] * t64.d +
				    (double) CA4.w[0]));
  ly = (double) CY.w[1] * t64.d + (double) CY.w[0];
  lq = lx / ly;

  /* CY36 = CY << 36, for the Q >= 2^100 test.  */
  CY36.w[2] = CY.w[1] >> (64 - 36);
  CY36.w[1] = (CY.w[1] << 36) | (CY.w[0] >> (64 - 36));
  CY36.w[0] = CY.w[0] << 36;

  /* NOTE(review): redundant — CQ was already loaded from *pCQ above.  */
  CQ.w[1] = (*pCQ).w[1];
  CQ.w[0] = (*pCQ).w[0];

  // Q >= 2^100 ?
  if (CA4.w[3] > CY36.w[2]
      || (CA4.w[3] == CY36.w[2]
	  && (CA4.w[2] > CY36.w[1]
	      || (CA4.w[2] == CY36.w[1] && CA4.w[1] >= CY36.w[0])))) {
    /* 2^(-60)*CA4/CY : extract the top quotient chunk scaled by 2^60.  */
    d60.i = 0x3c30000000000000ull;
    lq *= d60.d;
    /* -4 biases the estimate low so the later fix-up only adds.  */
    Q = (UINT64) lq - 4ull;

    // Q*CY
    __mul_64x128_to_192 (CA2, Q, CY);

    // CA2 <<= 60
    // CA2.w[3] = CA2.w[2] >> (64-60);
    CA2.w[2] = (CA2.w[2] << 60) | (CA2.w[1] >> (64 - 60));
    CA2.w[1] = (CA2.w[1] << 60) | (CA2.w[0] >> (64 - 60));
    CA2.w[0] <<= 60;

    // CA4 -= CA2
    __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]);
    __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1],
			 carry64);
    CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64;

    /* Re-estimate the quotient of the reduced dividend.  */
    lx = ((double) CA4.w[2] * d128 +
	  ((double) CA4.w[1] * t64.d + (double) CA4.w[0]));
    lq = lx / ly;

    /* Accumulate this chunk: CQ += Q << 60.  */
    CQT.w[1] = Q >> (64 - 60);
    CQT.w[0] = Q << 60;
    __add_128_128 (CQ, CQ, CQT);
  }

  /* CY51 = CY << 51, for the Q > 2^51 test.  */
  CY51.w[2] = CY.w[1] >> (64 - 51);
  CY51.w[1] = (CY.w[1] << 51) | (CY.w[0] >> (64 - 51));
  CY51.w[0] = CY.w[0] << 51;

  if (CA4.w[2] > CY51.w[2] || ((CA4.w[2] == CY51.w[2])
			       &&
			       (__unsigned_compare_gt_128 (CA4, CY51))))
  {
    // Q > 2^51

    /* 2^(-49)*CA4/CY : extract the next quotient chunk scaled by 2^49.  */
    d49.i = 0x3ce0000000000000ull;
    lq *= d49.d;

    Q = (UINT64) lq - 1ull;

    /* Q*CY, computed as two 64x64 products with manual carry.  */
    __mul_64x64_to_128 (A2, Q, CY.w[0]);
    __mul_64x64_to_128 (A2h, Q, CY.w[1]);
    A2.w[1] += A2h.w[0];
    if (A2.w[1] < A2h.w[0])
      A2h.w[1]++;

    // A2 <<= 49
    CA2.w[2] = (A2h.w[1] << 49) | (A2.w[1] >> (64 - 49));
    CA2.w[1] = (A2.w[1] << 49) | (A2.w[0] >> (64 - 49));
    CA2.w[0] = A2.w[0] << 49;

    __sub_borrow_out (CA4.w[0], carry64, CA4.w[0], CA2.w[0]);
    __sub_borrow_in_out (CA4.w[1], carry64, CA4.w[1], CA2.w[1],
			 carry64);
    CA4.w[2] = CA4.w[2] - CA2.w[2] - carry64;

    /* Accumulate this chunk: CQ += Q << 49.  */
    CQT.w[1] = Q >> (64 - 49);
    CQT.w[0] = Q << 49;
    __add_128_128 (CQ, CQ, CQT);

    lx = ((double) CA4.w[2] * d128 +
	  ((double) CA4.w[1] * t64.d + (double) CA4.w[0]));
    lq = lx / ly;
  }

  /* Final chunk: remaining quotient fits in 64 bits.  */
  Q = (UINT64) lq;
  __mul_64x64_to_128 (A2, Q, CY.w[0]);
  A2.w[1] += Q * CY.w[1];

  __sub_128_128 (CA4, CA4, A2);
  if ((SINT64) CA4.w[1] < 0) {
    /* Over-estimated: add CY back, at most twice.  */
    Q--;
    CA4.w[0] += CY.w[0];
    if (CA4.w[0] < CY.w[0])
      CA4.w[1]++;
    CA4.w[1] += CY.w[1];
    if ((SINT64) CA4.w[1] < 0) {
      Q--;
      CA4.w[0] += CY.w[0];
      if (CA4.w[0] < CY.w[0])
	CA4.w[1]++;
      CA4.w[1] += CY.w[1];
    }
  } else if (__unsigned_compare_ge_128 (CA4, CY)) {
    /* Under-estimated by one.  */
    Q++;
    __sub_128_128 (CA4, CA4, CY);
  }

  __add_128_64 (CQ, CQ, Q);

  /* Write back quotient and the 128-bit remainder.  */
  pCQ->w[1] = CQ.w[1];
  pCQ->w[0] = CQ.w[0];
  pCA4->w[1] = CA4.w[1];
  pCA4->w[0] = CA4.w[0];
  return;



}
|
|
538
|
|
539 #endif
|
|
540 #endif
|