Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/soft-fp/op-4.h @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Software floating-point emulation. | |
2 Basic four-word fraction declaration and manipulation. | |
3 Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc. | |
4 This file is part of the GNU C Library. | |
5 Contributed by Richard Henderson (rth@cygnus.com), | |
6 Jakub Jelinek (jj@ultra.linux.cz), | |
7 David S. Miller (davem@redhat.com) and | |
8 Peter Maydell (pmaydell@chiark.greenend.org.uk). | |
9 | |
10 The GNU C Library is free software; you can redistribute it and/or | |
11 modify it under the terms of the GNU Lesser General Public | |
12 License as published by the Free Software Foundation; either | |
13 version 2.1 of the License, or (at your option) any later version. | |
14 | |
15 In addition to the permissions in the GNU Lesser General Public | |
16 License, the Free Software Foundation gives you unlimited | |
17 permission to link the compiled version of this file into | |
18 combinations with other programs, and to distribute those | |
19 combinations without any restriction coming from the use of this | |
20 file. (The Lesser General Public License restrictions do apply in | |
21 other respects; for example, they cover modification of the file, | |
22 and distribution when not linked into a combine executable.) | |
23 | |
24 The GNU C Library is distributed in the hope that it will be useful, | |
25 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
27 Lesser General Public License for more details. | |
28 | |
29 You should have received a copy of the GNU Lesser General Public | |
30 License along with the GNU C Library; if not, write to the Free | |
31 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, | |
32 MA 02110-1301, USA. */ | |
33 | |
/* Declare X's four-word fraction: an array X_f[4] of _FP_W_TYPE. */
34 #define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4] | |
/* Copy all four fraction words of S into D (a single comma expression). */
35 #define _FP_FRAC_COPY_4(D,S) \ | |
36 (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \ | |
37 D##_f[2] = S##_f[2], D##_f[3] = S##_f[3]) | |
/* Forward X and I to __FP_FRAC_SET_4.  That macro takes four word values, so
   I is expected to expand to a comma-separated list of four words, as the
   _FP_ZEROFRAC_4 / _FP_MINFRAC_4 / _FP_MAXFRAC_4 constants do. */
38 #define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I) | |
/* Word 3 of X; the comparison macros in this file examine it first, i.e. it
   is the most significant word. */
39 #define _FP_FRAC_HIGH_4(X) (X##_f[3]) | |
/* Word 0 of X, the least significant word. */
40 #define _FP_FRAC_LOW_4(X) (X##_f[0]) | |
/* Word w of X (index 0..3); the expansion is an lvalue. */
41 #define _FP_FRAC_WORD_4(X,w) (X##_f[w]) | |
42 | |
/* Shift the four-word fraction X left by N bits, in place.
   N is split into a whole-word count (_skip) and a residual bit count (_up).
   Words are moved from the top down so no source word is overwritten before
   it is read, and the vacated low words are cleared by the final loop.
   The word-aligned case (_up == 0) is handled separately so that no word is
   ever shifted by the full _FP_W_TYPE_SIZE.
   N is evaluated twice.
   NOTE(review): presumably callers guarantee 0 <= N < 4*_FP_W_TYPE_SIZE;
   with a larger N and a nonzero residual the else branch would index outside
   X_f[] -- confirm against callers. */
43 #define _FP_FRAC_SLL_4(X,N) \ | |
44 do { \ | |
45 _FP_I_TYPE _up, _down, _skip, _i; \ | |
46 _skip = (N) / _FP_W_TYPE_SIZE; \ | |
47 _up = (N) % _FP_W_TYPE_SIZE; \ | |
48 _down = _FP_W_TYPE_SIZE - _up; \ | |
49 if (!_up) \ | |
50 for (_i = 3; _i >= _skip; --_i) \ | |
51 X##_f[_i] = X##_f[_i-_skip]; \ | |
52 else \ | |
53 { \ | |
54 for (_i = 3; _i > _skip; --_i) \ | |
55 X##_f[_i] = X##_f[_i-_skip] << _up \ | |
56 | X##_f[_i-_skip-1] >> _down; \ | |
57 X##_f[_i--] = X##_f[0] << _up; \ | |
58 } \ | |
59 for (; _i >= 0; --_i) \ | |
60 X##_f[_i] = 0; \ | |
61 } while (0) | |
62 | |
63 /* This one was broken too */ | |
/* Shift the four-word fraction X right by N bits, in place (logical shift:
   the vacated high words are cleared by the final loop).
   N is split into a whole-word count (_skip) and a residual bit count
   (_down).  Words are moved from the bottom up so no source word is
   overwritten before it is read.  The word-aligned case (_down == 0) is
   handled separately so that no word is shifted by the full
   _FP_W_TYPE_SIZE.  Bits shifted out are discarded.
   N is evaluated twice.
   NOTE(review): presumably callers guarantee 0 <= N < 4*_FP_W_TYPE_SIZE;
   confirm against callers. */
64 #define _FP_FRAC_SRL_4(X,N) \ | |
65 do { \ | |
66 _FP_I_TYPE _up, _down, _skip, _i; \ | |
67 _skip = (N) / _FP_W_TYPE_SIZE; \ | |
68 _down = (N) % _FP_W_TYPE_SIZE; \ | |
69 _up = _FP_W_TYPE_SIZE - _down; \ | |
70 if (!_down) \ | |
71 for (_i = 0; _i <= 3-_skip; ++_i) \ | |
72 X##_f[_i] = X##_f[_i+_skip]; \ | |
73 else \ | |
74 { \ | |
75 for (_i = 0; _i < 3-_skip; ++_i) \ | |
76 X##_f[_i] = X##_f[_i+_skip] >> _down \ | |
77 | X##_f[_i+_skip+1] << _up; \ | |
78 X##_f[_i++] = X##_f[3] >> _down; \ | |
79 } \ | |
80 for (; _i < 4; ++_i) \ | |
81 X##_f[_i] = 0; \ | |
82 } while (0) | |
83 | |
84 | |
85 /* Right shift with sticky-lsb. | |
86 * What this actually means is that we do a standard right-shift, | |
87 * but that if any of the bits that fall off the right hand side | |
88 * were one then we always set the LSbit. | |
89 */ | |
/* Shift X right by N bits and report the sticky state separately.
   X receives the plain logical right shift.  S is assigned 1 when any bit
   that was shifted out of X was set, and 0 otherwise; this macro does not
   merge that bit into X itself.
   The lost bits are collected in _s: first every whole word that is skipped,
   then (when there is a residual bit count) the low _down bits of word
   _skip, isolated by shifting that word left by _up.  The second use of _i
   in "_s |= X##_f[_i] << _up" relies on _i still being equal to _skip after
   the first loop.
   The size argument is not referenced by this expansion.
   N is evaluated twice. */
90 #define _FP_FRAC_SRST_4(X,S,N,size) \ | |
91 do { \ | |
92 _FP_I_TYPE _up, _down, _skip, _i; \ | |
93 _FP_W_TYPE _s; \ | |
94 _skip = (N) / _FP_W_TYPE_SIZE; \ | |
95 _down = (N) % _FP_W_TYPE_SIZE; \ | |
96 _up = _FP_W_TYPE_SIZE - _down; \ | |
97 for (_s = _i = 0; _i < _skip; ++_i) \ | |
98 _s |= X##_f[_i]; \ | |
99 if (!_down) \ | |
100 for (_i = 0; _i <= 3-_skip; ++_i) \ | |
101 X##_f[_i] = X##_f[_i+_skip]; \ | |
102 else \ | |
103 { \ | |
104 _s |= X##_f[_i] << _up; \ | |
105 for (_i = 0; _i < 3-_skip; ++_i) \ | |
106 X##_f[_i] = X##_f[_i+_skip] >> _down \ | |
107 | X##_f[_i+_skip+1] << _up; \ | |
108 X##_f[_i++] = X##_f[3] >> _down; \ | |
109 } \ | |
110 for (; _i < 4; ++_i) \ | |
111 X##_f[_i] = 0; \ | |
112 S = (_s != 0); \ | |
113 } while (0) | |
114 | |
/* Right shift with sticky bit merged: shift X right by N via
   _FP_FRAC_SRST_4, then OR the resulting 0/1 sticky flag into the least
   significant word of X.  size is only passed through to
   _FP_FRAC_SRST_4. */
115 #define _FP_FRAC_SRS_4(X,N,size) \ | |
116 do { \ | |
117 int _sticky; \ | |
118 _FP_FRAC_SRST_4(X, _sticky, N, size); \ | |
119 X##_f[0] |= _sticky; \ | |
120 } while (0) | |
121 | |
/* R = X + Y on four-word fractions.  Expands to __FP_FRAC_ADD_4 with the
   words of each operand listed most significant first. */
122 #define _FP_FRAC_ADD_4(R,X,Y) \ | |
123 __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \ | |
124 X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ | |
125 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) | |

/* R = X - Y on four-word fractions.  Expands to __FP_FRAC_SUB_4 with the
   words of each operand listed most significant first. */
127 #define _FP_FRAC_SUB_4(R,X,Y) \ | |
128 __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \ | |
129 X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ | |
130 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) | |

/* X -= Y on four-word fractions, via __FP_FRAC_DEC_4. */
132 #define _FP_FRAC_DEC_4(X,Y) \ | |
133 __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ | |
134 Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) | |

/* X += I, where I is a single-word value, via __FP_FRAC_ADDI_4. */
136 #define _FP_FRAC_ADDI_4(X,I) \ | |
137 __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I) | |
138 | |
/* Four-word fraction constants.  Each expands to a comma-separated list of
   four words, most significant first, suitable as the I argument of
   _FP_FRAC_SET_4: all zero, the value one, and all bits set. */
139 #define _FP_ZEROFRAC_4 0,0,0,0 | |
140 #define _FP_MINFRAC_4 0,0,0,1 | |
141 #define _FP_MAXFRAC_4 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0) | |

/* True when every word of X is zero. */
143 #define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0) | |
/* True when the most significant word of X, reinterpreted as the signed
   type _FP_WS_TYPE, is negative. */
144 #define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE)X##_f[3] < 0) | |
/* Nonzero when the high word of X (as selected by _FP_FRAC_HIGH_<fs>) has
   any bit of _FP_OVERFLOW_<fs> set.  Both names are defined elsewhere. */
145 #define _FP_FRAC_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs) | |
/* Clear the _FP_OVERFLOW_<fs> bits in the high word of X. */
146 #define _FP_FRAC_CLEAR_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs) | |
147 | |
/* True when X and Y agree in all four fraction words. */
148 #define _FP_FRAC_EQ_4(X,Y) \ | |
149 (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \ | |
150 && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3]) | |

/* True when X > Y.  Words are compared from word 3 down to word 0; a lower
   word is consulted only when all higher words are equal. */
152 #define _FP_FRAC_GT_4(X,Y) \ | |
153 (X##_f[3] > Y##_f[3] || \ | |
154 (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \ | |
155 (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \ | |
156 (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0]) \ | |
157 )) \ | |
158 )) \ | |
159 ) | |

/* True when X >= Y.  Same word-by-word comparison as _FP_FRAC_GT_4, except
   that the least significant word is compared with >=. */
161 #define _FP_FRAC_GE_4(X,Y) \ | |
162 (X##_f[3] > Y##_f[3] || \ | |
163 (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \ | |
164 (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \ | |
165 (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0]) \ | |
166 )) \ | |
167 )) \ | |
168 ) | |
169 | |
170 | |
/* Store in R the result of __FP_CLZ applied to the most significant nonzero
   word of X, plus _FP_W_TYPE_SIZE for every all-zero word above it.
   __FP_CLZ is defined elsewhere (presumably a count-leading-zeros of one
   word -- confirm).  R must be a modifiable lvalue.
   When words 3, 2 and 1 are all zero, __FP_CLZ is applied to word 0
   unconditionally, so for an all-zero X the result depends on what
   __FP_CLZ yields for 0. */
171 #define _FP_FRAC_CLZ_4(R,X) \ | |
172 do { \ | |
173 if (X##_f[3]) \ | |
174 { \ | |
175 __FP_CLZ(R,X##_f[3]); \ | |
176 } \ | |
177 else if (X##_f[2]) \ | |
178 { \ | |
179 __FP_CLZ(R,X##_f[2]); \ | |
180 R += _FP_W_TYPE_SIZE; \ | |
181 } \ | |
182 else if (X##_f[1]) \ | |
183 { \ | |
184 __FP_CLZ(R,X##_f[1]); \ | |
185 R += _FP_W_TYPE_SIZE*2; \ | |
186 } \ | |
187 else \ | |
188 { \ | |
189 __FP_CLZ(R,X##_f[0]); \ | |
190 R += _FP_W_TYPE_SIZE*3; \ | |
191 } \ | |
192 } while(0) | |
193 | |
194 | |
/* Unpack the floating-point value val into X without any normalization.
   val is stored in the .flt member of a local union _FP_UNION_<fs> and the
   bit-fields frac0..frac3, exp and sign are read back into X_f[0..3], X_e
   and X_s.  The union type is defined elsewhere. */
195 #define _FP_UNPACK_RAW_4(fs, X, val) \ | |
196 do { \ | |
197 union _FP_UNION_##fs _flo; _flo.flt = (val); \ | |
198 X##_f[0] = _flo.bits.frac0; \ | |
199 X##_f[1] = _flo.bits.frac1; \ | |
200 X##_f[2] = _flo.bits.frac2; \ | |
201 X##_f[3] = _flo.bits.frac3; \ | |
202 X##_e = _flo.bits.exp; \ | |
203 X##_s = _flo.bits.sign; \ | |
204 } while (0) | |

/* Pointer variant of _FP_UNPACK_RAW_4: val is an address that is cast to
   union _FP_UNION_<fs> * and the fields are read through that pointer. */
206 #define _FP_UNPACK_RAW_4_P(fs, X, val) \ | |
207 do { \ | |
208 union _FP_UNION_##fs *_flo = \ | |
209 (union _FP_UNION_##fs *)(val); \ | |
210 \ | |
211 X##_f[0] = _flo->bits.frac0; \ | |
212 X##_f[1] = _flo->bits.frac1; \ | |
213 X##_f[2] = _flo->bits.frac2; \ | |
214 X##_f[3] = _flo->bits.frac3; \ | |
215 X##_e = _flo->bits.exp; \ | |
216 X##_s = _flo->bits.sign; \ | |
217 } while (0) | |

/* Pack X into the floating-point lvalue val without rounding: X_f[0..3],
   X_e and X_s are written to the bit-fields of a local union
   _FP_UNION_<fs>, whose .flt member is then assigned to val. */
219 #define _FP_PACK_RAW_4(fs, val, X) \ | |
220 do { \ | |
221 union _FP_UNION_##fs _flo; \ | |
222 _flo.bits.frac0 = X##_f[0]; \ | |
223 _flo.bits.frac1 = X##_f[1]; \ | |
224 _flo.bits.frac2 = X##_f[2]; \ | |
225 _flo.bits.frac3 = X##_f[3]; \ | |
226 _flo.bits.exp = X##_e; \ | |
227 _flo.bits.sign = X##_s; \ | |
228 (val) = _flo.flt; \ | |
229 } while (0) | |

/* Pointer variant of _FP_PACK_RAW_4: val is an address that is cast to
   union _FP_UNION_<fs> * and the fields are written through that pointer. */
231 #define _FP_PACK_RAW_4_P(fs, val, X) \ | |
232 do { \ | |
233 union _FP_UNION_##fs *_flo = \ | |
234 (union _FP_UNION_##fs *)(val); \ | |
235 \ | |
236 _flo->bits.frac0 = X##_f[0]; \ | |
237 _flo->bits.frac1 = X##_f[1]; \ | |
238 _flo->bits.frac2 = X##_f[2]; \ | |
239 _flo->bits.frac3 = X##_f[3]; \ | |
240 _flo->bits.exp = X##_e; \ | |
241 _flo->bits.sign = X##_s; \ | |
242 } while (0) | |
243 | |
244 /* | |
245 * Multiplication algorithms: | |
246 */ | |
247 | |
248 /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ | |
249 | |
/* R = X * Y on four-word fractions, built from a one-word by one-word
   multiply.
   doit(hi, lo, a, b) is invoked for all sixteen word pairs and must store
   the two-word product of a and b in hi:lo.  The partial products are
   accumulated column by column into the eight-word temporary _z with
   __FP_FRAC_ADD_3 (and __FP_FRAC_ADD_2 for the topmost pair); every
   accumulation passes the _z words as both result and last operand.
   The full product is then shifted right by wfracbits-1 with the sticky bit
   merged (_FP_FRAC_SRS_8) and its low four words are stored in R.
   X and Y are only read.  _FP_FRAC_DECL_8, _FP_FRAC_DECL_2,
   _FP_FRAC_WORD_8 and _FP_FRAC_SRS_8 are defined elsewhere. */
250 #define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \ | |
251 do { \ | |
252 _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ | |
253 _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f); \ | |
254 \ | |
255 doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \ | |
256 doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]); \ | |
257 doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]); \ | |
258 doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]); \ | |
259 doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]); \ | |
260 doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]); \ | |
261 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ | |
262 _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0, \ | |
263 0,0,_FP_FRAC_WORD_8(_z,1)); \ | |
264 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ | |
265 _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0, \ | |
266 _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ | |
267 _FP_FRAC_WORD_8(_z,1)); \ | |
268 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ | |
269 _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0, \ | |
270 0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2)); \ | |
271 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ | |
272 _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0, \ | |
273 _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ | |
274 _FP_FRAC_WORD_8(_z,2)); \ | |
275 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ | |
276 _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0, \ | |
277 _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ | |
278 _FP_FRAC_WORD_8(_z,2)); \ | |
279 doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]); \ | |
280 doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]); \ | |
281 doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]); \ | |
282 doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]); \ | |
283 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ | |
284 _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0, \ | |
285 0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3)); \ | |
286 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ | |
287 _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0, \ | |
288 _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ | |
289 _FP_FRAC_WORD_8(_z,3)); \ | |
290 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ | |
291 _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0, \ | |
292 _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ | |
293 _FP_FRAC_WORD_8(_z,3)); \ | |
294 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ | |
295 _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0, \ | |
296 _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ | |
297 _FP_FRAC_WORD_8(_z,3)); \ | |
298 doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]); \ | |
299 doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]); \ | |
300 doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]); \ | |
301 doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]); \ | |
302 doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]); \ | |
303 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ | |
304 _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0, \ | |
305 0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4)); \ | |
306 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ | |
307 _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0, \ | |
308 _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ | |
309 _FP_FRAC_WORD_8(_z,4)); \ | |
310 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ | |
311 _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0, \ | |
312 _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ | |
313 _FP_FRAC_WORD_8(_z,4)); \ | |
314 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ | |
315 _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0, \ | |
316 0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5)); \ | |
317 __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ | |
318 _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0, \ | |
319 _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ | |
320 _FP_FRAC_WORD_8(_z,5)); \ | |
321 doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]); \ | |
322 __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ | |
323 _b_f1,_b_f0, \ | |
324 _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6)); \ | |
325 \ | |
326 /* Normalize since we know where the msb of the multiplicands \ | |
327 were (bit B), we know that the msb of the product is \ | |
328 at either 2B or 2B-1. */ \ | |
329 _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \ | |
330 __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \ | |
331 _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \ | |
332 } while (0) | |
333 | |
/* R = X * Y on four-word fractions, using GMP's mpn_mul_n for the
   four-limb by four-limb multiply.
   The eight-word product is formed in the temporary _z, shifted right by
   wfracbits-1 with the sticky bit merged (_FP_FRAC_SRS_8), and its low four
   words are stored in R.  X and Y are only read.
   The operands handed to mpn_mul_n are the fraction arrays of the X and Y
   macro arguments; earlier the names _x_f and _y_f were hard-wired, which
   only compiled when the caller's variables happened to be called _x and
   _y (that spelling keeps working, since X##_f then expands to _x_f).
   NOTE(review): mpn_mul_n works on mp_limb_t limbs; this assumes
   _FP_W_TYPE has the same width -- confirm for the target.  */
#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y)                              \
  do {                                                                      \
    _FP_FRAC_DECL_8(_z);                                                    \
                                                                            \
    mpn_mul_n(_z_f, X##_f, Y##_f, 4);                                       \
                                                                            \
    /* Normalize since we know where the msb of the multiplicands           \
       were (bit B), we know that the msb of the product is                 \
       at either 2B or 2B-1.  */                                            \
    _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);                           \
    __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),        \
                    _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));          \
  } while (0)
347 | |
348 /* | |
349 * Helper utility for _FP_DIV_MEAT_4_udiv: | |
350 * pppp = m * nnn | |
351 */ | |
/* Four-word product of a single word and a three-word value:
   (p3,p2,p1,p0) = m * (n2,n1,n0), most significant word first.
   Each umul_ppmm call yields a two-word partial product; the low half of
   the n1 and n2 products is parked in _t and folded into the two words
   above it with __FP_FRAC_ADDI_2, which also carries into the upper word.
   umul_ppmm, __FP_FRAC_ADDI_2 and UWtype are defined elsewhere. */
352 #define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0) \ | |
353 do { \ | |
354 UWtype _t; \ | |
355 umul_ppmm(p1,p0,m,n0); \ | |
356 umul_ppmm(p2,_t,m,n1); \ | |
357 __FP_FRAC_ADDI_2(p2,p1,_t); \ | |
358 umul_ppmm(p3,_t,m,n2); \ | |
359 __FP_FRAC_ADDI_2(p3,p2,_t); \ | |
360 } while (0) | |
361 | |
362 /* | |
363 * Division algorithms: | |
364 */ | |
365 | |
/* Divide the four-word fraction X by Y, producing one quotient word per
   loop pass into R_f[3] down to R_f[0].
   Both X and Y are overwritten: X holds the running remainder and Y is
   shifted left by _FP_WFRACXBITS_<fs>.  When X > Y on entry, X is first
   shifted right by one bit (the bit shifted out is kept in _n_f[3]);
   otherwise R_e is decremented.
   Each pass either takes the special path for equal top words (quotient
   word -1 or -2) or estimates the word with udiv_qrnnd, multiplies it back
   with umul_ppppmnnn and corrects the estimate downwards at most twice.
   On the final pass of the udiv_qrnnd path, _FP_WORK_STICKY is ORed into
   R_f[0] when X and _m differ.
   NOTE(review): _FP_FRAC_SUB_4(X, Y, X) and _FP_FRAC_ADD_4(X, Y, X) pass X
   as both the result and the second operand; confirm that the underlying
   __FP_FRAC_SUB_4 / __FP_FRAC_ADD_4 definitions in use tolerate that
   aliasing. */
366 #define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \ | |
367 do { \ | |
368 int _i; \ | |
369 _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m); \ | |
370 _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4); \ | |
371 if (_FP_FRAC_GT_4(X, Y)) \ | |
372 { \ | |
373 _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \ | |
374 _FP_FRAC_SRL_4(X, 1); \ | |
375 } \ | |
376 else \ | |
377 R##_e--; \ | |
378 \ | |
379 /* Normalize, i.e. make the most significant bit of the \ | |
380 denominator set. */ \ | |
381 _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs); \ | |
382 \ | |
383 for (_i = 3; ; _i--) \ | |
384 { \ | |
385 if (X##_f[3] == Y##_f[3]) \ | |
386 { \ | |
387 /* This is a special case, not an optimization \ | |
388 (X##_f[3]/Y##_f[3] would not fit into UWtype). \ | |
389 As X## is guaranteed to be < Y, R##_f[_i] can be either \ | |
390 (UWtype)-1 or (UWtype)-2. */ \ | |
391 R##_f[_i] = -1; \ | |
392 if (!_i) \ | |
393 break; \ | |
394 __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ | |
395 Y##_f[2], Y##_f[1], Y##_f[0], 0, \ | |
396 X##_f[2], X##_f[1], X##_f[0], _n_f[_i]); \ | |
397 _FP_FRAC_SUB_4(X, Y, X); \ | |
398 if (X##_f[3] > Y##_f[3]) \ | |
399 { \ | |
400 R##_f[_i] = -2; \ | |
401 _FP_FRAC_ADD_4(X, Y, X); \ | |
402 } \ | |
403 } \ | |
404 else \ | |
405 { \ | |
406 udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \ | |
407 umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0], \ | |
408 R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]); \ | |
409 X##_f[2] = X##_f[1]; \ | |
410 X##_f[1] = X##_f[0]; \ | |
411 X##_f[0] = _n_f[_i]; \ | |
412 if (_FP_FRAC_GT_4(_m, X)) \ | |
413 { \ | |
414 R##_f[_i]--; \ | |
415 _FP_FRAC_ADD_4(X, Y, X); \ | |
416 if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X)) \ | |
417 { \ | |
418 R##_f[_i]--; \ | |
419 _FP_FRAC_ADD_4(X, Y, X); \ | |
420 } \ | |
421 } \ | |
422 _FP_FRAC_DEC_4(X, _m); \ | |
423 if (!_i) \ | |
424 { \ | |
425 if (!_FP_FRAC_EQ_4(X, _m)) \ | |
426 R##_f[0] |= _FP_WORK_STICKY; \ | |
427 break; \ | |
428 } \ | |
429 } \ | |
430 } \ | |
431 } while (0) | |
432 | |
433 | |
434 /* | |
435 * Square root algorithms: | |
436 * We have just one right now, maybe Newton approximation | |
437 * should be added for those machines where division is fast. | |
438 */ | |
439 | |
/* Bit-serial square root of the four-word fraction X, accumulated into R.
   q supplies the starting bit for word 3 and is consumed by the macro: it
   is shifted right until zero, then reset to the top bit of a word for each
   lower word, so it must be a modifiable lvalue.
   For every bit the trial value T = S + q (with carries propagated into the
   higher words of S) is compared with the remainder X; when T <= X, T is
   subtracted from X, S is advanced by a further q and the bit q is added to
   the current word of R.  X is shifted left by one after every bit.
   The word-0 loop stops once q reaches _FP_WORK_ROUND.  Afterwards a
   nonzero remainder sets _FP_WORK_STICKY in R_f[0], and additionally
   _FP_WORK_ROUND when X > S.
   R, S, T and X are all updated in place.
   NOTE(review): R and S are only ever incremented here, so presumably the
   caller zeroes them first -- confirm against callers. */
440 #define _FP_SQRT_MEAT_4(R, S, T, X, q) \ | |
441 do { \ | |
442 while (q) \ | |
443 { \ | |
444 T##_f[3] = S##_f[3] + q; \ | |
445 if (T##_f[3] <= X##_f[3]) \ | |
446 { \ | |
447 S##_f[3] = T##_f[3] + q; \ | |
448 X##_f[3] -= T##_f[3]; \ | |
449 R##_f[3] += q; \ | |
450 } \ | |
451 _FP_FRAC_SLL_4(X, 1); \ | |
452 q >>= 1; \ | |
453 } \ | |
454 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ | |
455 while (q) \ | |
456 { \ | |
457 T##_f[2] = S##_f[2] + q; \ | |
458 T##_f[3] = S##_f[3]; \ | |
459 if (T##_f[3] < X##_f[3] || \ | |
460 (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \ | |
461 { \ | |
462 S##_f[2] = T##_f[2] + q; \ | |
463 S##_f[3] += (T##_f[2] > S##_f[2]); \ | |
464 __FP_FRAC_DEC_2(X##_f[3], X##_f[2], \ | |
465 T##_f[3], T##_f[2]); \ | |
466 R##_f[2] += q; \ | |
467 } \ | |
468 _FP_FRAC_SLL_4(X, 1); \ | |
469 q >>= 1; \ | |
470 } \ | |
471 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ | |
472 while (q) \ | |
473 { \ | |
474 T##_f[1] = S##_f[1] + q; \ | |
475 T##_f[2] = S##_f[2]; \ | |
476 T##_f[3] = S##_f[3]; \ | |
477 if (T##_f[3] < X##_f[3] || \ | |
478 (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] || \ | |
479 (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1])))) \ | |
480 { \ | |
481 S##_f[1] = T##_f[1] + q; \ | |
482 S##_f[2] += (T##_f[1] > S##_f[1]); \ | |
483 S##_f[3] += (T##_f[2] > S##_f[2]); \ | |
484 __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1], \ | |
485 T##_f[3], T##_f[2], T##_f[1]); \ | |
486 R##_f[1] += q; \ | |
487 } \ | |
488 _FP_FRAC_SLL_4(X, 1); \ | |
489 q >>= 1; \ | |
490 } \ | |
491 q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ | |
492 while (q != _FP_WORK_ROUND) \ | |
493 { \ | |
494 T##_f[0] = S##_f[0] + q; \ | |
495 T##_f[1] = S##_f[1]; \ | |
496 T##_f[2] = S##_f[2]; \ | |
497 T##_f[3] = S##_f[3]; \ | |
498 if (_FP_FRAC_GE_4(X,T)) \ | |
499 { \ | |
500 S##_f[0] = T##_f[0] + q; \ | |
501 S##_f[1] += (T##_f[0] > S##_f[0]); \ | |
502 S##_f[2] += (T##_f[1] > S##_f[1]); \ | |
503 S##_f[3] += (T##_f[2] > S##_f[2]); \ | |
504 _FP_FRAC_DEC_4(X, T); \ | |
505 R##_f[0] += q; \ | |
506 } \ | |
507 _FP_FRAC_SLL_4(X, 1); \ | |
508 q >>= 1; \ | |
509 } \ | |
510 if (!_FP_FRAC_ZEROP_4(X)) \ | |
511 { \ | |
512 if (_FP_FRAC_GT_4(X,S)) \ | |
513 R##_f[0] |= _FP_WORK_ROUND; \ | |
514 R##_f[0] |= _FP_WORK_STICKY; \ | |
515 } \ | |
516 } while (0) | |
517 | |
518 | |
519 /* | |
520 * Internals | |
521 */ | |
522 | |
/* Assign the four words of X from I3 (most significant) down to I0 (least
   significant).  Expands to a single comma expression; the assignments are
   made in the order word 3, 2, 1, 0. */
523 #define __FP_FRAC_SET_4(X,I3,I2,I1,I0) \ | |
524 (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0) | |
525 | |
/* Three-word addition (r2,r1,r0) = (x2,x1,x0) + (y2,y1,y0), most
   significant word first.  Only defined when no other definition of
   __FP_FRAC_ADD_3 is already in effect.
   A carry out of a word is detected by comparing the freshly written result
   word with the corresponding x word, so the result operands must not be
   the same lvalues as the x operands; they may coincide with the y operands
   (each y word is read only once, before its result word is stored).
   The carry out of the top word is discarded. */
526 #ifndef __FP_FRAC_ADD_3 | |
527 #define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \ | |
528 do { \ | |
529 _FP_W_TYPE _c1, _c2; \ | |
530 r0 = x0 + y0; \ | |
531 _c1 = r0 < x0; \ | |
532 r1 = x1 + y1; \ | |
533 _c2 = r1 < x1; \ | |
534 r1 += _c1; \ | |
535 _c2 |= r1 < _c1; \ | |
536 r2 = x2 + y2 + _c2; \ | |
537 } while (0) | |
538 #endif | |

/* Four-word addition (r3..r0) = (x3..x0) + (y3..y0), most significant word
   first.  Only defined when no other definition of __FP_FRAC_ADD_4 is
   already in effect.
   Same carry scheme and aliasing rule as the three-word version: the result
   operands may coincide with the y operands but not with the x operands.
   The carry out of the top word is discarded. */
540 #ifndef __FP_FRAC_ADD_4 | |
541 #define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ | |
542 do { \ | |
543 _FP_W_TYPE _c1, _c2, _c3; \ | |
544 r0 = x0 + y0; \ | |
545 _c1 = r0 < x0; \ | |
546 r1 = x1 + y1; \ | |
547 _c2 = r1 < x1; \ | |
548 r1 += _c1; \ | |
549 _c2 |= r1 < _c1; \ | |
550 r2 = x2 + y2; \ | |
551 _c3 = r2 < x2; \ | |
552 r2 += _c2; \ | |
553 _c3 |= r2 < _c2; \ | |
554 r3 = x3 + y3 + _c3; \ | |
555 } while (0) | |
556 #endif | |
557 | |
/* Three-word subtraction (r2,r1,r0) = (x2,x1,x0) - (y2,y1,y0), most
   significant word first.  Only defined when no other definition of
   __FP_FRAC_SUB_3 is already in effect.
   A borrow out of a word occurs when x < y in that word, or when the words
   are equal and a borrow comes in.  Both conditions are derived from the
   difference r = x - y before the incoming borrow is applied: "r > x" means
   x < y, and "r < borrow_in" means r is zero with a borrow coming in.
   No y word is read again after its result word has been stored, so the
   result operands may be the same lvalues as the y operands.  They must
   still not coincide with the x operands, which are compared against after
   the store (use __FP_FRAC_DEC_3 for x -= y).
   The borrow out of the top word is discarded.  */
#ifndef __FP_FRAC_SUB_3
#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)                         \
  do {                                                                      \
    _FP_W_TYPE _c1, _c2;                                                    \
    r0 = x0 - y0;                                                           \
    _c1 = r0 > x0;                                                          \
    r1 = x1 - y1;                                                           \
    _c2 = r1 > x1;                                                          \
    /* x1 == y1 with a borrow coming in: r1 is 0 before the borrow.  */     \
    _c2 |= r1 < _c1;                                                        \
    r1 -= _c1;                                                              \
    r2 = x2 - y2 - _c2;                                                     \
  } while (0)
#endif
571 | |
/* Four-word subtraction (r3..r0) = (x3..x0) - (y3..y0), most significant
   word first.  Only defined when no other definition of __FP_FRAC_SUB_4 is
   already in effect.
   A borrow out of a word occurs when x < y in that word, or when the words
   are equal and a borrow comes in.  Both conditions are derived from the
   difference r = x - y before the incoming borrow is applied: "r > x" means
   x < y, and "r < borrow_in" means r is zero with a borrow coming in.
   No y word is read again after its result word has been stored, so the
   result operands may be the same lvalues as the y operands, as in
   _FP_FRAC_SUB_4(X, Y, X).  They must still not coincide with the x
   operands, which are compared against after the store (use
   __FP_FRAC_DEC_4 for x -= y).
   The borrow out of the top word is discarded.  */
#ifndef __FP_FRAC_SUB_4
#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)                \
  do {                                                                      \
    _FP_W_TYPE _c1, _c2, _c3;                                               \
    r0 = x0 - y0;                                                           \
    _c1 = r0 > x0;                                                          \
    r1 = x1 - y1;                                                           \
    _c2 = r1 > x1;                                                          \
    /* x1 == y1 with a borrow coming in: r1 is 0 before the borrow.  */     \
    _c2 |= r1 < _c1;                                                        \
    r1 -= _c1;                                                              \
    r2 = x2 - y2;                                                           \
    _c3 = r2 > x2;                                                          \
    _c3 |= r2 < _c2;                                                        \
    r2 -= _c2;                                                              \
    r3 = x3 - y3 - _c3;                                                     \
  } while (0)
#endif
589 | |
/* In-place three-word subtraction (x2,x1,x0) -= (y2,y1,y0).
   The x words are first copied to temporaries so that __FP_FRAC_SUB_3 can
   write its result into x while still reading the original x values.
   Only defined when no other definition is already in effect. */
590 #ifndef __FP_FRAC_DEC_3 | |
591 #define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) \ | |
592 do { \ | |
593 UWtype _t0, _t1, _t2; \ | |
594 _t0 = x0, _t1 = x1, _t2 = x2; \ | |
595 __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0); \ | |
596 } while (0) | |
597 #endif | |

/* In-place four-word subtraction (x3..x0) -= (y3..y0), using the same
   copy-to-temporaries approach around __FP_FRAC_SUB_4.
   Only defined when no other definition is already in effect. */
599 #ifndef __FP_FRAC_DEC_4 | |
600 #define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) \ | |
601 do { \ | |
602 UWtype _t0, _t1, _t2, _t3; \ | |
603 _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3; \ | |
604 __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0); \ | |
605 } while (0) | |
606 #endif | |

/* Add the single-word value i to the four-word value (x3..x0) in place.
   _t carries the overflow of each word into the next; the carry out of x3
   is discarded.  i is expanded twice and without parentheses, so it should
   be a simple, side-effect-free operand.
   Only defined when no other definition is already in effect. */
608 #ifndef __FP_FRAC_ADDI_4 | |
609 #define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \ | |
610 do { \ | |
611 UWtype _t; \ | |
612 _t = ((x0 += i) < i); \ | |
613 x1 += _t; _t = (x1 < _t); \ | |
614 x2 += _t; _t = (x2 < _t); \ | |
615 x3 += _t; \ | |
616 } while (0) | |
617 #endif | |
618 | |
619 /* Convert FP values between word sizes. This appears to be more | |
620 * complicated than I'd have expected it to be, so these might be | |
621 * wrong... These macros are in any case somewhat bogus because they | |
622 * use information about what various FRAC_n variables look like | |
623 * internally [eg, that 2 word vars are X_f0 and X_f1]. But so do | |
624 * the ones in op-2.h and op-1.h. | |
625 */ | |
/* Narrow a four-word fraction S to the one-word fraction D by keeping only
   the least significant word; the upper three words are dropped. */
626 #define _FP_FRAC_COPY_1_4(D, S) (D##_f = S##_f[0]) | |

/* Narrow a four-word fraction S to the two-word fraction D (D_f0 low,
   D_f1 high) by keeping the two least significant words. */
628 #define _FP_FRAC_COPY_2_4(D, S) \ | |
629 do { \ | |
630 D##_f0 = S##_f[0]; \ | |
631 D##_f1 = S##_f[1]; \ | |
632 } while (0) | |
633 | |
634 /* Assembly/disassembly for converting to/from integral types. | |
635 * No shifting or overflow handled here. | |
636 */ | |
/* Put the FP value X into r, which is an integer lvalue of size rsize bits.
   Only as many fraction words as fit are used: word 0 alone when rsize is
   at most one word, words 1..0 when it is at most two words, and all four
   words otherwise (a three-word integer is implausible and is handled by
   the four-word path).  No shifting or overflow handling is done here.
   r and rsize are parenthesized so that expression arguments (for example
   a conditional expression for rsize) group as the caller intends.  Every
   shift of r is additionally guarded by rsize so that a branch that cannot
   be taken for a narrow r never contains an unconditional shift by the
   full width of r.
   r and rsize are evaluated more than once.  */
#define _FP_FRAC_ASSEMBLE_4(r, X, rsize)                                    \
  do {                                                                      \
    if ((rsize) <= _FP_W_TYPE_SIZE)                                         \
      (r) = X##_f[0];                                                       \
    else if ((rsize) <= 2*_FP_W_TYPE_SIZE)                                  \
      {                                                                     \
        (r) = X##_f[1];                                                     \
        (r) = ((rsize) <= _FP_W_TYPE_SIZE                                   \
               ? 0 : (r) << _FP_W_TYPE_SIZE);                               \
        (r) += X##_f[0];                                                    \
      }                                                                     \
    else                                                                    \
      {                                                                     \
        (r) = X##_f[3];                                                     \
        (r) = ((rsize) <= _FP_W_TYPE_SIZE                                   \
               ? 0 : (r) << _FP_W_TYPE_SIZE);                               \
        (r) += X##_f[2];                                                    \
        (r) = ((rsize) <= _FP_W_TYPE_SIZE                                   \
               ? 0 : (r) << _FP_W_TYPE_SIZE);                               \
        (r) += X##_f[1];                                                    \
        (r) = ((rsize) <= _FP_W_TYPE_SIZE                                   \
               ? 0 : (r) << _FP_W_TYPE_SIZE);                               \
        (r) += X##_f[0];                                                    \
      }                                                                     \
  } while (0)
661 | |
662 /* "No disassemble Number Five!" */ | |
/* Move an integer r of size rsize bits into X's fractional part, least
   significant word first.  We rely on the _f[] array consisting of words of
   size _FP_W_TYPE_SIZE to avoid having to mask the values we store into it:
   each assignment simply truncates to one word.  Words that lie beyond
   rsize are set to 0 instead of shifting r by its full width or more.
   The expansion is a plain do { } while (0) with no trailing semicolon, so
   the macro behaves as a single statement (for example as the body of an
   if that has an else).  r and rsize are parenthesized so that expression
   arguments group as the caller intends; both are evaluated more than
   once.  */
#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)                                 \
  do {                                                                      \
    X##_f[0] = (r);                                                         \
    X##_f[1] = ((rsize) <= _FP_W_TYPE_SIZE                                  \
                ? 0 : (r) >> _FP_W_TYPE_SIZE);                              \
    X##_f[2] = ((rsize) <= 2*_FP_W_TYPE_SIZE                                \
                ? 0 : (r) >> 2*_FP_W_TYPE_SIZE);                            \
    X##_f[3] = ((rsize) <= 3*_FP_W_TYPE_SIZE                                \
                ? 0 : (r) >> 3*_FP_W_TYPE_SIZE);                            \
  } while (0)
674 | |
/* Widen the one-word fraction S into the four-word fraction D: word 0
   receives S_f and the three upper words are cleared. */
675 #define _FP_FRAC_COPY_4_1(D, S) \ | |
676 do { \ | |
677 D##_f[0] = S##_f; \ | |
678 D##_f[1] = D##_f[2] = D##_f[3] = 0; \ | |
679 } while (0) | |

/* Widen the two-word fraction S (S_f0 low, S_f1 high) into the four-word
   fraction D: words 0 and 1 are copied and words 2 and 3 are cleared. */
681 #define _FP_FRAC_COPY_4_2(D, S) \ | |
682 do { \ | |
683 D##_f[0] = S##_f0; \ | |
684 D##_f[1] = S##_f1; \ | |
685 D##_f[2] = D##_f[3] = 0; \ | |
686 } while (0) | |

/* Same-size copy: an alias for _FP_FRAC_COPY_4. */
688 #define _FP_FRAC_COPY_4_4(D,S) _FP_FRAC_COPY_4(D,S) |