Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/sparc/lb1spc.asm @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
   for the SPARC processor.

   These routines are derived from the SPARC Architecture Manual, version 8,
   slightly edited to match the desired calling convention, and also to
   optimize them for our purposes.  */
7 | |
#ifdef L_mulsi3
/*
 * .umul -- 32-bit integer multiply built from mulscc (multiply step)
 * instructions.
 *
 * Input:   %o0 = multiplier, %o1 = multiplicand
 * Output:  %o0 = low 32 bits of the product
 * Scratch: %o4 (partial product), %o5, %y, integer condition codes
 *
 * If neither operand has a bit set above bit 11, the 12-step short
 * sequence is used; otherwise the full 32-step sequence runs.
 *
 * Layout convention: an instruction indented by one extra space sits
 * in the delay slot of the branch/return immediately before it.
 */
	.text
	.align	4
	.global	.umul
	.proc	4
.umul:
	or	%o0, %o1, %o4		! logical or of multiplier and multiplicand
	mov	%o0, %y			! multiplier to Y register
	andncc	%o4, 0xfff, %o5		! mask out lower 12 bits
	be	mul_shortway		! can do it the short way
	 andcc	%g0, %g0, %o4		! zero the partial product and clear NV cc
	!
	! long multiply
	!
	mulscc	%o4, %o1, %o4		! first iteration of 33
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4		! 32nd iteration
	mulscc	%o4, %g0, %o4		! last iteration only shifts
	! the upper 32 bits of product are wrong, but we do not care
	retl
	 rd	%y, %o0			! low word of the product is left in %y
	!
	! short multiply
	!
mul_shortway:
	mulscc	%o4, %o1, %o4		! first iteration of 13
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4
	mulscc	%o4, %o1, %o4		! 12th iteration
	mulscc	%o4, %g0, %o4		! last iteration only shifts
	rd	%y, %o5
	sll	%o4, 12, %o4		! left shift partial product by 12 bits
	srl	%o5, 20, %o5		! right shift partial product by 20 bits
	retl
	 or	%o5, %o4, %o0		! merge for true product
#endif
81 | |
#ifdef L_divsi3
/*
 * Division and remainder, from Appendix E of the SPARC Version 8
 * Architecture Manual, with fixes from Gordon Irlam.
 */

/*
 * Input:	dividend and divisor in %o0 and %o1 respectively.
 * Output:	quotient in %o0.
 *
 * Entry points:
 *  .udiv	unsigned divide; clears %g3 and joins the common code
 *  .div	signed divide; sets %g3 = %o1 ^ %o0 (negative when the
 *		quotient must be negated) and makes both operands
 *		nonnegative before joining the common code
 *
 * A zero divisor executes "ta 0x2" (ST_DIV0); if the trap returns,
 * the routine returns 0.
 *
 * m4 parameters (this text is the expansion of an m4 template, so the
 * parameter names in the list below show up already replaced by their
 * values.  NOTE(review): the names were presumably NAME, OP and S as
 * in the V8 manual -- confirm against the template):
 *  .div	name of function to generate
 *  div		div => %o0 / %o1; rem => %o0 % %o1
 *  true	true => signed; false => unsigned
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top decade of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Register assignment in the code below:
 *  %o2 = Q, %o3 = R, %o4 = ITER, %o5 = V,
 *  %g1 = 1 << (32 - 4 - 1) threshold, %g2 = single-bit step count,
 *  %g3 = sign of the result.
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 *
 * Layout convention: an instruction indented by one extra space sits
 * in the delay slot of the branch/return immediately before it.
 */
	.global	.udiv
	.align	4
	.proc	4
	.text
.udiv:
	b	ready_to_divide
	 mov	0, %g3			! result is always positive

	.global	.div
	.align	4
	.proc	4
	.text
.div:
	! compute sign of result; if neither is negative, no problem
	orcc	%o1, %o0, %g0		! either negative?
	bge	ready_to_divide		! no, go do the divide
	 xor	%o1, %o0, %g3		! compute sign in any case
	tst	%o1
	bge	1f
	 tst	%o0
	! %o1 is definitely negative; %o0 might also be negative
	bge	ready_to_divide		! if %o0 not negative...
	 sub	%g0, %o1, %o1		! in any case, make %o1 nonneg
1:	! %o0 is negative, %o1 is nonnegative
	sub	%g0, %o0, %o0		! make %o0 nonnegative


ready_to_divide:

	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5		! V = divisor; also test it for zero
	bne	1f
	 mov	%o0, %o3		! R = dividend

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	0x2			! ST_DIV0
	retl
	 clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	got_result		! (and algorithm fails otherwise)
	 clr	%o2			! Q = 0
	sethi	%hi(1 << (32 - 4 - 1)), %g1
	cmp	%o3, %g1
	blu	not_really_big
	 clr	%o4			! ITER = 0

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
1:
	cmp	%o5, %g1
	bgeu	3f
	 mov	1, %g2
	sll	%o5, 4, %o5
	b	1b
	 add	%o4, 1, %o4

	! Now compute %g2.
2:	addcc	%o5, %o5, %o5
	bcc	not_too_big
	 add	%g2, 1, %g2

	! We get here if the %o1 overflowed while shifting.
	! This means that %o3 has the high-order bit set.
	! Restore %o5 and subtract from %o3.
	sll	%g1, 4, %g1		! high order bit
	srl	%o5, 1, %o5		! rest of %o5
	add	%o5, %g1, %o5
	b	do_single_div
	 sub	%g2, 1, %g2

not_too_big:
3:	cmp	%o5, %o3
	blu	2b
	 nop
	be	do_single_div
	 nop
	/* NB: these are commented out in the V8-SPARC manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g2
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
do_single_div:
	subcc	%g2, 1, %g2
	bl	end_regular_divide
	 nop
	sub	%o3, %o5, %o3
	mov	1, %o2
	b	end_single_divloop
	 nop
single_divloop:
	sll	%o2, 1, %o2
	bl	1f
	 srl	%o5, 1, %o5
	! %o3 >= 0
	sub	%o3, %o5, %o3
	b	2f
	 add	%o2, 1, %o2
1:	! %o3 < 0
	add	%o3, %o5, %o3
	sub	%o2, 1, %o2
2:
end_single_divloop:
	subcc	%g2, 1, %g2
	bge	single_divloop
	 tst	%o3
	b,a	end_regular_divide	! annulled: no delay-slot instruction runs

not_really_big:
1:
	sll	%o5, 4, %o5
	cmp	%o5, %o3
	bleu	1b
	 addcc	%o4, 1, %o4
	be	got_result
	 sub	%o4, 1, %o4

	tst	%o3			! set up for initial iteration
divloop:
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L1.16
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L2.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L3.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L4.23
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2+1), %o2

L4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2-1), %o2


L3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L4.21
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2+1), %o2

L4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2-1), %o2

L2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L3.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L4.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2+1), %o2

L4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2-1), %o2

L3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L4.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2+1), %o2

L4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2-1), %o2

L1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L2.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L3.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L4.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2+1), %o2

L4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2-1), %o2

L3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L4.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2+1), %o2

L4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2-1), %o2

L2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L3.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L4.11
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2+1), %o2

L4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2-1), %o2

L3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L4.9
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2+1), %o2

L4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2-1), %o2

9:
end_regular_divide:
	subcc	%o4, 1, %o4
	bge	divloop
	 tst	%o3
	bl,a	got_result
	! non-restoring fixup here (one instruction only!)
	 sub	%o2, 1, %o2		! runs only when the branch is taken (R < 0)


got_result:
	! check to see if answer should be < 0
	tst	%g3
	bl,a	1f
	 sub	%g0, %o2, %o2		! runs only when %g3 is negative
1:
	retl
	 mov	%o2, %o0
#endif
439 | |
#ifdef L_modsi3
/* This implementation was taken from glibc:
 *
 * Input:	dividend and divisor in %o0 and %o1 respectively.
 * Output:	remainder in %o0.
 *
 * Entry points:
 *  .urem	unsigned remainder; clears %g3 and joins the common code
 *  .rem	signed remainder; copies the dividend to %g3 (the
 *		remainder takes the sign of the dividend) and makes
 *		both operands nonnegative before joining the common code
 *
 * A zero divisor executes "ta 0x2" (ST_DIV0); if the trap returns,
 * the routine returns 0.
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top decade of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Register assignment in the code below:
 *  %o2 = Q, %o3 = R, %o4 = ITER, %o5 = V,
 *  %g1 = 1 << (32 - 4 - 1) threshold, %g2 = single-bit step count,
 *  %g3 = sign of the result.
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 *
 * Layout convention: an instruction indented by one extra space sits
 * in the delay slot of the branch/return immediately before it.
 */
	.text
	.align	4
	.global	.urem
	.proc	4
.urem:
	b	divide
	 mov	0, %g3			! result always positive

	.align	4
	.global	.rem
	.proc	4
.rem:
	! compute sign of result; if neither is negative, no problem
	orcc	%o1, %o0, %g0		! either negative?
	bge	2f			! no, go do the divide
	 mov	%o0, %g3		! sign of remainder matches %o0
	tst	%o1
	bge	1f
	 tst	%o0
	! %o1 is definitely negative; %o0 might also be negative
	bge	2f			! if %o0 not negative...
	 sub	%g0, %o1, %o1		! in any case, make %o1 nonneg
1:	! %o0 is negative, %o1 is nonnegative
	sub	%g0, %o0, %o0		! make %o0 nonnegative
2:

	! Ready to divide.  Compute size of quotient; scale comparand.
divide:
	orcc	%o1, %g0, %o5		! V = divisor; also test it for zero
	bne	1f
	 mov	%o0, %o3		! R = dividend

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	0x2			! ST_DIV0
	retl
	 clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	got_result		! (and algorithm fails otherwise)
	 clr	%o2			! Q = 0
	sethi	%hi(1 << (32 - 4 - 1)), %g1
	cmp	%o3, %g1
	blu	not_really_big
	 clr	%o4			! ITER = 0

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
1:
	cmp	%o5, %g1
	bgeu	3f
	 mov	1, %g2
	sll	%o5, 4, %o5
	b	1b
	 add	%o4, 1, %o4

	! Now compute %g2.
2:	addcc	%o5, %o5, %o5
	bcc	not_too_big
	 add	%g2, 1, %g2

	! We get here if the %o1 overflowed while shifting.
	! This means that %o3 has the high-order bit set.
	! Restore %o5 and subtract from %o3.
	sll	%g1, 4, %g1		! high order bit
	srl	%o5, 1, %o5		! rest of %o5
	add	%o5, %g1, %o5
	b	do_single_div
	 sub	%g2, 1, %g2

not_too_big:
3:	cmp	%o5, %o3
	blu	2b
	 nop
	be	do_single_div
	 nop
	/* NB: these are commented out in the V8-SPARC manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g2
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
do_single_div:
	subcc	%g2, 1, %g2
	bl	end_regular_divide
	 nop
	sub	%o3, %o5, %o3
	mov	1, %o2
	b	end_single_divloop
	 nop
single_divloop:
	sll	%o2, 1, %o2
	bl	1f
	 srl	%o5, 1, %o5
	! %o3 >= 0
	sub	%o3, %o5, %o3
	b	2f
	 add	%o2, 1, %o2
1:	! %o3 < 0
	add	%o3, %o5, %o3
	sub	%o2, 1, %o2
2:
end_single_divloop:
	subcc	%g2, 1, %g2
	bge	single_divloop
	 tst	%o3
	b,a	end_regular_divide	! annulled: no delay-slot instruction runs

not_really_big:
1:
	sll	%o5, 4, %o5
	cmp	%o5, %o3
	bleu	1b
	 addcc	%o4, 1, %o4
	be	got_result
	 sub	%o4, 1, %o4

	tst	%o3			! set up for initial iteration
divloop:
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L1.16
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L2.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L3.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L4.23
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2+1), %o2
L4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2-1), %o2

L3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L4.21
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2+1), %o2

L4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2-1), %o2

L2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L3.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L4.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2+1), %o2

L4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2-1), %o2

L3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L4.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2+1), %o2

L4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2-1), %o2

L1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L2.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L3.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L4.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2+1), %o2

L4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2-1), %o2

L3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L4.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2+1), %o2

L4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2-1), %o2

L2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L3.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L4.11
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2+1), %o2

L4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2-1), %o2

L3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L4.9
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2+1), %o2

L4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2-1), %o2

9:
end_regular_divide:
	subcc	%o4, 1, %o4
	bge	divloop
	 tst	%o3
	bl,a	got_result
	! non-restoring fixup here (one instruction only!)
	 add	%o3, %o1, %o3		! runs only when the branch is taken (R < 0)

got_result:
	! check to see if answer should be < 0
	tst	%g3
	bl,a	1f
	 sub	%g0, %o3, %o3		! runs only when %g3 is negative
1:
	retl
	 mov	%o3, %o0

#endif
784 |