diff gcc/config/m68k/lb1sf68.asm @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 77e2b8dfacca
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gcc/config/m68k/lb1sf68.asm	Fri Jul 17 14:47:48 2009 +0900
@@ -0,0 +1,4116 @@
+/* libgcc routines for 68000 w/o floating-point hardware.
+   Copyright (C) 1994, 1996, 1997, 1998, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* Use this one for any 680x0; assumes no floating point hardware.
+   The trailing " '" appearing on some lines is for ANSI preprocessors.  Yuk.
+   Some of this code comes from MINIX, via the folks at ericsson.
+   D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
+*/
+
+/* These are predefined by new versions of GNU cpp.  */
+
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__ _
+#endif
+
+#ifndef __REGISTER_PREFIX__
+#define __REGISTER_PREFIX__
+#endif
+
+#ifndef __IMMEDIATE_PREFIX__
+#define __IMMEDIATE_PREFIX__ #
+#endif
+
+/* ANSI concatenation macros.  */
+
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+/* Use the right prefix for global labels.  */
+
+#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+
+/* Note that X is a function.  */
+	
+#ifdef __ELF__
+#define FUNC(x) .type SYM(x),function
+#else
+/* The .proc pseudo-op is accepted, but ignored, by GAS.  We could just	
+   define this to the empty string for non-ELF systems, but defining it
+   to .proc means that the information is available to the assembler if
+   the need arises.  */
+#define FUNC(x) .proc
+#endif
+		
+/* Use the right prefix for registers.  */
+
+#define REG(x) CONCAT1 (__REGISTER_PREFIX__, x)
+
+/* Use the right prefix for immediate values.  */
+
+#define IMM(x) CONCAT1 (__IMMEDIATE_PREFIX__, x)
+
+#define d0 REG (d0)
+#define d1 REG (d1)
+#define d2 REG (d2)
+#define d3 REG (d3)
+#define d4 REG (d4)
+#define d5 REG (d5)
+#define d6 REG (d6)
+#define d7 REG (d7)
+#define a0 REG (a0)
+#define a1 REG (a1)
+#define a2 REG (a2)
+#define a3 REG (a3)
+#define a4 REG (a4)
+#define a5 REG (a5)
+#define a6 REG (a6)
+#define fp REG (fp)
+#define sp REG (sp)
+#define pc REG (pc)
+
+/* Provide a few macros to allow for PIC code support.
+ * With PIC, data is stored A5 relative, so we have to take a bit of special
+ * care to ensure that all loads of global data go through A5.  PIC also
+ * requires jumps and subroutine calls to be PC relative rather than absolute.
+ * We cheat a little on this: in the PIC case we use short offset branches and
+ * hope that the final object code is within range (which it should be).
+ */
+#ifndef __PIC__
+
+	/* Non PIC (absolute/relocatable) versions */
+
+	.macro PICCALL addr
+	jbsr	\addr
+	.endm
+
+	.macro PICJUMP addr
+	jmp	\addr
+	.endm
+
+	.macro PICLEA sym, reg
+	lea	\sym, \reg
+	.endm
+
+	.macro PICPEA sym, areg
+	pea	\sym
+	.endm
+
+#else /* __PIC__ */
+
+# if defined (__uClinux__)
+
+	/* Versions for uClinux */
+
+#  if defined(__ID_SHARED_LIBRARY__)
+
+	/* -mid-shared-library versions  */
+
+	.macro PICLEA sym, reg
+	movel	a5@(_current_shared_library_a5_offset_), \reg
+	movel	\sym@GOT(\reg), \reg
+	.endm
+
+	.macro PICPEA sym, areg
+	movel	a5@(_current_shared_library_a5_offset_), \areg
+	movel	\sym@GOT(\areg), sp@-
+	.endm
+
+	.macro PICCALL addr
+	PICLEA	\addr,a0
+	jsr	a0@
+	.endm
+
+	.macro PICJUMP addr
+	PICLEA	\addr,a0
+	jmp	a0@
+	.endm
+
+#  else /* !__ID_SHARED_LIBRARY__ */
+
+	/* Versions for -msep-data */
+
+	.macro PICLEA sym, reg
+	movel	\sym@GOT(a5), \reg
+	.endm
+
+	.macro PICPEA sym, areg
+	movel	\sym@GOT(a5), sp@-
+	.endm
+
+	.macro PICCALL addr
+#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__)
+	lea	\addr-.-8,a0
+	jsr	pc@(a0)
+#else
+	bsr	\addr
+#endif
+	.endm
+
+	.macro PICJUMP addr
+	/* ISA C has no bra.l instruction, and since this assembly file
+	   gets assembled into multiple object files, we avoid the
+	   bra instruction entirely.  */
+#if defined (__mcoldfire__) && !defined (__mcfisab__)
+	lea	\addr-.-8,a0
+	jmp	pc@(a0)
+#else
+	bra	\addr
+#endif
+	.endm
+
+#  endif
+
+# else /* !__uClinux__ */
+
+	/* Versions for Linux */
+
+	.macro PICLEA sym, reg
+	movel	#_GLOBAL_OFFSET_TABLE_@GOTPC, \reg
+	lea	(-6, pc, \reg), \reg
+	movel	\sym@GOT(\reg), \reg
+	.endm
+
+	.macro PICPEA sym, areg
+	movel	#_GLOBAL_OFFSET_TABLE_@GOTPC, \areg
+	lea	(-6, pc, \areg), \areg
+	movel	\sym@GOT(\areg), sp@-
+	.endm
+
+	.macro PICCALL addr
+#if defined (__mcoldfire__) && !defined (__mcfisab__) && !defined (__mcfisac__)
+	lea	\addr-.-8,a0
+	jsr	pc@(a0)
+#else
+	bsr	\addr
+#endif
+	.endm
+
+	.macro PICJUMP addr
+	/* ISA C has no bra.l instruction, and since this assembly file
+	   gets assembled into multiple object files, we avoid the
+	   bra instruction entirely.  */
+#if defined (__mcoldfire__) && !defined (__mcfisab__)
+	lea	\addr-.-8,a0
+	jmp	pc@(a0)
+#else
+	bra	\addr
+#endif
+	.endm
+
+# endif
+#endif /* __PIC__ */
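+
+/* As an illustration of the macros above (taking their bodies verbatim),
+   a load and a call of some global symbol `sym' assemble roughly as:
+
+	PICLEA	sym, a0		| non-PIC:     lea    sym, a0
+				| -msep-data:  movel  sym@GOT(a5), a0
+	PICCALL	sym		| non-PIC:     jbsr   sym
+				| -msep-data:  bsr    sym  (or a PC-relative
+				|	       lea/jsr pair on ColdFire
+				|	       without ISA B/C)
+   Illustration only; the macro definitions above are authoritative.  */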
+
+
+#ifdef L_floatex
+
+| This is an attempt at decent floating point (single, double and
+| extended double) code for the GNU C compiler. It should be easy to
+| adapt to other compilers (but beware of the local labels!).
+
+| Starting date: 21 October, 1990
+
+| It is convenient to introduce the notation (s,e,f) for a floating point
+| number, where s=sign, e=exponent, f=fraction. We abbreviate "floating
+| point number" as fpn, independently of the precision.
+| Let MAX_EXP be in each case the maximum exponent (255 for floats, 1023 
+| for doubles and 16383 for long doubles). We then have the following 
+| different cases:
+|  1. Normalized fpns have 0 < e < MAX_EXP. They correspond to 
+|     (-1)^s x 1.f x 2^(e-bias-1).
+|  2. Denormalized fpns have e=0. They correspond to numbers of the form
+|     (-1)^s x 0.f x 2^(-bias).
+|  3. +/-INFINITY have e=MAX_EXP, f=0.
+|  4. Quiet NaNs (Not a Number) have all bits set.
+|  5. Signaling NaNs have s=0, e=MAX_EXP, f=1.
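+|
+| A worked example of case 1, for the IEEE double layout used below (sign
+| bit, 11-bit exponent, 52-bit fraction): 1.0 has s=0, e=1023 (0x3ff), f=0,
+| i.e. the image 0x3ff00000 00000000; -2.5 = -1.25 x 2^1 has s=1, e=1024
+| (0x400), f=0x4000000000000, i.e. 0xc0040000 00000000.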
+
+|=============================================================================
+|                                  exceptions
+|=============================================================================
+
+| This is the floating point condition code register (_fpCCR):
+|
+| struct {
+|   short _exception_bits;	
+|   short _trap_enable_bits;	
+|   short _sticky_bits;
+|   short _rounding_mode;
+|   short _format;
+|   short _last_operation;
+|   union {
+|     float sf;
+|     double df;
+|   } _operand1;
+|   union {
+|     float sf;
+|     double df;
+|   } _operand2;
+| } _fpCCR;
+
+	.data
+	.even
+
+	.globl	SYM (_fpCCR)
+	
+SYM (_fpCCR):
+__exception_bits:
+	.word	0
+__trap_enable_bits:
+	.word	0
+__sticky_bits:
+	.word	0
+__rounding_mode:
+	.word	ROUND_TO_NEAREST
+__format:
+	.word	NIL
+__last_operation:
+	.word	NOOP
+__operand1:
+	.long	0
+	.long	0
+__operand2:
+	.long 	0
+	.long	0
+
+| Offsets:
+EBITS  = __exception_bits - SYM (_fpCCR)
+TRAPE  = __trap_enable_bits - SYM (_fpCCR)
+STICK  = __sticky_bits - SYM (_fpCCR)
+ROUND  = __rounding_mode - SYM (_fpCCR)
+FORMT  = __format - SYM (_fpCCR)
+LASTO  = __last_operation - SYM (_fpCCR)
+OPER1  = __operand1 - SYM (_fpCCR)
+OPER2  = __operand2 - SYM (_fpCCR)
+
+| The following exception types are supported:
+INEXACT_RESULT 		= 0x0001
+UNDERFLOW 		= 0x0002
+OVERFLOW 		= 0x0004
+DIVIDE_BY_ZERO 		= 0x0008
+INVALID_OPERATION 	= 0x0010
+
+| The allowed rounding modes are:
+UNKNOWN           = -1
+ROUND_TO_NEAREST  = 0 | round result to nearest representable value
+ROUND_TO_ZERO     = 1 | round result towards zero
+ROUND_TO_PLUS     = 2 | round result towards plus infinity
+ROUND_TO_MINUS    = 3 | round result towards minus infinity
+
+| The allowed values of format are:
+NIL          = 0
+SINGLE_FLOAT = 1
+DOUBLE_FLOAT = 2
+LONG_FLOAT   = 3
+
+| The allowed values for the last operation are:
+NOOP         = 0
+ADD          = 1
+MULTIPLY     = 2
+DIVIDE       = 3
+NEGATE       = 4
+COMPARE      = 5
+EXTENDSFDF   = 6
+TRUNCDFSF    = 7
+
+|=============================================================================
+|                           __clear_sticky_bits
+|=============================================================================
+
+| The sticky bits are normally not cleared (thus the name), whereas the 
+| exception type and exception value reflect the last computation. 
+| This routine is provided to clear them (you can also write to _fpCCR,
+| since it is globally visible).
+
+	.globl  SYM (__clear_sticky_bit)
+
+	.text
+	.even
+
+| void __clear_sticky_bits(void);
+SYM (__clear_sticky_bit):		
+	PICLEA	SYM (_fpCCR),a0
+#ifndef __mcoldfire__
+	movew	IMM (0),a0@(STICK)
+#else
+	clr.w	a0@(STICK)
+#endif
+	rts
+
+|=============================================================================
+|                           $_exception_handler
+|=============================================================================
+
+	.globl  $_exception_handler
+
+	.text
+	.even
+
+| This is the common exit point if an exception occurs.
+| NOTE: it is NOT callable from C!
+| It expects the exception type in d7, the format (SINGLE_FLOAT,
+| DOUBLE_FLOAT or LONG_FLOAT) in d6, and the last operation code in d5.
+| It sets the corresponding exception and sticky bits, and the format. 
+| Depending on the format it fills the corresponding slots for the 
+| operands which produced the exception (all this information is provided
+| so if you write your own exception handlers you have enough information
+| to deal with the problem).
+| It then checks whether the corresponding exception is trap-enabled,
+| in which case it pushes the address of _fpCCR and traps through
+| trap FPTRAP (15 for the moment).
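+|
+| For example, an overflow detected in __adddf3 reaches this point (via
+| Ld$overflow below) with d7 = INEXACT_RESULT+OVERFLOW, d6 = DOUBLE_FLOAT
+| and d5 = ADD, while the caller's frame still holds the operands of the
+| failing operation.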
+
+FPTRAP = 15
+
+$_exception_handler:
+	PICLEA	SYM (_fpCCR),a0
+	movew	d7,a0@(EBITS)	| set __exception_bits
+#ifndef __mcoldfire__
+	orw	d7,a0@(STICK)	| and __sticky_bits
+#else
+	movew	a0@(STICK),d4
+	orl	d7,d4
+	movew	d4,a0@(STICK)
+#endif
+	movew	d6,a0@(FORMT)	| and __format
+	movew	d5,a0@(LASTO)	| and __last_operation
+
+| Now put the operands in place:
+#ifndef __mcoldfire__
+	cmpw	IMM (SINGLE_FLOAT),d6
+#else
+	cmpl	IMM (SINGLE_FLOAT),d6
+#endif
+	beq	1f
+	movel	a6@(8),a0@(OPER1)
+	movel	a6@(12),a0@(OPER1+4)
+	movel	a6@(16),a0@(OPER2)
+	movel	a6@(20),a0@(OPER2+4)
+	bra	2f
+1:	movel	a6@(8),a0@(OPER1)
+	movel	a6@(12),a0@(OPER2)
+2:
+| And check whether the exception is trap-enabled:
+#ifndef __mcoldfire__
+	andw	a0@(TRAPE),d7	| is exception trap-enabled?
+#else
+	clrl	d6
+	movew	a0@(TRAPE),d6
+	andl	d6,d7
+#endif
+	beq	1f		| no, exit
+	PICPEA	SYM (_fpCCR),a1	| yes, push address of _fpCCR
+	trap	IMM (FPTRAP)	| and trap
+#ifndef __mcoldfire__
+1:	moveml	sp@+,d2-d7	| restore data registers
+#else
+1:	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6		| and return
+	rts
+#endif /* L_floatex */
+
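+| __mulsi3 below computes the low 32 bits of a 32x32-bit product from
+| 16-bit halves.  With x = x0*2^16 + x1 and y = y0*2^16 + y1 (the naming
+| used in its comments), x*y mod 2^32 = ((x0*y1 + x1*y0) << 16) + x1*y1,
+| since the x0*y0 term only affects bits 32 and above.
+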
+#ifdef  L_mulsi3
+	.text
+	FUNC(__mulsi3)
+	.globl	SYM (__mulsi3)
+SYM (__mulsi3):
+	movew	sp@(4), d0	/* x0 -> d0 */
+	muluw	sp@(10), d0	/* x0*y1 */
+	movew	sp@(6), d1	/* x1 -> d1 */
+	muluw	sp@(8), d1	/* x1*y0 */
+#ifndef __mcoldfire__
+	addw	d1, d0
+#else
+	addl	d1, d0
+#endif
+	swap	d0
+	clrw	d0
+	movew	sp@(6), d1	/* x1 -> d1 */
+	muluw	sp@(10), d1	/* x1*y1 */
+	addl	d1, d0
+
+	rts
+#endif /* L_mulsi3 */
+
+#ifdef  L_udivsi3
+	.text
+	FUNC(__udivsi3)
+	.globl	SYM (__udivsi3)
+SYM (__udivsi3):
+#ifndef __mcoldfire__
+	movel	d2, sp@-
+	movel	sp@(12), d1	/* d1 = divisor */
+	movel	sp@(8), d0	/* d0 = dividend */
+
+	cmpl	IMM (0x10000), d1 /* divisor >= 2 ^ 16 ?   */
+	jcc	L3		/* then try next algorithm */
+	movel	d0, d2
+	clrw	d2
+	swap	d2
+	divu	d1, d2          /* high quotient in lower word */
+	movew	d2, d0		/* save high quotient */
+	swap	d0
+	movew	sp@(10), d2	/* get low dividend + high rest */
+	divu	d1, d2		/* low quotient */
+	movew	d2, d0
+	jra	L6
+
+L3:	movel	d1, d2		/* use d2 as divisor backup */
+L4:	lsrl	IMM (1), d1	/* shift divisor */
+	lsrl	IMM (1), d0	/* shift dividend */
+	cmpl	IMM (0x10000), d1 /* still divisor >= 2 ^ 16 ?  */
+	jcc	L4
+	divu	d1, d0		/* now we have 16-bit divisor */
+	andl	IMM (0xffff), d0 /* keep 16-bit quotient, discard remainder */
+
+/* Multiply the 16-bit tentative quotient with the 32-bit divisor.  Because of
+   the operand ranges, this might give a 33-bit product.  If this product is
+   greater than the dividend, the tentative quotient was too large. */
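+
+/* For instance (illustration only), dividing 0x20000 by 0x10001: one shift
+   brings the divisor below 2 ^ 16, and the 16-bit divide 0x10000 / 0x8000
+   gives the tentative quotient 2; but 2 * 0x10001 = 0x20002 exceeds the
+   dividend, so the adjustment below yields the correct quotient 1. */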
+	movel	d2, d1
+	mulu	d0, d1		/* low part, 32 bits */
+	swap	d2
+	mulu	d0, d2		/* high part, at most 17 bits */
+	swap	d2		/* align high part with low part */
+	tstw	d2		/* high part 17 bits? */
+	jne	L5		/* if 17 bits, quotient was too large */
+	addl	d2, d1		/* add parts */
+	jcs	L5		/* if sum is 33 bits, quotient was too large */
+	cmpl	sp@(8), d1	/* compare the sum with the dividend */
+	jls	L6		/* if sum > dividend, quotient was too large */
+L5:	subql	IMM (1), d0	/* adjust quotient */
+
+L6:	movel	sp@+, d2
+	rts
+
+#else /* __mcoldfire__ */
+
+/* ColdFire implementation of non-restoring division algorithm from
+   Hennessy & Patterson, Appendix A. */
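+/* Roughly, in C (illustration only; p and a are the register pair d2/d0,
+   b is the divisor in d1, and the quotient bits are collected in a):
+
+	p = 0;
+	for (i = 0; i < 32; i++) {
+	  p = (p << 1) | (a >> 31);
+	  a <<= 1;
+	  if (p >= b) {
+	    p -= b;
+	    a |= 1;
+	  }
+	}
+
+   On exit the quotient is in a (d0) and the remainder in p (d2).  */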
+	link	a6,IMM (-12)
+	moveml	d2-d4,sp@
+	movel	a6@(8),d0
+	movel	a6@(12),d1
+	clrl	d2		| clear p
+	moveq	IMM (31),d4
+L1:	addl	d0,d0		| shift reg pair (p,a) one bit left
+	addxl	d2,d2
+	movl	d2,d3		| subtract b from p, store in tmp.
+	subl	d1,d3
+	jcs	L2		| if no carry,
+	bset	IMM (0),d0	| set the low order bit of a to 1,
+	movl	d3,d2		| and store tmp in p.
+L2:	subql	IMM (1),d4
+	jcc	L1
+	moveml	sp@,d2-d4	| restore data registers
+	unlk	a6		| and return
+	rts
+#endif /* __mcoldfire__ */
+
+#endif /* L_udivsi3 */
+
+#ifdef  L_divsi3
+	.text
+	FUNC(__divsi3)
+	.globl	SYM (__divsi3)
+SYM (__divsi3):
+	movel	d2, sp@-
+
+	moveq	IMM (1), d2	/* sign of result stored in d2 (=1 or =-1) */
+	movel	sp@(12), d1	/* d1 = divisor */
+	jpl	L1
+	negl	d1
+#ifndef __mcoldfire__
+	negb	d2		/* change sign because divisor <0  */
+#else
+	negl	d2		/* change sign because divisor <0  */
+#endif
+L1:	movel	sp@(8), d0	/* d0 = dividend */
+	jpl	L2
+	negl	d0
+#ifndef __mcoldfire__
+	negb	d2
+#else
+	negl	d2
+#endif
+
+L2:	movel	d1, sp@-
+	movel	d0, sp@-
+	PICCALL	SYM (__udivsi3)	/* divide abs(dividend) by abs(divisor) */
+	addql	IMM (8), sp
+
+	tstb	d2
+	jpl	L3
+	negl	d0
+
+L3:	movel	sp@+, d2
+	rts
+#endif /* L_divsi3 */
+
+#ifdef  L_umodsi3
+	.text
+	FUNC(__umodsi3)
+	.globl	SYM (__umodsi3)
+SYM (__umodsi3):
+	movel	sp@(8), d1	/* d1 = divisor */
+	movel	sp@(4), d0	/* d0 = dividend */
+	movel	d1, sp@-
+	movel	d0, sp@-
+	PICCALL	SYM (__udivsi3)
+	addql	IMM (8), sp
+	movel	sp@(8), d1	/* d1 = divisor */
+#ifndef __mcoldfire__
+	movel	d1, sp@-
+	movel	d0, sp@-
+	PICCALL	SYM (__mulsi3)	/* d0 = (a/b)*b */
+	addql	IMM (8), sp
+#else
+	mulsl	d1,d0
+#endif
+	movel	sp@(4), d1	/* d1 = dividend */
+	subl	d0, d1		/* d1 = a - (a/b)*b */
+	movel	d1, d0
+	rts
+#endif /* L_umodsi3 */
+
+#ifdef  L_modsi3
+	.text
+	FUNC(__modsi3)
+	.globl	SYM (__modsi3)
+SYM (__modsi3):
+	movel	sp@(8), d1	/* d1 = divisor */
+	movel	sp@(4), d0	/* d0 = dividend */
+	movel	d1, sp@-
+	movel	d0, sp@-
+	PICCALL	SYM (__divsi3)
+	addql	IMM (8), sp
+	movel	sp@(8), d1	/* d1 = divisor */
+#ifndef __mcoldfire__
+	movel	d1, sp@-
+	movel	d0, sp@-
+	PICCALL	SYM (__mulsi3)	/* d0 = (a/b)*b */
+	addql	IMM (8), sp
+#else
+	mulsl	d1,d0
+#endif
+	movel	sp@(4), d1	/* d1 = dividend */
+	subl	d0, d1		/* d1 = a - (a/b)*b */
+	movel	d1, d0
+	rts
+#endif /* L_modsi3 */
+
+
+#ifdef  L_double
+
+	.globl	SYM (_fpCCR)
+	.globl  $_exception_handler
+
+QUIET_NaN      = 0xffffffff
+
+D_MAX_EXP      = 0x07ff
+D_BIAS         = 1022
+DBL_MAX_EXP    = D_MAX_EXP - D_BIAS
+DBL_MIN_EXP    = 1 - D_BIAS
+DBL_MANT_DIG   = 53
+
+INEXACT_RESULT 		= 0x0001
+UNDERFLOW 		= 0x0002
+OVERFLOW 		= 0x0004
+DIVIDE_BY_ZERO 		= 0x0008
+INVALID_OPERATION 	= 0x0010
+
+DOUBLE_FLOAT = 2
+
+NOOP         = 0
+ADD          = 1
+MULTIPLY     = 2
+DIVIDE       = 3
+NEGATE       = 4
+COMPARE      = 5
+EXTENDSFDF   = 6
+TRUNCDFSF    = 7
+
+UNKNOWN           = -1
+ROUND_TO_NEAREST  = 0 | round result to nearest representable value
+ROUND_TO_ZERO     = 1 | round result towards zero
+ROUND_TO_PLUS     = 2 | round result towards plus infinity
+ROUND_TO_MINUS    = 3 | round result towards minus infinity
+
+| Entry points:
+
+	.globl SYM (__adddf3)
+	.globl SYM (__subdf3)
+	.globl SYM (__muldf3)
+	.globl SYM (__divdf3)
+	.globl SYM (__negdf2)
+	.globl SYM (__cmpdf2)
+	.globl SYM (__cmpdf2_internal)
+	.hidden SYM (__cmpdf2_internal)
+
+	.text
+	.even
+
+| These are common routines to return and signal exceptions.	
+
+Ld$den:
+| Return and signal a denormalized number
+	orl	d7,d0
+	movew	IMM (INEXACT_RESULT+UNDERFLOW),d7
+	moveq	IMM (DOUBLE_FLOAT),d6
+	PICJUMP	$_exception_handler
+
+Ld$infty:
+Ld$overflow:
+| Return a properly signed INFINITY and set the exception flags 
+	movel	IMM (0x7ff00000),d0
+	movel	IMM (0),d1
+	orl	d7,d0
+	movew	IMM (INEXACT_RESULT+OVERFLOW),d7
+	moveq	IMM (DOUBLE_FLOAT),d6
+	PICJUMP	$_exception_handler
+
+Ld$underflow:
+| Return 0 and set the exception flags 
+	movel	IMM (0),d0
+	movel	d0,d1
+	movew	IMM (INEXACT_RESULT+UNDERFLOW),d7
+	moveq	IMM (DOUBLE_FLOAT),d6
+	PICJUMP	$_exception_handler
+
+Ld$inop:
+| Return a quiet NaN and set the exception flags
+	movel	IMM (QUIET_NaN),d0
+	movel	d0,d1
+	movew	IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+	moveq	IMM (DOUBLE_FLOAT),d6
+	PICJUMP	$_exception_handler
+
+Ld$div$0:
+| Return a properly signed INFINITY and set the exception flags
+	movel	IMM (0x7ff00000),d0
+	movel	IMM (0),d1
+	orl	d7,d0
+	movew	IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7
+	moveq	IMM (DOUBLE_FLOAT),d6
+	PICJUMP	$_exception_handler
+
+|=============================================================================
+|=============================================================================
+|                         double precision routines
+|=============================================================================
+|=============================================================================
+
+| A double precision floating point number (double) has the format:
+|
+| struct _double {
+|  unsigned int sign      : 1;  /* sign bit */ 
+|  unsigned int exponent  : 11; /* exponent, shifted by 1022 */
+|  unsigned int fraction  : 52; /* fraction */
+| } double;
+| 
+| Thus sizeof(double) = 8 (64 bits). 
+|
+| All the routines are callable from C programs, and return the result 
+| in the register pair d0-d1. They also preserve all registers except 
+| d0-d1 and a0-a1.
+
+|=============================================================================
+|                              __subdf3
+|=============================================================================
+
+| double __subdf3(double, double);
+	FUNC(__subdf3)
+SYM (__subdf3):
+	bchg	IMM (31),sp@(12) | change sign of second operand
+				| and fall through, so we always add
+|=============================================================================
+|                              __adddf3
+|=============================================================================
+
+| double __adddf3(double, double);
+	FUNC(__adddf3)
+SYM (__adddf3):
+#ifndef __mcoldfire__
+	link	a6,IMM (0)	| everything will be done in registers
+	moveml	d2-d7,sp@-	| save data registers d2-d7 (a2-a3 are saved later)
+#else
+	link	a6,IMM (-24)
+	moveml	d2-d7,sp@
+#endif
+	movel	a6@(8),d0	| get first operand
+	movel	a6@(12),d1	| 
+	movel	a6@(16),d2	| get second operand
+	movel	a6@(20),d3	| 
+
+	movel	d0,d7		| get d0's sign bit in d7 '
+	addl	d1,d1		| check and clear sign bit of a, and gain one
+	addxl	d0,d0		| bit of extra precision
+	beq	Ladddf$b	| if zero return second operand
+
+	movel	d2,d6		| save sign in d6 
+	addl	d3,d3		| get rid of sign bit and gain one bit of
+	addxl	d2,d2		| extra precision
+	beq	Ladddf$a	| if zero return first operand
+
+	andl	IMM (0x80000000),d7 | isolate a's sign bit '
+        swap	d6		| and also b's sign bit '
+#ifndef __mcoldfire__
+	andw	IMM (0x8000),d6	|
+	orw	d6,d7		| and combine them into d7, so that a's sign '
+				| bit is in the high word and b's is in the '
+				| low word, so d6 is free to be used
+#else
+	andl	IMM (0x8000),d6
+	orl	d6,d7
+#endif
+	movel	d7,a0		| now save d7 into a0, so d7 is free to
+                		| be used also
+
+| Get the exponents and check for denormalized and/or infinity.
+
+	movel	IMM (0x001fffff),d6 | mask for the fraction
+	movel	IMM (0x00200000),d7 | mask to put hidden bit back
+
+	movel	d0,d4		| 
+	andl	d6,d0		| get fraction in d0
+	notl	d6		| make d6 into mask for the exponent
+	andl	d6,d4		| get exponent in d4
+	beq	Ladddf$a$den	| branch if a is denormalized
+	cmpl	d6,d4		| check for INFINITY or NaN
+	beq	Ladddf$nf       | 
+	orl	d7,d0		| and put hidden bit back
+Ladddf$1:
+	swap	d4		| shift right exponent so that it starts
+#ifndef __mcoldfire__
+	lsrw	IMM (5),d4	| in bit 0 and not bit 20
+#else
+	lsrl	IMM (5),d4	| in bit 0 and not bit 20
+#endif
+| Now we have a's exponent in d4 and fraction in d0-d1 '
+	movel	d2,d5		| save b to get exponent
+	andl	d6,d5		| get exponent in d5
+	beq	Ladddf$b$den	| branch if b is denormalized
+	cmpl	d6,d5		| check for INFINITY or NaN
+	beq	Ladddf$nf
+	notl	d6		| make d6 into mask for the fraction again
+	andl	d6,d2		| and get fraction in d2
+	orl	d7,d2		| and put hidden bit back
+Ladddf$2:
+	swap	d5		| shift right exponent so that it starts
+#ifndef __mcoldfire__
+	lsrw	IMM (5),d5	| in bit 0 and not bit 20
+#else
+	lsrl	IMM (5),d5	| in bit 0 and not bit 20
+#endif
+
+| Now we have b's exponent in d5 and fraction in d2-d3. '
+
+| The situation now is as follows: the signs are combined in a0, the 
+| numbers are in d0-d1 (a) and d2-d3 (b), and the exponents in d4 (a)
+| and d5 (b). To do the rounding correctly we need to keep all the
+| bits until the end, so we need to use d0-d1-d2-d3 for the first number
+| and d4-d5-d6-d7 for the second. To do this we store (temporarily) the
+| exponents in a2-a3.
+
+#ifndef __mcoldfire__
+	moveml	a2-a3,sp@-	| save the address registers
+#else
+	movel	a2,sp@-	
+	movel	a3,sp@-	
+	movel	a4,sp@-	
+#endif
+
+	movel	d4,a2		| save the exponents
+	movel	d5,a3		| 
+
+	movel	IMM (0),d7	| and move the numbers around
+	movel	d7,d6		|
+	movel	d3,d5		|
+	movel	d2,d4		|
+	movel	d7,d3		|
+	movel	d7,d2		|
+
+| Here we shift the numbers until the exponents are the same, and put 
+| the largest exponent in a2.
+#ifndef __mcoldfire__
+	exg	d4,a2		| get exponents back
+	exg	d5,a3		|
+	cmpw	d4,d5		| compare the exponents
+#else
+	movel	d4,a4		| get exponents back
+	movel	a2,d4
+	movel	a4,a2
+	movel	d5,a4
+	movel	a3,d5
+	movel	a4,a3
+	cmpl	d4,d5		| compare the exponents
+#endif
+	beq	Ladddf$3	| if equal don't shift '
+	bhi	9f		| branch if second exponent is higher
+
+| Here we have a's exponent larger than b's, so we have to shift b. We do
+| this using d2 as the counter:
+1:	movew	d4,d2		| move largest exponent to d2
+#ifndef __mcoldfire__
+	subw	d5,d2		| and subtract second exponent
+	exg	d4,a2		| get back the longs we saved
+	exg	d5,a3		|
+#else
+	subl	d5,d2		| and subtract second exponent
+	movel	d4,a4		| get back the longs we saved
+	movel	a2,d4
+	movel	a4,a2
+	movel	d5,a4
+	movel	a3,d5
+	movel	a4,a3
+#endif
+| if difference is too large we don't shift (actually, we can just exit) '
+#ifndef __mcoldfire__
+	cmpw	IMM (DBL_MANT_DIG+2),d2
+#else
+	cmpl	IMM (DBL_MANT_DIG+2),d2
+#endif
+	bge	Ladddf$b$small
+#ifndef __mcoldfire__
+	cmpw	IMM (32),d2	| if difference >= 32, shift by longs
+#else
+	cmpl	IMM (32),d2	| if difference >= 32, shift by longs
+#endif
+	bge	5f
+2:
+#ifndef __mcoldfire__
+	cmpw	IMM (16),d2	| if difference >= 16, shift by words	
+#else
+	cmpl	IMM (16),d2	| if difference >= 16, shift by words	
+#endif
+	bge	6f
+	bra	3f		| enter dbra loop
+
+4:
+#ifndef __mcoldfire__
+	lsrl	IMM (1),d4
+	roxrl	IMM (1),d5
+	roxrl	IMM (1),d6
+	roxrl	IMM (1),d7
+#else
+	lsrl	IMM (1),d7
+	btst	IMM (0),d6
+	beq	10f
+	bset	IMM (31),d7
+10:	lsrl	IMM (1),d6
+	btst	IMM (0),d5
+	beq	11f
+	bset	IMM (31),d6
+11:	lsrl	IMM (1),d5
+	btst	IMM (0),d4
+	beq	12f
+	bset	IMM (31),d5
+12:	lsrl	IMM (1),d4
+#endif
+3:
+#ifndef __mcoldfire__
+	dbra	d2,4b
+#else
+	subql	IMM (1),d2
+	bpl	4b	
+#endif
+	movel	IMM (0),d2
+	movel	d2,d3	
+	bra	Ladddf$4
+5:
+	movel	d6,d7
+	movel	d5,d6
+	movel	d4,d5
+	movel	IMM (0),d4
+#ifndef __mcoldfire__
+	subw	IMM (32),d2
+#else
+	subl	IMM (32),d2
+#endif
+	bra	2b
+6:
+	movew	d6,d7
+	swap	d7
+	movew	d5,d6
+	swap	d6
+	movew	d4,d5
+	swap	d5
+	movew	IMM (0),d4
+	swap	d4
+#ifndef __mcoldfire__
+	subw	IMM (16),d2
+#else
+	subl	IMM (16),d2
+#endif
+	bra	3b
+	
+9:
+#ifndef __mcoldfire__
+	exg	d4,d5
+	movew	d4,d6
+	subw	d5,d6		| keep d5 (largest exponent) in d4
+	exg	d4,a2
+	exg	d5,a3
+#else
+	movel	d5,d6
+	movel	d4,d5
+	movel	d6,d4
+	subl	d5,d6
+	movel	d4,a4
+	movel	a2,d4
+	movel	a4,a2
+	movel	d5,a4
+	movel	a3,d5
+	movel	a4,a3
+#endif
+| if difference is too large we don't shift (actually, we can just exit) '
+#ifndef __mcoldfire__
+	cmpw	IMM (DBL_MANT_DIG+2),d6
+#else
+	cmpl	IMM (DBL_MANT_DIG+2),d6
+#endif
+	bge	Ladddf$a$small
+#ifndef __mcoldfire__
+	cmpw	IMM (32),d6	| if difference >= 32, shift by longs
+#else
+	cmpl	IMM (32),d6	| if difference >= 32, shift by longs
+#endif
+	bge	5f
+2:
+#ifndef __mcoldfire__
+	cmpw	IMM (16),d6	| if difference >= 16, shift by words	
+#else
+	cmpl	IMM (16),d6	| if difference >= 16, shift by words	
+#endif
+	bge	6f
+	bra	3f		| enter dbra loop
+
+4:
+#ifndef __mcoldfire__
+	lsrl	IMM (1),d0
+	roxrl	IMM (1),d1
+	roxrl	IMM (1),d2
+	roxrl	IMM (1),d3
+#else
+	lsrl	IMM (1),d3
+	btst	IMM (0),d2
+	beq	10f
+	bset	IMM (31),d3
+10:	lsrl	IMM (1),d2
+	btst	IMM (0),d1
+	beq	11f
+	bset	IMM (31),d2
+11:	lsrl	IMM (1),d1
+	btst	IMM (0),d0
+	beq	12f
+	bset	IMM (31),d1
+12:	lsrl	IMM (1),d0
+#endif
+3:
+#ifndef __mcoldfire__
+	dbra	d6,4b
+#else
+	subql	IMM (1),d6
+	bpl	4b
+#endif
+	movel	IMM (0),d7
+	movel	d7,d6
+	bra	Ladddf$4
+5:
+	movel	d2,d3
+	movel	d1,d2
+	movel	d0,d1
+	movel	IMM (0),d0
+#ifndef __mcoldfire__
+	subw	IMM (32),d6
+#else
+	subl	IMM (32),d6
+#endif
+	bra	2b
+6:
+	movew	d2,d3
+	swap	d3
+	movew	d1,d2
+	swap	d2
+	movew	d0,d1
+	swap	d1
+	movew	IMM (0),d0
+	swap	d0
+#ifndef __mcoldfire__
+	subw	IMM (16),d6
+#else
+	subl	IMM (16),d6
+#endif
+	bra	3b
+Ladddf$3:
+#ifndef __mcoldfire__
+	exg	d4,a2	
+	exg	d5,a3
+#else
+	movel	d4,a4
+	movel	a2,d4
+	movel	a4,a2
+	movel	d5,a4
+	movel	a3,d5
+	movel	a4,a3
+#endif
+Ladddf$4:	
+| Now we have the numbers in d0--d3 and d4--d7, the exponent in a2, and
+| the signs in a0.
+
+| Here we have to decide whether to add or subtract the numbers:
+#ifndef __mcoldfire__
+	exg	d7,a0		| get the signs 
+	exg	d6,a3		| a3 is free to be used
+#else
+	movel	d7,a4
+	movel	a0,d7
+	movel	a4,a0
+	movel	d6,a4
+	movel	a3,d6
+	movel	a4,a3
+#endif
+	movel	d7,d6		|
+	movew	IMM (0),d7	| get a's sign in d7 '
+	swap	d6              |
+	movew	IMM (0),d6	| and b's sign in d6 '
+	eorl	d7,d6		| compare the signs
+	bmi	Lsubdf$0	| if the signs are different we have 
+				| to subtract
+#ifndef __mcoldfire__
+	exg	d7,a0		| else we add the numbers
+	exg	d6,a3		|
+#else
+	movel	d7,a4
+	movel	a0,d7
+	movel	a4,a0
+	movel	d6,a4
+	movel	a3,d6
+	movel	a4,a3
+#endif
+	addl	d7,d3		|
+	addxl	d6,d2		|
+	addxl	d5,d1		| 
+	addxl	d4,d0           |
+
+	movel	a2,d4		| return exponent to d4
+	movel	a0,d7		| 
+	andl	IMM (0x80000000),d7 | d7 now has the sign
+
+#ifndef __mcoldfire__
+	moveml	sp@+,a2-a3	
+#else
+	movel	sp@+,a4	
+	movel	sp@+,a3	
+	movel	sp@+,a2	
+#endif
+
+| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider
+| the case of denormalized numbers in the rounding routine itself).
+| Since in the addition (but not in the subtraction!) we could have set
+| one more bit, we check for this:
+	btst	IMM (DBL_MANT_DIG+1),d0	
+	beq	1f
+#ifndef __mcoldfire__
+	lsrl	IMM (1),d0
+	roxrl	IMM (1),d1
+	roxrl	IMM (1),d2
+	roxrl	IMM (1),d3
+	addw	IMM (1),d4
+#else
+	lsrl	IMM (1),d3
+	btst	IMM (0),d2
+	beq	10f
+	bset	IMM (31),d3
+10:	lsrl	IMM (1),d2
+	btst	IMM (0),d1
+	beq	11f
+	bset	IMM (31),d2
+11:	lsrl	IMM (1),d1
+	btst	IMM (0),d0
+	beq	12f
+	bset	IMM (31),d1
+12:	lsrl	IMM (1),d0
+	addl	IMM (1),d4
+#endif
+1:
+	lea	pc@(Ladddf$5),a0 | to return from rounding routine
+	PICLEA	SYM (_fpCCR),a1	| check the rounding mode
+#ifdef __mcoldfire__
+	clrl	d6
+#endif
+	movew	a1@(6),d6	| rounding mode in d6
+	beq	Lround$to$nearest
+#ifndef __mcoldfire__
+	cmpw	IMM (ROUND_TO_PLUS),d6
+#else
+	cmpl	IMM (ROUND_TO_PLUS),d6
+#endif
+	bhi	Lround$to$minus
+	blt	Lround$to$zero
+	bra	Lround$to$plus
+Ladddf$5:
+| Put back the exponent and check for overflow
+#ifndef __mcoldfire__
+	cmpw	IMM (0x7ff),d4	| is the exponent big?
+#else
+	cmpl	IMM (0x7ff),d4	| is the exponent big?
+#endif
+	bge	1f
+	bclr	IMM (DBL_MANT_DIG-1),d0
+#ifndef __mcoldfire__
+	lslw	IMM (4),d4	| put exponent back into position
+#else
+	lsll	IMM (4),d4	| put exponent back into position
+#endif
+	swap	d0		| 
+#ifndef __mcoldfire__
+	orw	d4,d0		|
+#else
+	orl	d4,d0		|
+#endif
+	swap	d0		|
+	bra	Ladddf$ret
+1:
+	moveq	IMM (ADD),d5
+	bra	Ld$overflow
+
+Lsubdf$0:
+| Here we do the subtraction.
+#ifndef __mcoldfire__
+	exg	d7,a0		| put sign back in a0
+	exg	d6,a3		|
+#else
+	movel	d7,a4
+	movel	a0,d7
+	movel	a4,a0
+	movel	d6,a4
+	movel	a3,d6
+	movel	a4,a3
+#endif
+	subl	d7,d3		|
+	subxl	d6,d2		|
+	subxl	d5,d1		|
+	subxl	d4,d0		|
+	beq	Ladddf$ret$1	| if zero just exit
+	bpl	1f		| if positive skip the following
+	movel	a0,d7		|
+	bchg	IMM (31),d7	| change sign bit in d7
+	movel	d7,a0		|
+	negl	d3		|
+	negxl	d2		|
+	negxl	d1              | and negate result
+	negxl	d0              |
+1:	
+	movel	a2,d4		| return exponent to d4
+	movel	a0,d7
+	andl	IMM (0x80000000),d7 | isolate sign bit
+#ifndef __mcoldfire__
+	moveml	sp@+,a2-a3	|
+#else
+	movel	sp@+,a4
+	movel	sp@+,a3
+	movel	sp@+,a2
+#endif
+
+| Before rounding normalize so bit #DBL_MANT_DIG is set (we will consider
+| the case of denormalized numbers in the rounding routine itself).
+| Since in the addition (but not in the subtraction!) we could have set
+| one more bit, we check for this:
+	btst	IMM (DBL_MANT_DIG+1),d0	
+	beq	1f
+#ifndef __mcoldfire__
+	lsrl	IMM (1),d0
+	roxrl	IMM (1),d1
+	roxrl	IMM (1),d2
+	roxrl	IMM (1),d3
+	addw	IMM (1),d4
+#else
+	lsrl	IMM (1),d3
+	btst	IMM (0),d2
+	beq	10f
+	bset	IMM (31),d3
+10:	lsrl	IMM (1),d2
+	btst	IMM (0),d1
+	beq	11f
+	bset	IMM (31),d2
+11:	lsrl	IMM (1),d1
+	btst	IMM (0),d0
+	beq	12f
+	bset	IMM (31),d1
+12:	lsrl	IMM (1),d0
+	addl	IMM (1),d4
+#endif
+1:
+	lea	pc@(Lsubdf$1),a0 | to return from rounding routine
+	PICLEA	SYM (_fpCCR),a1	| check the rounding mode
+#ifdef __mcoldfire__
+	clrl	d6
+#endif
+	movew	a1@(6),d6	| rounding mode in d6
+	beq	Lround$to$nearest
+#ifndef __mcoldfire__
+	cmpw	IMM (ROUND_TO_PLUS),d6
+#else
+	cmpl	IMM (ROUND_TO_PLUS),d6
+#endif
+	bhi	Lround$to$minus
+	blt	Lround$to$zero
+	bra	Lround$to$plus
+Lsubdf$1:
+| Put back the exponent and sign (we don't have overflow). '
+	bclr	IMM (DBL_MANT_DIG-1),d0	
+#ifndef __mcoldfire__
+	lslw	IMM (4),d4	| put exponent back into position
+#else
+	lsll	IMM (4),d4	| put exponent back into position
+#endif
+	swap	d0		| 
+#ifndef __mcoldfire__
+	orw	d4,d0		|
+#else
+	orl	d4,d0		|
+#endif
+	swap	d0		|
+	bra	Ladddf$ret
+
+| If one of the numbers was too small (difference of exponents >= 
+| DBL_MANT_DIG+1) we return the other (and now we don't have to '
+| check for finiteness or zero).
+Ladddf$a$small:
+#ifndef __mcoldfire__
+	moveml	sp@+,a2-a3	
+#else
+	movel	sp@+,a4
+	movel	sp@+,a3
+	movel	sp@+,a2
+#endif
+	movel	a6@(16),d0
+	movel	a6@(20),d1
+	PICLEA	SYM (_fpCCR),a0
+	movew	IMM (0),a0@
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7	| restore data registers
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6		| and return
+	rts
+
+Ladddf$b$small:
+#ifndef __mcoldfire__
+	moveml	sp@+,a2-a3	
+#else
+	movel	sp@+,a4	
+	movel	sp@+,a3	
+	movel	sp@+,a2	
+#endif
+	movel	a6@(8),d0
+	movel	a6@(12),d1
+	PICLEA	SYM (_fpCCR),a0
+	movew	IMM (0),a0@
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7	| restore data registers
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6		| and return
+	rts
+
+Ladddf$a$den:
+	movel	d7,d4		| d7 contains 0x00200000
+	bra	Ladddf$1
+
+Ladddf$b$den:
+	movel	d7,d5           | d7 contains 0x00200000
+	notl	d6
+	bra	Ladddf$2
+
+Ladddf$b:
+| Return b (if a is zero)
+	movel	d2,d0
+	movel	d3,d1
+	bne	1f			| Check if b is -0
+	cmpl	IMM (0x80000000),d0
+	bne	1f
+	andl	IMM (0x80000000),d7	| Use the sign of a
+	clrl	d0
+	bra	Ladddf$ret
+Ladddf$a:
+	movel	a6@(8),d0
+	movel	a6@(12),d1
+1:
+	moveq	IMM (ADD),d5
+| Check for NaN and +/-INFINITY.
+	movel	d0,d7         		|
+	andl	IMM (0x80000000),d7	|
+	bclr	IMM (31),d0		|
+	cmpl	IMM (0x7ff00000),d0	|
+	bge	2f			|
+	movel	d0,d0           	| check for zero, since we don't  '
+	bne	Ladddf$ret		| want to return -0 by mistake
+	bclr	IMM (31),d7		|
+	bra	Ladddf$ret		|
+2:
+	andl	IMM (0x000fffff),d0	| check for NaN (nonzero fraction)
+	orl	d1,d0			|
+	bne	Ld$inop         	|
+	bra	Ld$infty		|
+	
+Ladddf$ret$1:
+#ifndef __mcoldfire__
+	moveml	sp@+,a2-a3	| restore regs and exit
+#else
+	movel	sp@+,a4
+	movel	sp@+,a3
+	movel	sp@+,a2
+#endif
+
+Ladddf$ret:
+| Normal exit.
+	PICLEA	SYM (_fpCCR),a0
+	movew	IMM (0),a0@
+	orl	d7,d0		| put sign bit back
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6
+	rts
+
+Ladddf$ret$den:
+| Return a denormalized number.
+#ifndef __mcoldfire__
+	lsrl	IMM (1),d0	| shift right once more
+	roxrl	IMM (1),d1	|
+#else
+	lsrl	IMM (1),d1
+	btst	IMM (0),d0
+	beq	10f
+	bset	IMM (31),d1
+10:	lsrl	IMM (1),d0
+#endif
+	bra	Ladddf$ret
+
+Ladddf$nf:
+	moveq	IMM (ADD),d5
+| This could be faster but it is not worth the effort, since it is not
+| executed very often. We sacrifice speed for clarity here.
+	movel	a6@(8),d0	| get the numbers back (remember that we
+	movel	a6@(12),d1	| did some processing already)
+	movel	a6@(16),d2	| 
+	movel	a6@(20),d3	| 
+	movel	IMM (0x7ff00000),d4 | useful constant (INFINITY)
+	movel	d0,d7		| save sign bits
+	movel	d2,d6		| 
+	bclr	IMM (31),d0	| clear sign bits
+	bclr	IMM (31),d2	| 
+| We know that one of them is either NaN or +/-INFINITY.
+| Check for NaN (if either one is NaN return NaN)
+	cmpl	d4,d0		| check first a (d0)
+	bhi	Ld$inop		| if d0 > 0x7ff00000 or equal and
+	bne	2f
+	tstl	d1		| d1 > 0, a is NaN
+	bne	Ld$inop		| 
+2:	cmpl	d4,d2		| check now b (d2)
+	bhi	Ld$inop		| 
+	bne	3f
+	tstl	d3		| 
+	bne	Ld$inop		| 
+3:
+| Now comes the check for +/-INFINITY. We know that neither is a NaN, but
+| if both are infinite we have to check whether we are effectively adding
+| or subtracting them (i.e. whether their signs agree).
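+| For example, (+INF) + (+INF) = +INF, but (+INF) + (-INF) has no defined
+| value and must return NaN.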
+	eorl	d7,d6		| to check sign bits
+	bmi	1f
+	andl	IMM (0x80000000),d7 | get (common) sign bit
+	bra	Ld$infty
+1:
+| We know one (or both) are infinite, so we test for equality between the
+| two numbers (if they are equal they must both be infinite, so we
+| return NaN).
+	cmpl	d2,d0		| are both infinite?
+	bne	1f		| if d0 <> d2 they are not equal
+	cmpl	d3,d1		| if d0 == d2 test d3 and d1
+	beq	Ld$inop		| if equal return NaN
+1:	
+	andl	IMM (0x80000000),d7 | get a's sign bit '
+	cmpl	d4,d0		| test now for infinity
+	beq	Ld$infty	| if a is INFINITY return with this sign
+	bchg	IMM (31),d7	| else we know b is INFINITY and has
+	bra	Ld$infty	| the opposite sign
+
+|=============================================================================
+|                              __muldf3
+|=============================================================================
+
+| double __muldf3(double, double);
+	FUNC(__muldf3)
+SYM (__muldf3):
+#ifndef __mcoldfire__
+	link	a6,IMM (0)
+	moveml	d2-d7,sp@-
+#else
+	link	a6,IMM (-24)
+	moveml	d2-d7,sp@
+#endif
+	movel	a6@(8),d0		| get a into d0-d1
+	movel	a6@(12),d1		| 
+	movel	a6@(16),d2		| and b into d2-d3
+	movel	a6@(20),d3		|
+	movel	d0,d7			| d7 will hold the sign of the product
+	eorl	d2,d7			|
+	andl	IMM (0x80000000),d7	|
+	movel	d7,a0			| save sign bit into a0 
+	movel	IMM (0x7ff00000),d7	| useful constant (+INFINITY)
+	movel	d7,d6			| another (mask for fraction)
+	notl	d6			|
+	bclr	IMM (31),d0		| get rid of a's sign bit '
+	movel	d0,d4			| 
+	orl	d1,d4			| 
+	beq	Lmuldf$a$0		| branch if a is zero
+	movel	d0,d4			|
+	bclr	IMM (31),d2		| get rid of b's sign bit '
+	movel	d2,d5			|
+	orl	d3,d5			| 
+	beq	Lmuldf$b$0		| branch if b is zero
+	movel	d2,d5			| 
+	cmpl	d7,d0			| is a big?
+	bhi	Lmuldf$inop		| if a is NaN return NaN
+	beq	Lmuldf$a$nf		| we still have to check d1 and b ...
+	cmpl	d7,d2			| now compare b with INFINITY
+	bhi	Lmuldf$inop		| is b NaN?
+	beq	Lmuldf$b$nf 		| we still have to check d3 ...
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d4 and d5.
+	andl	d7,d4			| isolate exponent in d4
+	beq	Lmuldf$a$den		| if exponent zero, have denormalized
+	andl	d6,d0			| isolate fraction
+	orl	IMM (0x00100000),d0	| and put hidden bit back
+	swap	d4			| I like exponents in the first byte
+#ifndef __mcoldfire__
+	lsrw	IMM (4),d4		| 
+#else
+	lsrl	IMM (4),d4		| 
+#endif
+Lmuldf$1:			
+	andl	d7,d5			|
+	beq	Lmuldf$b$den		|
+	andl	d6,d2			|
+	orl	IMM (0x00100000),d2	| and put hidden bit back
+	swap	d5			|
+#ifndef __mcoldfire__
+	lsrw	IMM (4),d5		|
+#else
+	lsrl	IMM (4),d5		|
+#endif
+Lmuldf$2:				|
+#ifndef __mcoldfire__
+	addw	d5,d4			| add exponents
+	subw	IMM (D_BIAS+1),d4	| and subtract bias (plus one)
+#else
+	addl	d5,d4			| add exponents
+	subl	IMM (D_BIAS+1),d4	| and subtract bias (plus one)
+#endif
+
+| We are now ready to do the multiplication. The situation is as follows:
+| both a and b have bit 52 ( bit 20 of d0 and d2) set (even if they were 
+| denormalized to start with!), which means that in the product bit 104 
+| (which will correspond to bit 8 of the fourth long) is set.
+
+| Here we have to do the product.
+| To do it we have to juggle the registers back and forth, as there are not
+| enough to keep everything in them. So we use the address registers to keep
+| some intermediate data.
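+|
+| Schematically (illustration only), the loop below forms the 106-bit
+| product with DBL_MANT_DIG shift-and-add steps:
+|	acc = 0;			(128 bits, ends up in d0-d1-d2-d3)
+|	repeat DBL_MANT_DIG times:
+|	  acc <<= 1;  b <<= 1;		(b is 64 bits, in d6-d7)
+|	  if (bit shifted out of b)	(i.e. the next bit of b, high first)
+|	    acc += a;			(a is 64 bits, in d4-d5)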
+
+#ifndef __mcoldfire__
+	moveml	a2-a3,sp@-	| save a2 and a3 for temporary use
+#else
+	movel	a2,sp@-
+	movel	a3,sp@-
+	movel	a4,sp@-
+#endif
+	movel	IMM (0),a2	| a2 is a null register
+	movel	d4,a3		| and a3 will preserve the exponent
+
+| First, shift d2-d3 so bit 20 becomes bit 31:
+#ifndef __mcoldfire__
+	rorl	IMM (5),d2	| rotate d2 5 places right
+	swap	d2		| and swap it
+	rorl	IMM (5),d3	| do the same thing with d3
+	swap	d3		|
+	movew	d3,d6		| get the rightmost 11 bits of d3
+	andw	IMM (0x07ff),d6	|
+	orw	d6,d2		| and put them into d2
+	andw	IMM (0xf800),d3	| clear those bits in d3
+#else
+	moveq	IMM (11),d7	| left shift d2 11 bits
+	lsll	d7,d2
+	movel	d3,d6		| get a copy of d3
+	lsll	d7,d3		| left shift d3 11 bits
+	andl	IMM (0xffe00000),d6 | get the top 11 bits of d3
+	moveq	IMM (21),d7	| right shift them 21 bits
+	lsrl	d7,d6
+	orl	d6,d2		| stick them at the end of d2
+#endif
+
+	movel	d2,d6		| move b into d6-d7
+	movel	d3,d7           | move a into d4-d5
+	movel	d0,d4           | and clear d0-d1-d2-d3 (to put result)
+	movel	d1,d5           |
+	movel	IMM (0),d3	|
+	movel	d3,d2           |
+	movel	d3,d1           |
+	movel	d3,d0	        |
+
+| We use a1 as counter:	
+	movel	IMM (DBL_MANT_DIG-1),a1		
+#ifndef __mcoldfire__
+	exg	d7,a1
+#else
+	movel	d7,a4
+	movel	a1,d7
+	movel	a4,a1
+#endif
+
+1:
+#ifndef __mcoldfire__
+	exg	d7,a1		| put counter back in a1
+#else
+	movel	d7,a4
+	movel	a1,d7
+	movel	a4,a1
+#endif
+	addl	d3,d3		| shift sum once left
+	addxl	d2,d2           |
+	addxl	d1,d1           |
+	addxl	d0,d0           |
+	addl	d7,d7		|
+	addxl	d6,d6		|
+	bcc	2f		| if bit clear skip the following
+#ifndef __mcoldfire__
+	exg	d7,a2		|
+#else
+	movel	d7,a4
+	movel	a2,d7
+	movel	a4,a2
+#endif
+	addl	d5,d3		| else add a to the sum
+	addxl	d4,d2		|
+	addxl	d7,d1		|
+	addxl	d7,d0		|
+#ifndef __mcoldfire__
+	exg	d7,a2		| 
+#else
+	movel	d7,a4
+	movel	a2,d7
+	movel	a4,a2
+#endif
+2:
+#ifndef __mcoldfire__
+	exg	d7,a1		| put counter in d7
+	dbf	d7,1b		| decrement and branch
+#else
+	movel	d7,a4
+	movel	a1,d7
+	movel	a4,a1
+	subql	IMM (1),d7
+	bpl	1b
+#endif
+
+	movel	a3,d4		| restore exponent
+#ifndef __mcoldfire__
+	moveml	sp@+,a2-a3
+#else
+	movel	sp@+,a4
+	movel	sp@+,a3
+	movel	sp@+,a2
+#endif
+
+| Now we have the product in d0-d1-d2-d3, with bit 8 of d0 set. The 
+| first thing to do now is to normalize it so bit 8 becomes bit 
+| DBL_MANT_DIG-32 (to do the rounding); later we will shift right.
+	swap	d0
+	swap	d1
+	movew	d1,d0
+	swap	d2
+	movew	d2,d1
+	swap	d3
+	movew	d3,d2
+	movew	IMM (0),d3
+#ifndef __mcoldfire__
+	lsrl	IMM (1),d0
+	roxrl	IMM (1),d1
+	roxrl	IMM (1),d2
+	roxrl	IMM (1),d3
+	lsrl	IMM (1),d0
+	roxrl	IMM (1),d1
+	roxrl	IMM (1),d2
+	roxrl	IMM (1),d3
+	lsrl	IMM (1),d0
+	roxrl	IMM (1),d1
+	roxrl	IMM (1),d2
+	roxrl	IMM (1),d3
+#else
+	moveq	IMM (29),d6
+	lsrl	IMM (3),d3
+	movel	d2,d7
+	lsll	d6,d7
+	orl	d7,d3
+	lsrl	IMM (3),d2
+	movel	d1,d7
+	lsll	d6,d7
+	orl	d7,d2
+	lsrl	IMM (3),d1
+	movel	d0,d7
+	lsll	d6,d7
+	orl	d7,d1
+	lsrl	IMM (3),d0
+#endif
+	
+| Now round, check for over- and underflow, and exit.
+	movel	a0,d7		| get sign bit back into d7
+	moveq	IMM (MULTIPLY),d5
+
+	btst	IMM (DBL_MANT_DIG+1-32),d0
+	beq	Lround$exit
+#ifndef __mcoldfire__
+	lsrl	IMM (1),d0
+	roxrl	IMM (1),d1
+	addw	IMM (1),d4
+#else
+	lsrl	IMM (1),d1
+	btst	IMM (0),d0
+	beq	10f
+	bset	IMM (31),d1
+10:	lsrl	IMM (1),d0
+	addl	IMM (1),d4
+#endif
+	bra	Lround$exit
+
+Lmuldf$inop:
+	moveq	IMM (MULTIPLY),d5
+	bra	Ld$inop
+
+Lmuldf$b$nf:
+	moveq	IMM (MULTIPLY),d5
+	movel	a0,d7		| get sign bit back into d7
+	tstl	d3		| we know d2 == 0x7ff00000, so check d3
+	bne	Ld$inop		| if d3 <> 0 b is NaN
+	bra	Ld$overflow	| else we have overflow (since a is finite)
+
+Lmuldf$a$nf:
+	moveq	IMM (MULTIPLY),d5
+	movel	a0,d7		| get sign bit back into d7
+	tstl	d1		| we know d0 == 0x7ff00000, so check d1
+	bne	Ld$inop		| if d1 <> 0 a is NaN
+	bra	Ld$overflow	| else signal overflow
+
+| If either number is zero return zero, unless the other is +/-INFINITY or
+| NaN, in which case we return NaN.
+Lmuldf$b$0:
+	moveq	IMM (MULTIPLY),d5
+#ifndef __mcoldfire__
+	exg	d2,d0		| put b (==0) into d0-d1
+	exg	d3,d1		| and a (with sign bit cleared) into d2-d3
+	movel	a0,d0		| set result sign
+#else
+	movel	d0,d2		| put a into d2-d3
+	movel	d1,d3
+	movel	a0,d0		| put result zero into d0-d1
+	movq	IMM(0),d1
+#endif
+	bra	1f
+Lmuldf$a$0:
+	movel	a0,d0		| set result sign
+	movel	a6@(16),d2	| put b into d2-d3 again
+	movel	a6@(20),d3	|
+	bclr	IMM (31),d2	| clear sign bit
+1:	cmpl	IMM (0x7ff00000),d2 | check for non-finiteness
+	bge	Ld$inop		| in case NaN or +/-INFINITY return NaN
+	PICLEA	SYM (_fpCCR),a0
+	movew	IMM (0),a0@
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6
+	rts
+
+| If a number is denormalized we put an exponent of 1 but do not put the
+| hidden bit back into the fraction; instead we shift left until bit 20
+| (the hidden bit) is set, adjusting the exponent accordingly. We do this
+| to ensure that the product of the fractions is close to 1.
+Lmuldf$a$den:
+	movel	IMM (1),d4
+	andl	d6,d0
+1:	addl	d1,d1           | shift a left until bit 20 is set
+	addxl	d0,d0		|
+#ifndef __mcoldfire__
+	subw	IMM (1),d4	| and adjust exponent
+#else
+	subl	IMM (1),d4	| and adjust exponent
+#endif
+	btst	IMM (20),d0	|
+	bne	Lmuldf$1        |
+	bra	1b
+
+Lmuldf$b$den:
+	movel	IMM (1),d5
+	andl	d6,d2
+1:	addl	d3,d3		| shift b left until bit 20 is set
+	addxl	d2,d2		|
+#ifndef __mcoldfire__
+	subw	IMM (1),d5	| and adjust exponent
+#else
+	subql	IMM (1),d5	| and adjust exponent
+#endif
+	btst	IMM (20),d2	|
+	bne	Lmuldf$2	|
+	bra	1b
+
+
+|=============================================================================
+|                              __divdf3
+|=============================================================================
+
+| double __divdf3(double, double);
+	FUNC(__divdf3)
+SYM (__divdf3):
+#ifndef __mcoldfire__
+	link	a6,IMM (0)
+	moveml	d2-d7,sp@-
+#else
+	link	a6,IMM (-24)
+	moveml	d2-d7,sp@
+#endif
+	movel	a6@(8),d0	| get a into d0-d1
+	movel	a6@(12),d1	| 
+	movel	a6@(16),d2	| and b into d2-d3
+	movel	a6@(20),d3	|
+	movel	d0,d7		| d7 will hold the sign of the result
+	eorl	d2,d7		|
+	andl	IMM (0x80000000),d7
+	movel	d7,a0		| save sign into a0
+	movel	IMM (0x7ff00000),d7 | useful constant (+INFINITY)
+	movel	d7,d6		| another (mask for fraction)
+	notl	d6		|
+	bclr	IMM (31),d0	| get rid of a's sign bit '
+	movel	d0,d4		|
+	orl	d1,d4		|
+	beq	Ldivdf$a$0	| branch if a is zero
+	movel	d0,d4		|
+	bclr	IMM (31),d2	| get rid of b's sign bit '
+	movel	d2,d5		|
+	orl	d3,d5		|
+	beq	Ldivdf$b$0	| branch if b is zero
+	movel	d2,d5
+	cmpl	d7,d0		| is a big?
+	bhi	Ldivdf$inop	| if a is NaN return NaN
+	beq	Ldivdf$a$nf	| if d0 == 0x7ff00000 we check d1
+	cmpl	d7,d2		| now compare b with INFINITY 
+	bhi	Ldivdf$inop	| if b is NaN return NaN
+	beq	Ldivdf$b$nf	| if d2 == 0x7ff00000 we check d3
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d4 and d5 and normalize the numbers to
+| ensure that the ratio of the fractions is around 1. We do this by
+| making sure that both numbers have bit #DBL_MANT_DIG-32-1 (hidden bit)
+| set, even if they were denormalized to start with.
+| Thus, the result will satisfy: 2 > result > 1/2.
+	andl	d7,d4		| and isolate exponent in d4
+	beq	Ldivdf$a$den	| if exponent is zero we have a denormalized
+	andl	d6,d0		| and isolate fraction
+	orl	IMM (0x00100000),d0 | and put hidden bit back
+	swap	d4		| I like exponents in the first byte
+#ifndef __mcoldfire__
+	lsrw	IMM (4),d4	| 
+#else
+	lsrl	IMM (4),d4	| 
+#endif
+Ldivdf$1:			| 
+	andl	d7,d5		|
+	beq	Ldivdf$b$den	|
+	andl	d6,d2		|
+	orl	IMM (0x00100000),d2
+	swap	d5		|
+#ifndef __mcoldfire__
+	lsrw	IMM (4),d5	|
+#else
+	lsrl	IMM (4),d5	|
+#endif
+Ldivdf$2:			|
+#ifndef __mcoldfire__
+	subw	d5,d4		| subtract exponents
+	addw	IMM (D_BIAS),d4	| and add bias
+#else
+	subl	d5,d4		| subtract exponents
+	addl	IMM (D_BIAS),d4	| and add bias
+#endif
+
+| We are now ready to do the division. We have prepared things in such a way
+| that the ratio of the fractions will be less than 2 but greater than 1/2.
+| At this point the registers in use are:
+| d0-d1	hold a (first operand, bit DBL_MANT_DIG-32=0, bit 
+| DBL_MANT_DIG-1-32=1)
+| d2-d3	hold b (second operand, bit DBL_MANT_DIG-32=1)
+| d4	holds the difference of the exponents, corrected by the bias
+| a0	holds the sign of the ratio
+
+| To do the rounding correctly we need to keep information about the
+| nonsignificant bits. One way to do this would be to do the division
+| using four registers; another is to use two registers (as originally
+| I did), but use a sticky bit to preserve information about the 
+| fractional part. Note that we can keep that info in a1, which is not
+| used.
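+|
+| Schematically (illustration only), the first two loops below produce the
+| quotient one bit per step:
+|	if (a >= b) { a -= b; set the current quotient bit; }
+|	a <<= 1;
+| and the third loop turns a remaining nonzero remainder into a sticky bit
+| placed below the quotient, so that rounding still sees the lost fraction.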
+	movel	IMM (0),d6	| d6-d7 will hold the result
+	movel	d6,d7		| 
+	movel	IMM (0),a1	| and a1 will hold the sticky bit
+
+	movel	IMM (DBL_MANT_DIG-32+1),d5	
+	
+1:	cmpl	d0,d2		| is a < b?
+	bhi	3f		| if b > a skip the following
+	beq	4f		| if d0==d2 check d1 and d3
+2:	subl	d3,d1		| 
+	subxl	d2,d0		| a <-- a - b
+	bset	d5,d6		| set the corresponding bit in d6
+3:	addl	d1,d1		| shift a by 1
+	addxl	d0,d0		|
+#ifndef __mcoldfire__
+	dbra	d5,1b		| and branch back
+#else
+	subql	IMM (1), d5
+	bpl	1b
+#endif
+	bra	5f			
+4:	cmpl	d1,d3		| here d0==d2, so check d1 and d3
+	bhi	3b		| if d3 > d1 skip the subtraction
+	bra	2b		| else go do it
+5:
+| Here we have to start setting the bits in the second long.
+	movel	IMM (31),d5	| again d5 is counter
+
+1:	cmpl	d0,d2		| is a < b?
+	bhi	3f		| if b > a skip the following
+	beq	4f		| if d0==d2 check d1 and d3
+2:	subl	d3,d1		| 
+	subxl	d2,d0		| a <-- a - b
+	bset	d5,d7		| set the corresponding bit in d7
+3:	addl	d1,d1		| shift a by 1
+	addxl	d0,d0		|
+#ifndef __mcoldfire__
+	dbra	d5,1b		| and branch back
+#else
+	subql	IMM (1), d5
+	bpl	1b
+#endif
+	bra	5f			
+4:	cmpl	d1,d3		| here d0==d2, so check d1 and d3
+	bhi	3b		| if d3 > d1 skip the subtraction
+	bra	2b		| else go do it
+5:
+| Now go ahead checking until we hit a one, which we store in d2.
+	movel	IMM (DBL_MANT_DIG),d5
+1:	cmpl	d2,d0		| is a < b?
+	bhi	4f		| if b < a, exit
+	beq	3f		| if d0==d2 check d1 and d3
+2:	addl	d1,d1		| shift a by 1
+	addxl	d0,d0		|
+#ifndef __mcoldfire__
+	dbra	d5,1b		| and branch back
+#else
+	subql	IMM (1), d5
+	bpl	1b
+#endif
+	movel	IMM (0),d2	| here no sticky bit was found
+	movel	d2,d3
+	bra	5f			
+3:	cmpl	d1,d3		| here d0==d2, so check d1 and d3
+	bhi	2b		| if d3 > d1 go back
+4:
+| Here put the sticky bit in d2-d3 (in the position which actually corresponds
+| to it; if you don't do this the algorithm loses in some cases). '
+	movel	IMM (0),d2
+	movel	d2,d3
+#ifndef __mcoldfire__
+	subw	IMM (DBL_MANT_DIG),d5
+	addw	IMM (63),d5
+	cmpw	IMM (31),d5
+#else
+	subl	IMM (DBL_MANT_DIG),d5
+	addl	IMM (63),d5
+	cmpl	IMM (31),d5
+#endif
+	bhi	2f
+1:	bset	d5,d3
+	bra	5f
+#ifndef __mcoldfire__
+	subw	IMM (32),d5
+#else
+	subl	IMM (32),d5
+#endif
+2:	bset	d5,d2
+5:
+| Finally we are finished! Move the quotient longs (d6-d7) to their
+| final destination:
+	movel	d6,d0
+	movel	d7,d1
+	movel	IMM (0),d3
+
+| Here we have finished the division, with the result in d0-d1-d2-d3, with
+| 2^21 <= d0 < 2^23. Thus bit 23 is not set, but bit 22 could be set.
+| If it is not, then definitely bit 21 is set. Normalize so bit 22 is
+| not set:
+	btst	IMM (DBL_MANT_DIG-32+1),d0
+	beq	1f
+#ifndef __mcoldfire__
+	lsrl	IMM (1),d0
+	roxrl	IMM (1),d1
+	roxrl	IMM (1),d2
+	roxrl	IMM (1),d3
+	addw	IMM (1),d4
+#else
+	lsrl	IMM (1),d3
+	btst	IMM (0),d2
+	beq	10f
+	bset	IMM (31),d3
+10:	lsrl	IMM (1),d2
+	btst	IMM (0),d1
+	beq	11f
+	bset	IMM (31),d2
+11:	lsrl	IMM (1),d1
+	btst	IMM (0),d0
+	beq	12f
+	bset	IMM (31),d1
+12:	lsrl	IMM (1),d0
+	addl	IMM (1),d4
+#endif
+1:
+| Now round, check for over- and underflow, and exit.
+	movel	a0,d7		| restore sign bit to d7
+	moveq	IMM (DIVIDE),d5
+	bra	Lround$exit
+
+Ldivdf$inop:
+	moveq	IMM (DIVIDE),d5
+	bra	Ld$inop
+
+Ldivdf$a$0:
+| If a is zero check to see whether b is zero also. In that case return
+| NaN; then check if b is NaN, and return NaN also in that case. Else
+| return a properly signed zero.
+	moveq	IMM (DIVIDE),d5
+	bclr	IMM (31),d2	|
+	movel	d2,d4		| 
+	orl	d3,d4		| 
+	beq	Ld$inop		| if b is also zero return NaN
+	cmpl	IMM (0x7ff00000),d2 | check for NaN
+	bhi	Ld$inop		| 
+	blt	1f		|
+	tstl	d3		|
+	bne	Ld$inop		|
+1:	movel	a0,d0		| else return signed zero
+	moveq	IMM(0),d1	| 
+	PICLEA	SYM (_fpCCR),a0	| clear exception flags
+	movew	IMM (0),a0@	|
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7	| 
+#else
+	moveml	sp@,d2-d7	| 
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6		| 
+	rts			| 	
+
+Ldivdf$b$0:
+	moveq	IMM (DIVIDE),d5
+| If we got here a is not zero. Check if a is NaN; in that case return NaN,
+| else return +/-INFINITY. Remember that a is in d0 with the sign bit 
+| cleared already.
+	movel	a0,d7		| put a's sign bit back in d7 '
+	cmpl	IMM (0x7ff00000),d0 | compare d0 with INFINITY
+	bhi	Ld$inop		| if larger it is NaN
+	tstl	d1		| 
+	bne	Ld$inop		| 
+	bra	Ld$div$0	| else signal DIVIDE_BY_ZERO
+
+Ldivdf$b$nf:
+	moveq	IMM (DIVIDE),d5
+| If d2 == 0x7ff00000 we have to check d3.
+	tstl	d3		|
+	bne	Ld$inop		| if d3 <> 0, b is NaN
+	bra	Ld$underflow	| else b is +/-INFINITY, so signal underflow
+
+Ldivdf$a$nf:
+	moveq	IMM (DIVIDE),d5
+| If d0 == 0x7ff00000 we have to check d1.
+	tstl	d1		|
+	bne	Ld$inop		| if d1 <> 0, a is NaN
+| If a is INFINITY we have to check b
+	cmpl	d7,d2		| compare b with INFINITY 
+	bge	Ld$inop		| if b is NaN or INFINITY return NaN
+	tstl	d3		|
+	bne	Ld$inop		| 
+	bra	Ld$overflow	| else return overflow
+
+| If a number is denormalized we put an exponent of 1 but do not put the 
+| bit back into the fraction.
+Ldivdf$a$den:
+	movel	IMM (1),d4
+	andl	d6,d0
+1:	addl	d1,d1		| shift a left until bit 20 is set
+	addxl	d0,d0
+#ifndef __mcoldfire__
+	subw	IMM (1),d4	| and adjust exponent
+#else
+	subl	IMM (1),d4	| and adjust exponent
+#endif
+	btst	IMM (DBL_MANT_DIG-32-1),d0
+	bne	Ldivdf$1
+	bra	1b
+
+Ldivdf$b$den:
+	movel	IMM (1),d5
+	andl	d6,d2
+1:	addl	d3,d3		| shift b left until bit 20 is set
+	addxl	d2,d2
+#ifndef __mcoldfire__
+	subw	IMM (1),d5	| and adjust exponent
+#else
+	subql	IMM (1),d5	| and adjust exponent
+#endif
+	btst	IMM (DBL_MANT_DIG-32-1),d2
+	bne	Ldivdf$2
+	bra	1b
+
+Lround$exit:
+| This is a common exit point for __muldf3 and __divdf3. When they enter
+| this point the sign of the result is in d7, the result in d0-d1, normalized
+| so that 2^21 <= d0 < 2^22, and the exponent is in the lower byte of d4.
+
+| First check for underflow in the exponent:
+#ifndef __mcoldfire__
+	cmpw	IMM (-DBL_MANT_DIG-1),d4		
+#else
+	cmpl	IMM (-DBL_MANT_DIG-1),d4		
+#endif
+	blt	Ld$underflow	
+| It could happen that the exponent is less than 1, in which case the 
+| number is denormalized. In this case we shift right and adjust the 
+| exponent until it becomes 1 or the fraction is zero (in the latter case 
+| we signal underflow and return zero).
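+| For example, if the computed exponent is -2, the loop below shifts the
+| fraction right three times while stepping the exponent -2 -> -1 -> 0 -> 1,
+| and the result is delivered as a denormalized number.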
+	movel	d7,a0		|
+	movel	IMM (0),d6	| use d6-d7 to collect bits flushed right
+	movel	d6,d7		| use d6-d7 to collect bits flushed right
+#ifndef __mcoldfire__
+	cmpw	IMM (1),d4	| if the exponent is less than 1 we 
+#else
+	cmpl	IMM (1),d4	| if the exponent is less than 1 we 
+#endif
+	bge	2f		| have to shift right (denormalize)
+1:
+#ifndef __mcoldfire__
+	addw	IMM (1),d4	| adjust the exponent
+	lsrl	IMM (1),d0	| shift right once 
+	roxrl	IMM (1),d1	|
+	roxrl	IMM (1),d2	|
+	roxrl	IMM (1),d3	|
+	roxrl	IMM (1),d6	| 
+	roxrl	IMM (1),d7	|
+	cmpw	IMM (1),d4	| is the exponent 1 already?
+#else
+	addl	IMM (1),d4	| adjust the exponent
+	lsrl	IMM (1),d7
+	btst	IMM (0),d6
+	beq	13f
+	bset	IMM (31),d7
+13:	lsrl	IMM (1),d6
+	btst	IMM (0),d3
+	beq	14f
+	bset	IMM (31),d6
+14:	lsrl	IMM (1),d3
+	btst	IMM (0),d2
+	beq	10f
+	bset	IMM (31),d3
+10:	lsrl	IMM (1),d2
+	btst	IMM (0),d1
+	beq	11f
+	bset	IMM (31),d2
+11:	lsrl	IMM (1),d1
+	btst	IMM (0),d0
+	beq	12f
+	bset	IMM (31),d1
+12:	lsrl	IMM (1),d0
+	cmpl	IMM (1),d4	| is the exponent 1 already?
+#endif
+	beq	2f		| if not loop back
+	bra	1b              |
+	bra	Ld$underflow	| safety check, shouldn't execute '
+2:	orl	d6,d2		| this is a trick so we don't lose  '
+	orl	d7,d3		| the bits which were flushed right
+	movel	a0,d7		| get back sign bit into d7
+| Now call the rounding routine (which takes care of denormalized numbers):
+	lea	pc@(Lround$0),a0 | to return from rounding routine
+	PICLEA	SYM (_fpCCR),a1	| check the rounding mode
+#ifdef __mcoldfire__
+	clrl	d6
+#endif
+	movew	a1@(6),d6	| rounding mode in d6
+	beq	Lround$to$nearest
+#ifndef __mcoldfire__
+	cmpw	IMM (ROUND_TO_PLUS),d6
+#else
+	cmpl	IMM (ROUND_TO_PLUS),d6
+#endif
+	bhi	Lround$to$minus
+	blt	Lround$to$zero
+	bra	Lround$to$plus
+Lround$0:
+| Here we have a correctly rounded result (either normalized or denormalized).
+
+| Here we should have either a normalized number or a denormalized one, and
+| the exponent is necessarily larger or equal to 1 (so we don't have to  '
+| check again for underflow!). We have to check for overflow or for a 
+| denormalized number (which also signals underflow).
+| Check for overflow (i.e., exponent >= 0x7ff).
+#ifndef __mcoldfire__
+	cmpw	IMM (0x07ff),d4
+#else
+	cmpl	IMM (0x07ff),d4
+#endif
+	bge	Ld$overflow
+| Now check for a denormalized number (exponent==0):
+	movew	d4,d4
+	beq	Ld$den
+1:
+| Put back the exponent and sign and return.
+#ifndef __mcoldfire__
+	lslw	IMM (4),d4	| exponent back to fourth byte
+#else
+	lsll	IMM (4),d4	| exponent back to fourth byte
+#endif
+	bclr	IMM (DBL_MANT_DIG-32-1),d0
+	swap	d0		| and put back exponent
+#ifndef __mcoldfire__
+	orw	d4,d0		| 
+#else
+	orl	d4,d0		| 
+#endif
+	swap	d0		|
+	orl	d7,d0		| and sign also
+
+	PICLEA	SYM (_fpCCR),a0
+	movew	IMM (0),a0@
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6
+	rts
+
+|=============================================================================
+|                              __negdf2
+|=============================================================================
+
+| double __negdf2(double);
+	FUNC(__negdf2)
+SYM (__negdf2):
+#ifndef __mcoldfire__
+	link	a6,IMM (0)
+	moveml	d2-d7,sp@-
+#else
+	link	a6,IMM (-24)
+	moveml	d2-d7,sp@
+#endif
+	moveq	IMM (NEGATE),d5
+	movel	a6@(8),d0	| get number to negate in d0-d1
+	movel	a6@(12),d1	|
+	bchg	IMM (31),d0	| negate
+	movel	d0,d2		| make a positive copy (for the tests)
+	bclr	IMM (31),d2	|
+	movel	d2,d4		| check for zero
+	orl	d1,d4		|
+	beq	2f		| if zero (either sign) return +zero
+	cmpl	IMM (0x7ff00000),d2 | compare to +INFINITY
+	blt	1f		| if finite, return
+	bhi	Ld$inop		| if larger (fraction not zero) is NaN
+	tstl	d1		| if d2 == 0x7ff00000 check d1
+	bne	Ld$inop		|
+	movel	d0,d7		| else get sign and return INFINITY
+	andl	IMM (0x80000000),d7
+	bra	Ld$infty		
+1:	PICLEA	SYM (_fpCCR),a0
+	movew	IMM (0),a0@
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6
+	rts
+2:	bclr	IMM (31),d0
+	bra	1b
+
+|=============================================================================
+|                              __cmpdf2
+|=============================================================================
+
+GREATER =  1
+LESS    = -1
+EQUAL   =  0
+
+| int __cmpdf2_internal(double, double, int);
+SYM (__cmpdf2_internal):
+#ifndef __mcoldfire__
+	link	a6,IMM (0)
+	moveml	d2-d7,sp@- 	| save registers
+#else
+	link	a6,IMM (-24)
+	moveml	d2-d7,sp@
+#endif
+	moveq	IMM (COMPARE),d5
+	movel	a6@(8),d0	| get first operand
+	movel	a6@(12),d1	|
+	movel	a6@(16),d2	| get second operand
+	movel	a6@(20),d3	|
+| First check if a and/or b are (+/-) zero and in that case clear
+| the sign bit.
+	movel	d0,d6		| copy signs into d6 (a) and d7(b)
+	bclr	IMM (31),d0	| and clear signs in d0 and d2
+	movel	d2,d7		|
+	bclr	IMM (31),d2	|
+	cmpl	IMM (0x7ff00000),d0 | check for a == NaN
+	bhi	Lcmpd$inop		| if d0 > 0x7ff00000, a is NaN
+	beq	Lcmpdf$a$nf	| if equal can be INFINITY, so check d1
+	movel	d0,d4		| copy into d4 to test for zero
+	orl	d1,d4		|
+	beq	Lcmpdf$a$0	|
+Lcmpdf$0:
+	cmpl	IMM (0x7ff00000),d2 | check for b == NaN
+	bhi	Lcmpd$inop		| if d2 > 0x7ff00000, b is NaN
+	beq	Lcmpdf$b$nf	| if equal can be INFINITY, so check d3
+	movel	d2,d4		|
+	orl	d3,d4		|
+	beq	Lcmpdf$b$0	|
+Lcmpdf$1:
+| Check the signs
+	eorl	d6,d7
+	bpl	1f
+| If the signs are not equal check if a >= 0
+	tstl	d6
+	bpl	Lcmpdf$a$gt$b	| if (a >= 0 && b < 0) => a > b
+	bmi	Lcmpdf$b$gt$a	| if (a < 0 && b >= 0) => a < b
+1:
+| If the signs are equal check for < 0
+	tstl	d6
+	bpl	1f
+| If both are negative exchange them
+#ifndef __mcoldfire__
+	exg	d0,d2
+	exg	d1,d3
+#else
+	movel	d0,d7
+	movel	d2,d0
+	movel	d7,d2
+	movel	d1,d7
+	movel	d3,d1
+	movel	d7,d3
+#endif
+1:
+| Now that they are positive we just compare them as longs (does this also
+| work for denormalized numbers?).
+	cmpl	d0,d2
+	bhi	Lcmpdf$b$gt$a	| |b| > |a|
+	bne	Lcmpdf$a$gt$b	| |b| < |a|
+| If we got here d0 == d2, so we compare d1 and d3.
+	cmpl	d1,d3
+	bhi	Lcmpdf$b$gt$a	| |b| > |a|
+	bne	Lcmpdf$a$gt$b	| |b| < |a|
+| If we got here a == b.
+	movel	IMM (EQUAL),d0
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7 	| put back the registers
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6
+	rts
+Lcmpdf$a$gt$b:
+	movel	IMM (GREATER),d0
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7 	| put back the registers
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6
+	rts
+Lcmpdf$b$gt$a:
+	movel	IMM (LESS),d0
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7 	| put back the registers
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6
+	rts
+
+Lcmpdf$a$0:	
+	bclr	IMM (31),d6
+	bra	Lcmpdf$0
+Lcmpdf$b$0:
+	bclr	IMM (31),d7
+	bra	Lcmpdf$1
+
+Lcmpdf$a$nf:
+	tstl	d1
+	bne	Ld$inop
+	bra	Lcmpdf$0
+
+Lcmpdf$b$nf:
+	tstl	d3
+	bne	Ld$inop
+	bra	Lcmpdf$1
+
+Lcmpd$inop:
+	movl	a6@(24),d0
+	moveq	IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+	moveq	IMM (DOUBLE_FLOAT),d6
+	PICJUMP	$_exception_handler
+
+| int __cmpdf2(double, double);
+	FUNC(__cmpdf2)
+SYM (__cmpdf2):
+	link	a6,IMM (0)
+	pea	1
+	movl	a6@(20),sp@-
+	movl	a6@(16),sp@-
+	movl	a6@(12),sp@-
+	movl	a6@(8),sp@-
+	PICCALL	SYM (__cmpdf2_internal)
+	unlk	a6
+	rts
+
+|=============================================================================
+|                           rounding routines
+|=============================================================================
+
+| The rounding routines expect the number to be normalized in registers
+| d0-d1-d2-d3, with the exponent in register d4. They assume that the 
+| exponent is larger or equal to 1. They return a properly normalized number
+| if possible, and a denormalized number otherwise. The exponent is returned
+| in d4.
+
+Lround$to$nearest:
+| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"):
+| Here we assume that the exponent is not too small (this should be checked
+| before entering the rounding routine), but the number could be denormalized.
+
+| Check for denormalized numbers:
+1:	btst	IMM (DBL_MANT_DIG-32),d0
+	bne	2f		| if set the number is normalized
+| Normalize shifting left until bit #DBL_MANT_DIG-32 is set or the exponent 
+| is one (remember that a denormalized number corresponds to an 
+| exponent of -D_BIAS+1).
+#ifndef __mcoldfire__
+	cmpw	IMM (1),d4	| remember that the exponent is at least one
+#else
+	cmpl	IMM (1),d4	| remember that the exponent is at least one
+#endif
+ 	beq	2f		| an exponent of one means denormalized
+	addl	d3,d3		| else shift and adjust the exponent
+	addxl	d2,d2		|
+	addxl	d1,d1		|
+	addxl	d0,d0		|
+#ifndef __mcoldfire__
+	dbra	d4,1b		|
+#else
+	subql	IMM (1), d4
+	bpl	1b
+#endif
+2:
+| Now round: we do it as follows: after the shifting we can write the
+| fraction part as f + delta, where 1 < f < 2^54, and 0 <= delta <= 2.
+| If delta < 1, do nothing. If delta > 1, add 1 to f. 
+| If delta == 1, we make sure the rounded number will be even
+| (after shifting).
+	btst	IMM (0),d1	| is delta < 1?
+	beq	2f		| if so, do not do anything
+	orl	d2,d3		| is delta == 1?
+	bne	1f		| if delta > 1 add 1 below, else round to even here
+	movel	d1,d3		| 
+	andl	IMM (2),d3	| bit 1 is the last significant bit
+	movel	IMM (0),d2	|
+	addl	d3,d1		|
+	addxl	d2,d0		|
+	bra	2f		| 
+1:	movel	IMM (1),d3	| delta > 1, so add 1
+	movel	IMM (0),d2	|
+	addl	d3,d1		|
+	addxl	d2,d0
+| Shift right once (because we used bit #DBL_MANT_DIG-32!).
+2:
+#ifndef __mcoldfire__
+	lsrl	IMM (1),d0
+	roxrl	IMM (1),d1		
+#else
+	lsrl	IMM (1),d1
+	btst	IMM (0),d0
+	beq	10f
+	bset	IMM (31),d1
+10:	lsrl	IMM (1),d0
+#endif
+
+| Now check again bit #DBL_MANT_DIG-32 (rounding could have produced a
+| 'fraction overflow' ...).
+	btst	IMM (DBL_MANT_DIG-32),d0	
+	beq	1f
+#ifndef __mcoldfire__
+	lsrl	IMM (1),d0
+	roxrl	IMM (1),d1
+	addw	IMM (1),d4
+#else
+	lsrl	IMM (1),d1
+	btst	IMM (0),d0
+	beq	10f
+	bset	IMM (31),d1
+10:	lsrl	IMM (1),d0
+	addl	IMM (1),d4
+#endif
+1:
+| If bit #DBL_MANT_DIG-32-1 is clear we have a denormalized number, so we 
+| have to put the exponent to zero and return a denormalized number.
+	btst	IMM (DBL_MANT_DIG-32-1),d0
+	beq	1f
+	jmp	a0@
+1:	movel	IMM (0),d4
+	jmp	a0@
+
+Lround$to$zero:
+Lround$to$plus:
+Lround$to$minus:
+	jmp	a0@
+#endif /* L_double */
+
+#ifdef  L_float
+
+	.globl	SYM (_fpCCR)
+	.globl  $_exception_handler
+
+QUIET_NaN    = 0xffffffff
+SIGNL_NaN    = 0x7f800001
+INFINITY     = 0x7f800000
+
+F_MAX_EXP      = 0xff
+F_BIAS         = 126
+FLT_MAX_EXP    = F_MAX_EXP - F_BIAS
+FLT_MIN_EXP    = 1 - F_BIAS
+FLT_MANT_DIG   = 24
+
+INEXACT_RESULT 		= 0x0001
+UNDERFLOW 		= 0x0002
+OVERFLOW 		= 0x0004
+DIVIDE_BY_ZERO 		= 0x0008
+INVALID_OPERATION 	= 0x0010
+
+SINGLE_FLOAT = 1
+
+NOOP         = 0
+ADD          = 1
+MULTIPLY     = 2
+DIVIDE       = 3
+NEGATE       = 4
+COMPARE      = 5
+EXTENDSFDF   = 6
+TRUNCDFSF    = 7
+
+UNKNOWN           = -1
+ROUND_TO_NEAREST  = 0 | round result to nearest representable value
+ROUND_TO_ZERO     = 1 | round result towards zero
+ROUND_TO_PLUS     = 2 | round result towards plus infinity
+ROUND_TO_MINUS    = 3 | round result towards minus infinity
+
+| Entry points:
+
+	.globl SYM (__addsf3)
+	.globl SYM (__subsf3)
+	.globl SYM (__mulsf3)
+	.globl SYM (__divsf3)
+	.globl SYM (__negsf2)
+	.globl SYM (__cmpsf2)
+	.globl SYM (__cmpsf2_internal)
+	.hidden SYM (__cmpsf2_internal)
+
+| These are common routines to return and signal exceptions.	
+
+	.text
+	.even
+
+Lf$den:
+| Return and signal a denormalized number
+	orl	d7,d0
+	moveq	IMM (INEXACT_RESULT+UNDERFLOW),d7
+	moveq	IMM (SINGLE_FLOAT),d6
+	PICJUMP	$_exception_handler
+
+Lf$infty:
+Lf$overflow:
+| Return a properly signed INFINITY and set the exception flags 
+	movel	IMM (INFINITY),d0
+	orl	d7,d0
+	moveq	IMM (INEXACT_RESULT+OVERFLOW),d7
+	moveq	IMM (SINGLE_FLOAT),d6
+	PICJUMP	$_exception_handler
+
+Lf$underflow:
+| Return 0 and set the exception flags 
+	moveq	IMM (0),d0
+	moveq	IMM (INEXACT_RESULT+UNDERFLOW),d7
+	moveq	IMM (SINGLE_FLOAT),d6
+	PICJUMP	$_exception_handler
+
+Lf$inop:
+| Return a quiet NaN and set the exception flags
+	movel	IMM (QUIET_NaN),d0
+	moveq	IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+	moveq	IMM (SINGLE_FLOAT),d6
+	PICJUMP	$_exception_handler
+
+Lf$div$0:
+| Return a properly signed INFINITY and set the exception flags
+	movel	IMM (INFINITY),d0
+	orl	d7,d0
+	moveq	IMM (INEXACT_RESULT+DIVIDE_BY_ZERO),d7
+	moveq	IMM (SINGLE_FLOAT),d6
+	PICJUMP	$_exception_handler
+
+|=============================================================================
+|=============================================================================
+|                         single precision routines
+|=============================================================================
+|=============================================================================
+
+| A single precision floating point number (float) has the format:
+|
+| struct _float {
+|  unsigned int sign      : 1;  /* sign bit */ 
+|  unsigned int exponent  : 8;  /* exponent, shifted by 126 */
+|  unsigned int fraction  : 23; /* fraction */
+| } float;
+| 
+| Thus sizeof(float) = 4 (32 bits). 
+|
+| All the routines are callable from C programs, and return the result 
+| in the single register d0. They also preserve all registers except 
+| d0-d1 and a0-a1.
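+| As a reference, a small C sketch of how the fields above map onto the
+| 32-bit pattern (made-up helper names; they match the masks 0x80000000,
+| 0x7f800000, 0x007fffff and the hidden bit 0x00800000 used below):
+|
+|   unsigned sf_sign     (unsigned bits) { return bits & 0x80000000;   }
+|   unsigned sf_exponent (unsigned bits) { return (bits >> 23) & 0xff; }
+|   unsigned sf_fraction (unsigned bits) { return bits & 0x007fffff;   }
+|   /* normalized numbers also carry the implied hidden bit 0x00800000 */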
+
+|=============================================================================
+|                              __subsf3
+|=============================================================================
+
+| float __subsf3(float, float);
+	FUNC(__subsf3)
+SYM (__subsf3):
+	bchg	IMM (31),sp@(8)	| change sign of second operand
+				| and fall through
+|=============================================================================
+|                              __addsf3
+|=============================================================================
+
+| float __addsf3(float, float);
+	FUNC(__addsf3)
+SYM (__addsf3):
+#ifndef __mcoldfire__
+	link	a6,IMM (0)	| everything will be done in registers
+	moveml	d2-d7,sp@-	| save all data registers but d0-d1
+#else
+	link	a6,IMM (-24)
+	moveml	d2-d7,sp@
+#endif
+	movel	a6@(8),d0	| get first operand
+	movel	a6@(12),d1	| get second operand
+	movel	d0,a0		| get d0's sign bit '
+	addl	d0,d0		| check and clear sign bit of a
+	beq	Laddsf$b	| if zero return second operand
+	movel	d1,a1		| save b's sign bit '
+	addl	d1,d1		| get rid of sign bit
+	beq	Laddsf$a	| if zero return first operand
+
+| Get the exponents and check for denormalized and/or infinity.
+
+	movel	IMM (0x00ffffff),d4	| mask to get fraction
+	movel	IMM (0x01000000),d5	| mask to put hidden bit back
+
+	movel	d0,d6		| save a to get exponent
+	andl	d4,d0		| get fraction in d0
+	notl 	d4		| make d4 into a mask for the exponent
+	andl	d4,d6		| get exponent in d6
+	beq	Laddsf$a$den	| branch if a is denormalized
+	cmpl	d4,d6		| check for INFINITY or NaN
+	beq	Laddsf$nf
+	swap	d6		| put exponent into first word
+	orl	d5,d0		| and put hidden bit back
+Laddsf$1:
+| Now we have a's exponent in d6 (second byte) and the mantissa in d0. '
+	movel	d1,d7		| get exponent in d7
+	andl	d4,d7		| 
+	beq	Laddsf$b$den	| branch if b is denormalized
+	cmpl	d4,d7		| check for INFINITY or NaN
+	beq	Laddsf$nf
+	swap	d7		| put exponent into first word
+	notl 	d4		| make d4 into a mask for the fraction
+	andl	d4,d1		| get fraction in d1
+	orl	d5,d1		| and put hidden bit back
+Laddsf$2:
+| Now we have b's exponent in d7 (second byte) and the mantissa in d1. '
+
+| Note that the hidden bit corresponds to bit #FLT_MANT_DIG-1, and we 
+| shifted right once, so bit #FLT_MANT_DIG is set (so we have one extra
+| bit).
+
+	movel	d1,d2		| move b to d2, since we want to use
+				| two registers to do the sum
+	movel	IMM (0),d1	| and clear the new ones
+	movel	d1,d3		|
+
+| Here we shift the numbers in registers d0 and d1 so the exponents are the
+| same, and put the largest exponent in d6. Note that we are using two
+| registers for each number (see the discussion by D. Knuth in "Seminumerical 
+| Algorithms").
+#ifndef __mcoldfire__
+	cmpw	d6,d7		| compare exponents
+#else
+	cmpl	d6,d7		| compare exponents
+#endif
+	beq	Laddsf$3	| if equal don't shift '
+	bhi	5f		| branch if second exponent largest
+1:
+	subl	d6,d7		| keep the largest exponent
+	negl	d7
+#ifndef __mcoldfire__
+	lsrw	IMM (8),d7	| put difference in lower byte
+#else
+	lsrl	IMM (8),d7	| put difference in lower byte
+#endif
+| if difference is too large we don't shift (actually, we can just exit) '
+#ifndef __mcoldfire__
+	cmpw	IMM (FLT_MANT_DIG+2),d7		
+#else
+	cmpl	IMM (FLT_MANT_DIG+2),d7		
+#endif
+	bge	Laddsf$b$small
+#ifndef __mcoldfire__
+	cmpw	IMM (16),d7	| if difference >= 16 swap
+#else
+	cmpl	IMM (16),d7	| if difference >= 16 swap
+#endif
+	bge	4f
+2:
+#ifndef __mcoldfire__
+	subw	IMM (1),d7
+#else
+	subql	IMM (1), d7
+#endif
+3:
+#ifndef __mcoldfire__
+	lsrl	IMM (1),d2	| shift right second operand
+	roxrl	IMM (1),d3
+	dbra	d7,3b
+#else
+	lsrl	IMM (1),d3
+	btst	IMM (0),d2
+	beq	10f
+	bset	IMM (31),d3
+10:	lsrl	IMM (1),d2
+	subql	IMM (1), d7
+	bpl	3b
+#endif
+	bra	Laddsf$3
+4:
+	movew	d2,d3
+	swap	d3
+	movew	d3,d2
+	swap	d2
+#ifndef __mcoldfire__
+	subw	IMM (16),d7
+#else
+	subl	IMM (16),d7
+#endif
+	bne	2b		| if still more bits, go back to normal case
+	bra	Laddsf$3
+5:
+#ifndef __mcoldfire__
+	exg	d6,d7		| exchange the exponents
+#else
+	eorl	d6,d7
+	eorl	d7,d6
+	eorl	d6,d7
+#endif
+	subl	d6,d7		| keep the largest exponent
+	negl	d7		|
+#ifndef __mcoldfire__
+	lsrw	IMM (8),d7	| put difference in lower byte
+#else
+	lsrl	IMM (8),d7	| put difference in lower byte
+#endif
+| if difference is too large we don't shift (and exit!) '
+#ifndef __mcoldfire__
+	cmpw	IMM (FLT_MANT_DIG+2),d7		
+#else
+	cmpl	IMM (FLT_MANT_DIG+2),d7		
+#endif
+	bge	Laddsf$a$small
+#ifndef __mcoldfire__
+	cmpw	IMM (16),d7	| if difference >= 16 swap
+#else
+	cmpl	IMM (16),d7	| if difference >= 16 swap
+#endif
+	bge	8f
+6:
+#ifndef __mcoldfire__
+	subw	IMM (1),d7
+#else
+	subl	IMM (1),d7
+#endif
+7:
+#ifndef __mcoldfire__
+	lsrl	IMM (1),d0	| shift right first operand
+	roxrl	IMM (1),d1
+	dbra	d7,7b
+#else
+	lsrl	IMM (1),d1
+	btst	IMM (0),d0
+	beq	10f
+	bset	IMM (31),d1
+10:	lsrl	IMM (1),d0
+	subql	IMM (1),d7
+	bpl	7b
+#endif
+	bra	Laddsf$3
+8:
+	movew	d0,d1
+	swap	d1
+	movew	d1,d0
+	swap	d0
+#ifndef __mcoldfire__
+	subw	IMM (16),d7
+#else
+	subl	IMM (16),d7
+#endif
+	bne	6b		| if still more bits, go back to normal case
+				| otherwise we fall through
+
+| Now we have a in d0-d1, b in d2-d3, and the largest exponent in d6 (the
+| signs are stored in a0 and a1).
+
+Laddsf$3:
+| Here we have to decide whether to add or subtract the numbers
+#ifndef __mcoldfire__
+	exg	d6,a0		| get signs back
+	exg	d7,a1		| and save the exponents
+#else
+	movel	d6,d4
+	movel	a0,d6
+	movel	d4,a0
+	movel	d7,d4
+	movel	a1,d7
+	movel	d4,a1
+#endif
+	eorl	d6,d7		| combine sign bits
+	bmi	Lsubsf$0	| if negative a and b have opposite 
+				| sign so we actually subtract the
+				| numbers
+
+| Here we have both positive or both negative
+#ifndef __mcoldfire__
+	exg	d6,a0		| now we have the exponent in d6
+#else
+	movel	d6,d4
+	movel	a0,d6
+	movel	d4,a0
+#endif
+	movel	a0,d7		| and sign in d7
+	andl	IMM (0x80000000),d7
+| Here we do the addition.
+	addl	d3,d1
+	addxl	d2,d0
+| Note: now we have d2, d3, d4 and d5 to play with! 
+
+| Put the exponent, in the first byte, in d2, to use the "standard" rounding
+| routines:
+	movel	d6,d2
+#ifndef __mcoldfire__
+	lsrw	IMM (8),d2
+#else
+	lsrl	IMM (8),d2
+#endif
+
+| Before rounding normalize so bit #FLT_MANT_DIG is set (we will consider
+| the case of denormalized numbers in the rounding routine itself).
+| Since in the addition (but not in the subtraction!) one more bit may
+| have been set, we check for this below.
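+| In rough C terms (a sketch with made-up names, viewing the d0-d1 pair
+| as a single 64-bit fraction) the check that follows does:
+|
+|   void fix_carry (unsigned long long *frac, int *exp)
+|   {
+|     if (*frac & (1ULL << 57)) {    /* bit FLT_MANT_DIG+1 of d0   */
+|       *frac >>= 1;                 /* shift the pair right once  */
+|       *exp  += 1;                  /* and bump the exponent      */
+|     }
+|   }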
+	btst	IMM (FLT_MANT_DIG+1),d0	
+	beq	1f
+#ifndef __mcoldfire__
+	lsrl	IMM (1),d0
+	roxrl	IMM (1),d1
+#else
+	lsrl	IMM (1),d1
+	btst	IMM (0),d0
+	beq	10f
+	bset	IMM (31),d1
+10:	lsrl	IMM (1),d0
+#endif
+	addl	IMM (1),d2
+1:
+	lea	pc@(Laddsf$4),a0 | to return from rounding routine
+	PICLEA	SYM (_fpCCR),a1	| check the rounding mode
+#ifdef __mcoldfire__
+	clrl	d6
+#endif
+	movew	a1@(6),d6	| rounding mode in d6
+	beq	Lround$to$nearest
+#ifndef __mcoldfire__
+	cmpw	IMM (ROUND_TO_PLUS),d6
+#else
+	cmpl	IMM (ROUND_TO_PLUS),d6
+#endif
+	bhi	Lround$to$minus
+	blt	Lround$to$zero
+	bra	Lround$to$plus
+Laddsf$4:
+| Put back the exponent, but check for overflow.
+#ifndef __mcoldfire__
+	cmpw	IMM (0xff),d2
+#else
+	cmpl	IMM (0xff),d2
+#endif
+	bhi	1f
+	bclr	IMM (FLT_MANT_DIG-1),d0
+#ifndef __mcoldfire__
+	lslw	IMM (7),d2
+#else
+	lsll	IMM (7),d2
+#endif
+	swap	d2
+	orl	d2,d0
+	bra	Laddsf$ret
+1:
+	moveq	IMM (ADD),d5
+	bra	Lf$overflow
+
+Lsubsf$0:
+| We are here if a > 0 and b < 0 (sign bits cleared).
+| Here we do the subtraction.
+	movel	d6,d7		| put sign in d7
+	andl	IMM (0x80000000),d7
+
+	subl	d3,d1		| result in d0-d1
+	subxl	d2,d0		|
+	beq	Laddsf$ret	| if zero just exit
+	bpl	1f		| if positive skip the following
+	bchg	IMM (31),d7	| change sign bit in d7
+	negl	d1
+	negxl	d0
+1:
+#ifndef __mcoldfire__
+	exg	d2,a0		| now we have the exponent in d2
+	lsrw	IMM (8),d2	| put it in the first byte
+#else
+	movel	d2,d4
+	movel	a0,d2
+	movel	d4,a0
+	lsrl	IMM (8),d2	| put it in the first byte
+#endif
+
+| Now d0-d1 is positive and the sign bit is in d7.
+
+| Note that we do not have to normalize, since in the subtraction bit
+| #FLT_MANT_DIG+1 is never set, and denormalized numbers are handled by
+| the rounding routines themselves.
+	lea	pc@(Lsubsf$1),a0 | to return from rounding routine
+	PICLEA	SYM (_fpCCR),a1	| check the rounding mode
+#ifdef __mcoldfire__
+	clrl	d6
+#endif
+	movew	a1@(6),d6	| rounding mode in d6
+	beq	Lround$to$nearest
+#ifndef __mcoldfire__
+	cmpw	IMM (ROUND_TO_PLUS),d6
+#else
+	cmpl	IMM (ROUND_TO_PLUS),d6
+#endif
+	bhi	Lround$to$minus
+	blt	Lround$to$zero
+	bra	Lround$to$plus
+Lsubsf$1:
+| Put back the exponent (we can't have overflow!). '
+	bclr	IMM (FLT_MANT_DIG-1),d0
+#ifndef __mcoldfire__
+	lslw	IMM (7),d2
+#else
+	lsll	IMM (7),d2
+#endif
+	swap	d2
+	orl	d2,d0
+	bra	Laddsf$ret
+
+| If one of the numbers was too small (difference of exponents >= 
+| FLT_MANT_DIG+2) we return the other (and now we don't have to '
+| check for finiteness or zero).
+Laddsf$a$small:
+	movel	a6@(12),d0
+	PICLEA	SYM (_fpCCR),a0
+	movew	IMM (0),a0@
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7	| restore data registers
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6		| and return
+	rts
+
+Laddsf$b$small:
+	movel	a6@(8),d0
+	PICLEA	SYM (_fpCCR),a0
+	movew	IMM (0),a0@
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7	| restore data registers
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6		| and return
+	rts
+
+| If the numbers are denormalized remember to put exponent equal to 1.
+
+Laddsf$a$den:
+	movel	d5,d6		| d5 contains 0x01000000
+	swap	d6
+	bra	Laddsf$1
+
+Laddsf$b$den:
+	movel	d5,d7
+	swap	d7
+	notl 	d4		| make d4 into a mask for the fraction
+				| (this was not executed after the jump)
+	bra	Laddsf$2
+
+| The rest is mainly code for the different results which can be 
+| returned (checking always for +/-INFINITY and NaN).
+
+Laddsf$b:
+| Return b (if a is zero).
+	movel	a6@(12),d0
+	cmpl	IMM (0x80000000),d0	| Check if b is -0
+	bne	1f
+	movel	a0,d7
+	andl	IMM (0x80000000),d7	| Use the sign of a
+	clrl	d0
+	bra	Laddsf$ret
+Laddsf$a:
+| Return a (if b is zero).
+	movel	a6@(8),d0
+1:
+	moveq	IMM (ADD),d5
+| We have to check for NaN and +/-infty.
+	movel	d0,d7
+	andl	IMM (0x80000000),d7	| put sign in d7
+	bclr	IMM (31),d0		| clear sign
+	cmpl	IMM (INFINITY),d0	| check for infty or NaN
+	bge	2f
+	movel	d0,d0		| check for zero (we do this because we don't '
+	bne	Laddsf$ret	| want to return -0 by mistake)
+	bclr	IMM (31),d7	| if zero be sure to clear sign
+	bra	Laddsf$ret	| if everything OK just return
+2:
+| The value to be returned is either +/-infty or NaN
+	andl	IMM (0x007fffff),d0	| check for NaN
+	bne	Lf$inop			| if mantissa not zero is NaN
+	bra	Lf$infty
+
+Laddsf$ret:
+| Normal exit (a and b nonzero, result is not NaN nor +/-infty).
+| We have to clear the exception flags (just the exception type).
+	PICLEA	SYM (_fpCCR),a0
+	movew	IMM (0),a0@
+	orl	d7,d0		| put sign bit
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7	| restore data registers
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6		| and return
+	rts
+
+Laddsf$ret$den:
+| Return a denormalized number (for addition we don't signal underflow) '
+	lsrl	IMM (1),d0	| remember to shift right back once
+	bra	Laddsf$ret	| and return
+
+| Note: when adding two floats of the same sign if either one is 
+| NaN we return NaN without regard to whether the other is finite or 
+| not. When subtracting them (i.e., when adding two numbers of 
+| opposite signs) things are more complicated: if both are INFINITY 
+| we return NaN, if only one is INFINITY and the other is NaN we return
+| NaN, but if it is finite we return INFINITY with the corresponding sign.
+
+Laddsf$nf:
+	moveq	IMM (ADD),d5
+| This could be faster but it is not worth the effort, since it is not
+| executed very often. We sacrifice speed for clarity here.
+	movel	a6@(8),d0	| get the numbers back (remember that we
+	movel	a6@(12),d1	| did some processing already)
+	movel	IMM (INFINITY),d4 | useful constant (INFINITY)
+	movel	d0,d2		| save sign bits
+	movel	d1,d3
+	bclr	IMM (31),d0	| clear sign bits
+	bclr	IMM (31),d1
+| We know that one of them is either NaN or +/-INFINITY
+| Check for NaN (if either one is NaN return NaN)
+	cmpl	d4,d0		| check first a (d0)
+	bhi	Lf$inop		
+	cmpl	d4,d1		| check now b (d1)
+	bhi	Lf$inop		
+| Now comes the check for +/-INFINITY. We know that neither is NaN, but
+| we still have to check whether both are infinite when we are
+| effectively subtracting them (INFINITY - INFINITY is NaN).
+	eorl	d3,d2		| to check sign bits
+	bmi	1f
+	movel	d0,d7
+	andl	IMM (0x80000000),d7	| get (common) sign bit
+	bra	Lf$infty
+1:
+| We know one (or both) are infinite, so we test for equality between the
+| two numbers (if they are equal they must both be infinite, so we
+| return NaN).
+	cmpl	d1,d0		| are both infinite?
+	beq	Lf$inop		| if so return NaN
+
+	movel	d0,d7
+	andl	IMM (0x80000000),d7 | get a's sign bit '
+	cmpl	d4,d0		| test now for infinity
+	beq	Lf$infty	| if a is INFINITY return with this sign
+	bchg	IMM (31),d7	| else we know b is INFINITY and has
+	bra	Lf$infty	| the opposite sign
+
+|=============================================================================
+|                             __mulsf3
+|=============================================================================
+
+| float __mulsf3(float, float);
+	FUNC(__mulsf3)
+SYM (__mulsf3):
+#ifndef __mcoldfire__
+	link	a6,IMM (0)
+	moveml	d2-d7,sp@-
+#else
+	link	a6,IMM (-24)
+	moveml	d2-d7,sp@
+#endif
+	movel	a6@(8),d0	| get a into d0
+	movel	a6@(12),d1	| and b into d1
+	movel	d0,d7		| d7 will hold the sign of the product
+	eorl	d1,d7		|
+	andl	IMM (0x80000000),d7
+	movel	IMM (INFINITY),d6	| useful constant (+INFINITY)
+	movel	d6,d5			| another (mask for fraction)
+	notl	d5			|
+	movel	IMM (0x00800000),d4	| this is to put hidden bit back
+	bclr	IMM (31),d0		| get rid of a's sign bit '
+	movel	d0,d2			|
+	beq	Lmulsf$a$0		| branch if a is zero
+	bclr	IMM (31),d1		| get rid of b's sign bit '
+	movel	d1,d3		|
+	beq	Lmulsf$b$0	| branch if b is zero
+	cmpl	d6,d0		| is a big?
+	bhi	Lmulsf$inop	| if a is NaN return NaN
+	beq	Lmulsf$inf	| if a is INFINITY we have to check b
+	cmpl	d6,d1		| now compare b with INFINITY
+	bhi	Lmulsf$inop	| is b NaN?
+	beq	Lmulsf$overflow | is b INFINITY?
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d2 and d3.
+	andl	d6,d2		| and isolate exponent in d2
+	beq	Lmulsf$a$den	| if exponent is zero a is denormalized
+	andl	d5,d0		| and isolate fraction
+	orl	d4,d0		| and put hidden bit back
+	swap	d2		| I like exponents in the first byte
+#ifndef __mcoldfire__
+	lsrw	IMM (7),d2	| 
+#else
+	lsrl	IMM (7),d2	| 
+#endif
+Lmulsf$1:			|
+	andl	d6,d3		|
+	beq	Lmulsf$b$den	|
+	andl	d5,d1		|
+	orl	d4,d1		|
+	swap	d3		|
+#ifndef __mcoldfire__
+	lsrw	IMM (7),d3	|
+#else
+	lsrl	IMM (7),d3	|
+#endif
+Lmulsf$2:			|
+#ifndef __mcoldfire__
+	addw	d3,d2		| add exponents
+	subw	IMM (F_BIAS+1),d2 | and subtract bias (plus one)
+#else
+	addl	d3,d2		| add exponents
+	subl	IMM (F_BIAS+1),d2 | and subtract bias (plus one)
+#endif
+
+| We are now ready to do the multiplication. The situation is as follows:
+| both a and b have bit FLT_MANT_DIG-1 set (even if they were 
+| denormalized to start with!), which means that in the product 
+| bit 2*(FLT_MANT_DIG-1) (that is, bit 2*FLT_MANT_DIG-2-32 of the 
+| high long) is set. 
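+| The loop below forms the product by classic shift-and-add, scanning the
+| bits of b from the most significant one down. In rough C (a sketch with
+| made-up names):
+|
+|   unsigned long long mul_fractions (unsigned a, unsigned b)
+|   {
+|     unsigned long long prod = 0;
+|     for (int i = 23; i >= 0; i--) {   /* FLT_MANT_DIG = 24 bits of b  */
+|       prod <<= 1;                     /* shift the partial sum        */
+|       if (b & (1u << i))              /* if the next bit of b is set  */
+|         prod += a;                    /* add a into the partial sum   */
+|     }
+|     return prod;
+|   }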
+
+| To do the multiplication let us move the number a little bit around ...
+	movel	d1,d6		| second operand in d6
+	movel	d0,d5		| first operand in d4-d5
+	movel	IMM (0),d4
+	movel	d4,d1		| the sums will go in d0-d1
+	movel	d4,d0
+
+| now bit FLT_MANT_DIG-1 becomes bit 31:
+	lsll	IMM (31-FLT_MANT_DIG+1),d6		
+
+| Start the loop (we loop #FLT_MANT_DIG times):
+	moveq	IMM (FLT_MANT_DIG-1),d3	
+1:	addl	d1,d1		| shift sum 
+	addxl	d0,d0
+	lsll	IMM (1),d6	| get bit bn
+	bcc	2f		| if not set skip sum
+	addl	d5,d1		| add a
+	addxl	d4,d0
+2:
+#ifndef __mcoldfire__
+	dbf	d3,1b		| loop back
+#else
+	subql	IMM (1),d3
+	bpl	1b
+#endif
+
+| Now we have the product in d0-d1, with bit (FLT_MANT_DIG - 1) + FLT_MANT_DIG
+| (mod 32) of d0 set. The first thing to do now is to normalize it so bit 
+| FLT_MANT_DIG is set (to do the rounding).
+#ifndef __mcoldfire__
+	rorl	IMM (6),d1
+	swap	d1
+	movew	d1,d3
+	andw	IMM (0x03ff),d3
+	andw	IMM (0xfd00),d1
+#else
+	movel	d1,d3
+	lsll	IMM (8),d1
+	addl	d1,d1
+	addl	d1,d1
+	moveq	IMM (22),d5
+	lsrl	d5,d3
+	orl	d3,d1
+	andl	IMM (0xfffffd00),d1
+#endif
+	lsll	IMM (8),d0
+	addl	d0,d0
+	addl	d0,d0
+#ifndef __mcoldfire__
+	orw	d3,d0
+#else
+	orl	d3,d0
+#endif
+
+	moveq	IMM (MULTIPLY),d5
+	
+	btst	IMM (FLT_MANT_DIG+1),d0
+	beq	Lround$exit
+#ifndef __mcoldfire__
+	lsrl	IMM (1),d0
+	roxrl	IMM (1),d1
+	addw	IMM (1),d2
+#else
+	lsrl	IMM (1),d1
+	btst	IMM (0),d0
+	beq	10f
+	bset	IMM (31),d1
+10:	lsrl	IMM (1),d0
+	addql	IMM (1),d2
+#endif
+	bra	Lround$exit
+
+Lmulsf$inop:
+	moveq	IMM (MULTIPLY),d5
+	bra	Lf$inop
+
+Lmulsf$overflow:
+	moveq	IMM (MULTIPLY),d5
+	bra	Lf$overflow
+
+Lmulsf$inf:
+	moveq	IMM (MULTIPLY),d5
+| If either is NaN return NaN; else both are (maybe infinite) numbers, so
+| return INFINITY with the correct sign (which is in d7).
+	cmpl	d6,d1		| is b NaN?
+	bhi	Lf$inop		| if so return NaN
+	bra	Lf$overflow	| else return +/-INFINITY
+
+| If either number is zero return zero, unless the other is +/-INFINITY, 
+| or NaN, in which case we return NaN.
+Lmulsf$b$0:
+| Here d1 (==b) is zero.
+	movel	a6@(8),d1	| get a again to check for non-finiteness
+	bra	1f
+Lmulsf$a$0:
+	movel	a6@(12),d1	| get b again to check for non-finiteness
+1:	bclr	IMM (31),d1	| clear sign bit 
+	cmpl	IMM (INFINITY),d1 | and check for a large exponent
+	bge	Lf$inop		| if b is +/-INFINITY or NaN return NaN
+	movel	d7,d0		| else return signed zero
+	PICLEA	SYM (_fpCCR),a0	|
+	movew	IMM (0),a0@	| 
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7	| 
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6		| 
+	rts			| 
+
+| If a number is denormalized we put an exponent of 1 but do not put the 
+| hidden bit back into the fraction; instead we shift left until bit 23
+| (the hidden bit) is set, adjusting the exponent accordingly. We do this
+| to ensure that the product of the fractions is close to 1.
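+| In rough C terms the pre-scaling below does (a sketch with made-up
+| names; frac is the nonzero denormalized fraction with the sign and
+| exponent fields already stripped off):
+|
+|   unsigned prescale (unsigned frac, int *exp)
+|   {
+|     *exp = 1;
+|     while (!(frac & 0x00800000)) {   /* until bit 23 (hidden bit) is set */
+|       frac <<= 1;
+|       *exp -= 1;
+|     }
+|     return frac;
+|   }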
+Lmulsf$a$den:
+	movel	IMM (1),d2
+	andl	d5,d0
+1:	addl	d0,d0		| shift a left (until bit 23 is set)
+#ifndef __mcoldfire__
+	subw	IMM (1),d2	| and adjust exponent
+#else
+	subql	IMM (1),d2	| and adjust exponent
+#endif
+	btst	IMM (FLT_MANT_DIG-1),d0
+	bne	Lmulsf$1	|
+	bra	1b		| else loop back
+
+Lmulsf$b$den:
+	movel	IMM (1),d3
+	andl	d5,d1
+1:	addl	d1,d1		| shift b left until bit 23 is set
+#ifndef __mcoldfire__
+	subw	IMM (1),d3	| and adjust exponent
+#else
+	subql	IMM (1),d3	| and adjust exponent
+#endif
+	btst	IMM (FLT_MANT_DIG-1),d1
+	bne	Lmulsf$2	|
+	bra	1b		| else loop back
+
+|=============================================================================
+|                             __divsf3
+|=============================================================================
+
+| float __divsf3(float, float);
+	FUNC(__divsf3)
+SYM (__divsf3):
+#ifndef __mcoldfire__
+	link	a6,IMM (0)
+	moveml	d2-d7,sp@-
+#else
+	link	a6,IMM (-24)
+	moveml	d2-d7,sp@
+#endif
+	movel	a6@(8),d0		| get a into d0
+	movel	a6@(12),d1		| and b into d1
+	movel	d0,d7			| d7 will hold the sign of the result
+	eorl	d1,d7			|
+	andl	IMM (0x80000000),d7	| 
+	movel	IMM (INFINITY),d6	| useful constant (+INFINITY)
+	movel	d6,d5			| another (mask for fraction)
+	notl	d5			|
+	movel	IMM (0x00800000),d4	| this is to put hidden bit back
+	bclr	IMM (31),d0		| get rid of a's sign bit '
+	movel	d0,d2			|
+	beq	Ldivsf$a$0		| branch if a is zero
+	bclr	IMM (31),d1		| get rid of b's sign bit '
+	movel	d1,d3			|
+	beq	Ldivsf$b$0		| branch if b is zero
+	cmpl	d6,d0			| is a big?
+	bhi	Ldivsf$inop		| if a is NaN return NaN
+	beq	Ldivsf$inf		| if a is INFINITY we have to check b
+	cmpl	d6,d1			| now compare b with INFINITY 
+	bhi	Ldivsf$inop		| if b is NaN return NaN
+	beq	Ldivsf$underflow
+| Here we have both numbers finite and nonzero (and with no sign bit).
+| Now we get the exponents into d2 and d3 and normalize the numbers to
+| ensure that the ratio of the fractions is close to 1. We do this by
+| making sure that bit #FLT_MANT_DIG-1 (hidden bit) is set.
+	andl	d6,d2		| and isolate exponent in d2
+	beq	Ldivsf$a$den	| if exponent is zero a is denormalized
+	andl	d5,d0		| and isolate fraction
+	orl	d4,d0		| and put hidden bit back
+	swap	d2		| I like exponents in the first byte
+#ifndef __mcoldfire__
+	lsrw	IMM (7),d2	| 
+#else
+	lsrl	IMM (7),d2	| 
+#endif
+Ldivsf$1:			| 
+	andl	d6,d3		|
+	beq	Ldivsf$b$den	|
+	andl	d5,d1		|
+	orl	d4,d1		|
+	swap	d3		|
+#ifndef __mcoldfire__
+	lsrw	IMM (7),d3	|
+#else
+	lsrl	IMM (7),d3	|
+#endif
+Ldivsf$2:			|
+#ifndef __mcoldfire__
+	subw	d3,d2		| subtract exponents
+ 	addw	IMM (F_BIAS),d2	| and add bias
+#else
+	subl	d3,d2		| subtract exponents
+ 	addl	IMM (F_BIAS),d2	| and add bias
+#endif
+ 
+| We are now ready to do the division. We have prepared things in such a way
+| that the ratio of the fractions will be less than 2 but greater than 1/2.
+| At this point the registers in use are:
+| d0	holds a (first operand, bit FLT_MANT_DIG=0, bit FLT_MANT_DIG-1=1)
+| d1	holds b (second operand, bit FLT_MANT_DIG=1)
+| d2	holds the difference of the exponents, corrected by the bias
+| d7	holds the sign of the ratio
+| d4, d5, d6 hold some constants
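+| In rough C the quotient loop below computes (a sketch with made-up
+| names; the extra pass that sets the sticky bit and the early exit when
+| a becomes zero are not shown):
+|
+|   unsigned divide_fractions (unsigned a, unsigned b)
+|   {
+|     unsigned q = 0;
+|     for (int bit = 25; bit >= 0; bit--) {   /* FLT_MANT_DIG+1 down to 0  */
+|       if (a >= b) {                         /* does b go into a?         */
+|         q |= 1u << bit;                     /* yes: set the quotient bit */
+|         a -= b;
+|       }
+|       a <<= 1;                              /* and move to the next bit  */
+|     }
+|     return q;
+|   }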
+	movel	d7,a0		| d6-d7 will hold the ratio of the fractions
+	movel	IMM (0),d6	| 
+	movel	d6,d7
+
+	moveq	IMM (FLT_MANT_DIG+1),d3
+1:	cmpl	d0,d1		| is a < b?
+	bhi	2f		|
+	bset	d3,d6		| set a bit in d6
+	subl	d1,d0		| if a >= b  a <-- a-b
+	beq	3f		| if a is zero, exit
+2:	addl	d0,d0		| multiply a by 2
+#ifndef __mcoldfire__
+	dbra	d3,1b
+#else
+	subql	IMM (1),d3
+	bpl	1b
+#endif
+
+| Now we keep going to set the sticky bit ...
+	moveq	IMM (FLT_MANT_DIG),d3
+1:	cmpl	d0,d1
+	ble	2f
+	addl	d0,d0
+#ifndef __mcoldfire__
+	dbra	d3,1b
+#else
+	subql	IMM(1),d3
+	bpl	1b
+#endif
+	movel	IMM (0),d1
+	bra	3f
+2:	movel	IMM (0),d1
+#ifndef __mcoldfire__
+	subw	IMM (FLT_MANT_DIG),d3
+	addw	IMM (31),d3
+#else
+	subl	IMM (FLT_MANT_DIG),d3
+	addl	IMM (31),d3
+#endif
+	bset	d3,d1
+3:
+	movel	d6,d0		| put the ratio in d0-d1
+	movel	a0,d7		| get sign back
+
+| Because of the normalization we did before we are guaranteed that 
+| d0 is smaller than 2^26 but larger than 2^24. Thus bit 26 is not set,
+| bit 25 could be set, and if it is not set then bit 24 is necessarily set.
+	btst	IMM (FLT_MANT_DIG+1),d0		
+	beq	1f              | if it is not set, then bit 24 is set
+	lsrl	IMM (1),d0	|
+#ifndef __mcoldfire__
+	addw	IMM (1),d2	|
+#else
+	addl	IMM (1),d2	|
+#endif
+1:
+| Now round, check for over- and underflow, and exit.
+	moveq	IMM (DIVIDE),d5
+	bra	Lround$exit
+
+Ldivsf$inop:
+	moveq	IMM (DIVIDE),d5
+	bra	Lf$inop
+
+Ldivsf$overflow:
+	moveq	IMM (DIVIDE),d5
+	bra	Lf$overflow
+
+Ldivsf$underflow:
+	moveq	IMM (DIVIDE),d5
+	bra	Lf$underflow
+
+Ldivsf$a$0:
+	moveq	IMM (DIVIDE),d5
+| If a is zero check to see whether b is zero also. In that case return
+| NaN; then check if b is NaN, and return NaN also in that case. Else
+| return a properly signed zero.
+	andl	IMM (0x7fffffff),d1	| clear sign bit and test b
+	beq	Lf$inop			| if b is also zero return NaN
+	cmpl	IMM (INFINITY),d1	| check for NaN
+	bhi	Lf$inop			| 
+	movel	d7,d0			| else return signed zero
+	PICLEA	SYM (_fpCCR),a0		|
+	movew	IMM (0),a0@		|
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7		| 
+#else
+	moveml	sp@,d2-d7		| 
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6			| 
+	rts				| 
+	
+Ldivsf$b$0:
+	moveq	IMM (DIVIDE),d5
+| If we got here a is not zero. Check if a is NaN; in that case return NaN,
+| else return +/-INFINITY. Remember that a is in d0 with the sign bit 
+| cleared already.
+	cmpl	IMM (INFINITY),d0	| compare d0 with INFINITY
+	bhi	Lf$inop			| if larger it is NaN
+	bra	Lf$div$0		| else signal DIVIDE_BY_ZERO
+
+Ldivsf$inf:
+	moveq	IMM (DIVIDE),d5
+| If a is INFINITY we have to check b
+	cmpl	IMM (INFINITY),d1	| compare b with INFINITY 
+	bge	Lf$inop			| if b is NaN or INFINITY return NaN
+	bra	Lf$overflow		| else return overflow
+
+| If a number is denormalized we put an exponent of 1 but do not put the 
+| bit back into the fraction.
+Ldivsf$a$den:
+	movel	IMM (1),d2
+	andl	d5,d0
+1:	addl	d0,d0		| shift a left until bit FLT_MANT_DIG-1 is set
+#ifndef __mcoldfire__
+	subw	IMM (1),d2	| and adjust exponent
+#else
+	subl	IMM (1),d2	| and adjust exponent
+#endif
+	btst	IMM (FLT_MANT_DIG-1),d0
+	bne	Ldivsf$1
+	bra	1b
+
+Ldivsf$b$den:
+	movel	IMM (1),d3
+	andl	d5,d1
+1:	addl	d1,d1		| shift b left until bit FLT_MANT_DIG is set
+#ifndef __mcoldfire__
+	subw	IMM (1),d3	| and adjust exponent
+#else
+	subl	IMM (1),d3	| and adjust exponent
+#endif
+	btst	IMM (FLT_MANT_DIG-1),d1
+	bne	Ldivsf$2
+	bra	1b
+
+Lround$exit:
+| This is a common exit point for __mulsf3 and __divsf3. 
+
+| First check for underflow in the exponent:
+#ifndef __mcoldfire__
+	cmpw	IMM (-FLT_MANT_DIG-1),d2		
+#else
+	cmpl	IMM (-FLT_MANT_DIG-1),d2		
+#endif
+	blt	Lf$underflow	
+| It could happen that the exponent is less than 1, in which case the 
+| number is denormalized. In this case we shift right and adjust the 
+| exponent until it becomes 1 or the fraction is zero (in the latter case 
+| we signal underflow and return zero).
+	movel	IMM (0),d6	| d6 is used temporarily
+#ifndef __mcoldfire__
+	cmpw	IMM (1),d2	| if the exponent is less than 1 we 
+#else
+	cmpl	IMM (1),d2	| if the exponent is less than 1 we 
+#endif
+	bge	2f		| have to shift right (denormalize)
+1:
+#ifndef __mcoldfire__
+	addw	IMM (1),d2	| adjust the exponent
+	lsrl	IMM (1),d0	| shift right once 
+	roxrl	IMM (1),d1	|
+	roxrl	IMM (1),d6	| d6 collect bits we would lose otherwise
+	cmpw	IMM (1),d2	| is the exponent 1 already?
+#else
+	addql	IMM (1),d2	| adjust the exponent
+	lsrl	IMM (1),d6
+	btst	IMM (0),d1
+	beq	11f
+	bset	IMM (31),d6
+11:	lsrl	IMM (1),d1
+	btst	IMM (0),d0
+	beq	10f
+	bset	IMM (31),d1
+10:	lsrl	IMM (1),d0
+	cmpl	IMM (1),d2	| is the exponent 1 already?
+#endif
+	beq	2f		| if the exponent is 1 we are done
+	bra	1b		| else loop back
+	bra	Lf$underflow	| safety check, shouldn't execute '
+2:	orl	d6,d1		| this is a trick so we don't lose  '
+				| the extra bits which were flushed right
+| Now call the rounding routine (which takes care of denormalized numbers):
+	lea	pc@(Lround$0),a0 | to return from rounding routine
+	PICLEA	SYM (_fpCCR),a1	| check the rounding mode
+#ifdef __mcoldfire__
+	clrl	d6
+#endif
+	movew	a1@(6),d6	| rounding mode in d6
+	beq	Lround$to$nearest
+#ifndef __mcoldfire__
+	cmpw	IMM (ROUND_TO_PLUS),d6
+#else
+	cmpl	IMM (ROUND_TO_PLUS),d6
+#endif
+	bhi	Lround$to$minus
+	blt	Lround$to$zero
+	bra	Lround$to$plus
+Lround$0:
+| Here we have a correctly rounded result (either normalized or denormalized).
+
+| Here we should have either a normalized number or a denormalized one, and
+| the exponent is necessarily larger or equal to 1 (so we don't have to  '
+| check again for underflow!). We have to check for overflow or for a 
+| denormalized number (which also signals underflow).
+| Check for overflow (i.e., exponent >= 255).
+#ifndef __mcoldfire__
+	cmpw	IMM (0x00ff),d2
+#else
+	cmpl	IMM (0x00ff),d2
+#endif
+	bge	Lf$overflow
+| Now check for a denormalized number (exponent==0).
+	movew	d2,d2
+	beq	Lf$den
+1:
+| Put back the exponents and sign and return.
+#ifndef __mcoldfire__
+	lslw	IMM (7),d2	| exponent back to fourth byte
+#else
+	lsll	IMM (7),d2	| exponent back to fourth byte
+#endif
+	bclr	IMM (FLT_MANT_DIG-1),d0
+	swap	d0		| and put back exponent
+#ifndef __mcoldfire__
+	orw	d2,d0		| 
+#else
+	orl	d2,d0
+#endif
+	swap	d0		|
+	orl	d7,d0		| and sign also
+
+	PICLEA	SYM (_fpCCR),a0
+	movew	IMM (0),a0@
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6
+	rts
+
+|=============================================================================
+|                             __negsf2
+|=============================================================================
+
+| This is trivial and could be shorter if we didn't bother checking for NaN '
+| and +/-INFINITY.
+
+| float __negsf2(float);
+	FUNC(__negsf2)
+SYM (__negsf2):
+#ifndef __mcoldfire__
+	link	a6,IMM (0)
+	moveml	d2-d7,sp@-
+#else
+	link	a6,IMM (-24)
+	moveml	d2-d7,sp@
+#endif
+	moveq	IMM (NEGATE),d5
+	movel	a6@(8),d0	| get number to negate in d0
+	bchg	IMM (31),d0	| negate
+	movel	d0,d1		| make a positive copy
+	bclr	IMM (31),d1	|
+	tstl	d1		| check for zero
+	beq	2f		| if zero (either sign) return +zero
+	cmpl	IMM (INFINITY),d1 | compare to +INFINITY
+	blt	1f		|
+	bhi	Lf$inop		| if larger (fraction not zero) is NaN
+	movel	d0,d7		| else get sign and return INFINITY
+	andl	IMM (0x80000000),d7
+	bra	Lf$infty		
+1:	PICLEA	SYM (_fpCCR),a0
+	movew	IMM (0),a0@
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6
+	rts
+2:	bclr	IMM (31),d0
+	bra	1b
+
+|=============================================================================
+|                             __cmpsf2
+|=============================================================================
+
+GREATER =  1
+LESS    = -1
+EQUAL   =  0
+
+| int __cmpsf2_internal(float, float, int);
+SYM (__cmpsf2_internal):
+#ifndef __mcoldfire__
+	link	a6,IMM (0)
+	moveml	d2-d7,sp@- 	| save registers
+#else
+	link	a6,IMM (-24)
+	moveml	d2-d7,sp@
+#endif
+	moveq	IMM (COMPARE),d5
+	movel	a6@(8),d0	| get first operand
+	movel	a6@(12),d1	| get second operand
+| Check if either is NaN, and in that case return garbage and signal
+| INVALID_OPERATION. Check also if either is zero, and clear the signs
+| if necessary.
+	movel	d0,d6
+	andl	IMM (0x7fffffff),d0
+	beq	Lcmpsf$a$0
+	cmpl	IMM (0x7f800000),d0
+	bhi	Lcmpf$inop
+Lcmpsf$1:
+	movel	d1,d7
+	andl	IMM (0x7fffffff),d1
+	beq	Lcmpsf$b$0
+	cmpl	IMM (0x7f800000),d1
+	bhi	Lcmpf$inop
+Lcmpsf$2:
+| Check the signs
+	eorl	d6,d7
+	bpl	1f
+| If the signs are not equal check if a >= 0
+	tstl	d6
+	bpl	Lcmpsf$a$gt$b	| if (a >= 0 && b < 0) => a > b
+	bmi	Lcmpsf$b$gt$a	| if (a < 0 && b >= 0) => a < b
+1:
+| If the signs are equal check for < 0
+	tstl	d6
+	bpl	1f
+| If both are negative exchange them
+#ifndef __mcoldfire__
+	exg	d0,d1
+#else
+	movel	d0,d7
+	movel	d1,d0
+	movel	d7,d1
+#endif
+1:
+| Now that they are positive we just compare them as longs (does this also
+| work for denormalized numbers?).
+	cmpl	d0,d1
+	bhi	Lcmpsf$b$gt$a	| |b| > |a|
+	bne	Lcmpsf$a$gt$b	| |b| < |a|
+| If we got here a == b.
+	movel	IMM (EQUAL),d0
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7 	| put back the registers
+#else
+	moveml	sp@,d2-d7
+#endif
+	unlk	a6
+	rts
+Lcmpsf$a$gt$b:
+	movel	IMM (GREATER),d0
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7 	| put back the registers
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6
+	rts
+Lcmpsf$b$gt$a:
+	movel	IMM (LESS),d0
+#ifndef __mcoldfire__
+	moveml	sp@+,d2-d7 	| put back the registers
+#else
+	moveml	sp@,d2-d7
+	| XXX if frame pointer is ever removed, stack pointer must
+	| be adjusted here.
+#endif
+	unlk	a6
+	rts
+
+Lcmpsf$a$0:	
+	bclr	IMM (31),d6
+	bra	Lcmpsf$1
+Lcmpsf$b$0:
+	bclr	IMM (31),d7
+	bra	Lcmpsf$2
+
+Lcmpf$inop:
+	movl	a6@(16),d0
+	moveq	IMM (INEXACT_RESULT+INVALID_OPERATION),d7
+	moveq	IMM (SINGLE_FLOAT),d6
+	PICJUMP	$_exception_handler
+
+| int __cmpsf2(float, float);
+	FUNC(__cmpsf2)
+SYM (__cmpsf2):
+	link	a6,IMM (0)
+	pea	1
+	movl	a6@(12),sp@-
+	movl	a6@(8),sp@-
+	PICCALL SYM (__cmpsf2_internal)
+	unlk	a6
+	rts
+
+|=============================================================================
+|                           rounding routines
+|=============================================================================
+
+| The rounding routines expect the number to be normalized in registers
+| d0-d1, with the exponent in register d2. They assume that the 
+| exponent is larger or equal to 1. They return a properly normalized number
+| if possible, and a denormalized number otherwise. The exponent is returned
+| in d2.
+
+Lround$to$nearest:
+| We now normalize as suggested by D. Knuth ("Seminumerical Algorithms"):
+| Here we assume that the exponent is not too small (this should be checked
+| before entering the rounding routine), but the number could be denormalized.
+
+| Check for denormalized numbers:
+1:	btst	IMM (FLT_MANT_DIG),d0
+	bne	2f		| if set the number is normalized
+| Normalize shifting left until bit #FLT_MANT_DIG is set or the exponent 
+| is one (remember that a denormalized number corresponds to an 
+| exponent of -F_BIAS+1).
+#ifndef __mcoldfire__
+	cmpw	IMM (1),d2	| remember that the exponent is at least one
+#else
+	cmpl	IMM (1),d2	| remember that the exponent is at least one
+#endif
+ 	beq	2f		| an exponent of one means denormalized
+	addl	d1,d1		| else shift and adjust the exponent
+	addxl	d0,d0		|
+#ifndef __mcoldfire__
+	dbra	d2,1b		|
+#else
+	subql	IMM (1),d2
+	bpl	1b
+#endif
+2:
+| Now round: we do it as follows: after the shifting we can write the
+| fraction part as f + delta, where 1 < f < 2^25, and 0 <= delta <= 2.
+| If delta < 1, do nothing. If delta > 1, add 1 to f. 
+| If delta == 1, we make sure the rounded number will be even
+| (after shifting).
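+| In rough C terms (a sketch with made-up names; frac is the value of d0,
+| whose bit 0 is the rounding bit, sticky stands for the bits collected
+| in d1, and the fraction-overflow check further below is not shown):
+|
+|   unsigned round_nearest_even (unsigned frac, unsigned sticky)
+|   {
+|     if (frac & 1) {              /* delta >= 1                        */
+|       if (sticky != 0)           /* delta  > 1: round up              */
+|         frac += 1;
+|       else                       /* delta == 1: round to even         */
+|         frac += frac & 2;
+|     }
+|     return frac >> 1;            /* drop the extra bit used to round  */
+|   }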
+	btst	IMM (0),d0	| is delta < 1?
+	beq	2f		| if so, do not do anything
+	tstl	d1		| is delta == 1?
+	bne	1f		| if delta > 1 add 1 below, else round to even here
+	movel	d0,d1		| 
+	andl	IMM (2),d1	| bit 1 is the last significant bit
+	addl	d1,d0		| 
+	bra	2f		| 
+1:	movel	IMM (1),d1	| delta > 1, so add 1
+	addl	d1,d0		|
+| Shift right once (because we used bit #FLT_MANT_DIG!).
+2:	lsrl	IMM (1),d0		
+| Now check again bit #FLT_MANT_DIG (rounding could have produced a
+| 'fraction overflow' ...).
+	btst	IMM (FLT_MANT_DIG),d0	
+	beq	1f
+	lsrl	IMM (1),d0
+#ifndef __mcoldfire__
+	addw	IMM (1),d2
+#else
+	addql	IMM (1),d2
+#endif
+1:
+| If bit #FLT_MANT_DIG-1 is clear we have a denormalized number, so we 
+| have to put the exponent to zero and return a denormalized number.
+	btst	IMM (FLT_MANT_DIG-1),d0
+	beq	1f
+	jmp	a0@
+1:	movel	IMM (0),d2
+	jmp	a0@
+
+Lround$to$zero:
+Lround$to$plus:
+Lround$to$minus:
+	jmp	a0@
+#endif /* L_float */
+
+| gcc expects the routines __eqdf2, __nedf2, __gtdf2, __gedf2,
+| __ledf2, __ltdf2 to all return the same value as a direct call to
+| __cmpdf2 would.  In this implementation, each of these routines
+| simply calls __cmpdf2.  It would be more efficient to give the
+| __cmpdf2 routine several names, but separating them out will make it
+| easier to write efficient versions of these routines someday.
+| If the operands compare unordered, __gtdf2 and __gedf2 return -1.
+| The other routines return 1.
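+| In C each of the stubs below amounts to the following sketch; the third
+| argument of __cmpdf2_internal is the value returned when the operands
+| are unordered:
+|
+|   int __cmpdf2_internal (double, double, int);
+|
+|   int __eqdf2 (double a, double b) { return __cmpdf2_internal (a, b,  1); }
+|   int __gtdf2 (double a, double b) { return __cmpdf2_internal (a, b, -1); }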
+
+#ifdef  L_eqdf2
+	.text
+	FUNC(__eqdf2)
+	.globl	SYM (__eqdf2)
+SYM (__eqdf2):
+	link	a6,IMM (0)
+	pea	1
+	movl	a6@(20),sp@-
+	movl	a6@(16),sp@-
+	movl	a6@(12),sp@-
+	movl	a6@(8),sp@-
+	PICCALL	SYM (__cmpdf2_internal)
+	unlk	a6
+	rts
+#endif /* L_eqdf2 */
+
+#ifdef  L_nedf2
+	.text
+	FUNC(__nedf2)
+	.globl	SYM (__nedf2)
+SYM (__nedf2):
+	link	a6,IMM (0)
+	pea	1
+	movl	a6@(20),sp@-
+	movl	a6@(16),sp@-
+	movl	a6@(12),sp@-
+	movl	a6@(8),sp@-
+	PICCALL	SYM (__cmpdf2_internal)
+	unlk	a6
+	rts
+#endif /* L_nedf2 */
+
+#ifdef  L_gtdf2
+	.text
+	FUNC(__gtdf2)
+	.globl	SYM (__gtdf2)
+SYM (__gtdf2):
+	link	a6,IMM (0)
+	pea	-1
+	movl	a6@(20),sp@-
+	movl	a6@(16),sp@-
+	movl	a6@(12),sp@-
+	movl	a6@(8),sp@-
+	PICCALL	SYM (__cmpdf2_internal)
+	unlk	a6
+	rts
+#endif /* L_gtdf2 */
+
+#ifdef  L_gedf2
+	.text
+	FUNC(__gedf2)
+	.globl	SYM (__gedf2)
+SYM (__gedf2):
+	link	a6,IMM (0)
+	pea	-1
+	movl	a6@(20),sp@-
+	movl	a6@(16),sp@-
+	movl	a6@(12),sp@-
+	movl	a6@(8),sp@-
+	PICCALL	SYM (__cmpdf2_internal)
+	unlk	a6
+	rts
+#endif /* L_gedf2 */
+
+#ifdef  L_ltdf2
+	.text
+	FUNC(__ltdf2)
+	.globl	SYM (__ltdf2)
+SYM (__ltdf2):
+	link	a6,IMM (0)
+	pea	1
+	movl	a6@(20),sp@-
+	movl	a6@(16),sp@-
+	movl	a6@(12),sp@-
+	movl	a6@(8),sp@-
+	PICCALL	SYM (__cmpdf2_internal)
+	unlk	a6
+	rts
+#endif /* L_ltdf2 */
+
+#ifdef  L_ledf2
+	.text
+	FUNC(__ledf2)
+	.globl	SYM (__ledf2)
+SYM (__ledf2):
+	link	a6,IMM (0)
+	pea	1
+	movl	a6@(20),sp@-
+	movl	a6@(16),sp@-
+	movl	a6@(12),sp@-
+	movl	a6@(8),sp@-
+	PICCALL	SYM (__cmpdf2_internal)
+	unlk	a6
+	rts
+#endif /* L_ledf2 */
+
+| The comments above about __eqdf2, et. al., also apply to __eqsf2,
+| et. al., except that the latter call __cmpsf2 rather than __cmpdf2.
+
+#ifdef  L_eqsf2
+	.text
+	FUNC(__eqsf2)
+	.globl	SYM (__eqsf2)
+SYM (__eqsf2):
+	link	a6,IMM (0)
+	pea	1
+	movl	a6@(12),sp@-
+	movl	a6@(8),sp@-
+	PICCALL	SYM (__cmpsf2_internal)
+	unlk	a6
+	rts
+#endif /* L_eqsf2 */
+
+#ifdef  L_nesf2
+	.text
+	FUNC(__nesf2)
+	.globl	SYM (__nesf2)
+SYM (__nesf2):
+	link	a6,IMM (0)
+	pea	1
+	movl	a6@(12),sp@-
+	movl	a6@(8),sp@-
+	PICCALL	SYM (__cmpsf2_internal)
+	unlk	a6
+	rts
+#endif /* L_nesf2 */
+
+#ifdef  L_gtsf2
+	.text
+	FUNC(__gtsf2)
+	.globl	SYM (__gtsf2)
+SYM (__gtsf2):
+	link	a6,IMM (0)
+	pea	-1
+	movl	a6@(12),sp@-
+	movl	a6@(8),sp@-
+	PICCALL	SYM (__cmpsf2_internal)
+	unlk	a6
+	rts
+#endif /* L_gtsf2 */
+
+#ifdef  L_gesf2
+	.text
+	FUNC(__gesf2)
+	.globl	SYM (__gesf2)
+SYM (__gesf2):
+	link	a6,IMM (0)
+	pea	-1
+	movl	a6@(12),sp@-
+	movl	a6@(8),sp@-
+	PICCALL	SYM (__cmpsf2_internal)
+	unlk	a6
+	rts
+#endif /* L_gesf2 */
+
+#ifdef  L_ltsf2
+	.text
+	FUNC(__ltsf2)
+	.globl	SYM (__ltsf2)
+SYM (__ltsf2):
+	link	a6,IMM (0)
+	pea	1
+	movl	a6@(12),sp@-
+	movl	a6@(8),sp@-
+	PICCALL	SYM (__cmpsf2_internal)
+	unlk	a6
+	rts
+#endif /* L_ltsf2 */
+
+#ifdef  L_lesf2
+	.text
+	FUNC(__lesf2)
+	.globl	SYM (__lesf2)
+SYM (__lesf2):
+	link	a6,IMM (0)
+	pea	1
+	movl	a6@(12),sp@-
+	movl	a6@(8),sp@-
+	PICCALL	SYM (__cmpsf2_internal)
+	unlk	a6
+	rts
+#endif /* L_lesf2 */
+
+#if defined (__ELF__) && defined (__linux__)
+	/* Make stack non-executable for ELF linux targets.  */
+	.section	.note.GNU-stack,"",@progbits
+#endif