summaryrefslogtreecommitdiffstats
path: root/arch/m68k/math-emu
diff options
context:
space:
mode:
Diffstat (limited to 'arch/m68k/math-emu')
-rw-r--r--arch/m68k/math-emu/Makefile11
-rw-r--r--arch/m68k/math-emu/fp_arith.c701
-rw-r--r--arch/m68k/math-emu/fp_arith.h52
-rw-r--r--arch/m68k/math-emu/fp_cond.S334
-rw-r--r--arch/m68k/math-emu/fp_decode.h417
-rw-r--r--arch/m68k/math-emu/fp_emu.h146
-rw-r--r--arch/m68k/math-emu/fp_entry.S325
-rw-r--r--arch/m68k/math-emu/fp_log.c223
-rw-r--r--arch/m68k/math-emu/fp_move.S244
-rw-r--r--arch/m68k/math-emu/fp_movem.S368
-rw-r--r--arch/m68k/math-emu/fp_scan.S478
-rw-r--r--arch/m68k/math-emu/fp_trig.c183
-rw-r--r--arch/m68k/math-emu/fp_trig.h32
-rw-r--r--arch/m68k/math-emu/fp_util.S1455
-rw-r--r--arch/m68k/math-emu/multi_arith.h819
15 files changed, 5788 insertions, 0 deletions
diff --git a/arch/m68k/math-emu/Makefile b/arch/m68k/math-emu/Makefile
new file mode 100644
index 000000000000..539940401814
--- /dev/null
+++ b/arch/m68k/math-emu/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the linux kernel.
+#
+
+EXTRA_AFLAGS := -traditional
+
+#EXTRA_AFLAGS += -DFPU_EMU_DEBUG
+#EXTRA_CFLAGS += -DFPU_EMU_DEBUG
+
+obj-y := fp_entry.o fp_scan.o fp_util.o fp_move.o fp_movem.o \
+ fp_cond.o fp_arith.o fp_log.o fp_trig.o
diff --git a/arch/m68k/math-emu/fp_arith.c b/arch/m68k/math-emu/fp_arith.c
new file mode 100644
index 000000000000..08f286db3c5a
--- /dev/null
+++ b/arch/m68k/math-emu/fp_arith.c
@@ -0,0 +1,701 @@
+/*
+
+ fp_arith.c: floating-point math routines for the Linux-m68k
+ floating point emulator.
+
+ Copyright (c) 1998-1999 David Huggins-Daines.
+
+ Somewhat based on the AlphaLinux floating point emulator, by David
+ Mosberger-Tang.
+
+ You may copy, modify, and redistribute this file under the terms of
+ the GNU General Public License, version 2, or any later version, at
+ your convenience.
+ */
+
+#include "fp_emu.h"
+#include "multi_arith.h"
+#include "fp_arith.h"
+
+const struct fp_ext fp_QNaN =
+{
+ .exp = 0x7fff,
+ .mant = { .m64 = ~0 }
+};
+
+const struct fp_ext fp_Inf =
+{
+ .exp = 0x7fff,
+};
+
+/* let's start with the easy ones */
+
+struct fp_ext *
+fp_fabs(struct fp_ext *dest, struct fp_ext *src)
+{
+ dprint(PINSTR, "fabs\n");
+
+ fp_monadic_check(dest, src);
+
+ dest->sign = 0;
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fneg(struct fp_ext *dest, struct fp_ext *src)
+{
+ dprint(PINSTR, "fneg\n");
+
+ fp_monadic_check(dest, src);
+
+ dest->sign = !dest->sign;
+
+ return dest;
+}
+
+/* Now, the slightly harder ones */
+
+/* fp_fadd: Implements the kernel of the FADD, FSADD, FDADD, FSUB,
+ FDSUB, and FCMP instructions. */
+
+struct fp_ext *
+fp_fadd(struct fp_ext *dest, struct fp_ext *src)
+{
+ int diff;
+
+ dprint(PINSTR, "fadd\n");
+
+ fp_dyadic_check(dest, src);
+
+ if (IS_INF(dest)) {
+ /* infinity - infinity == NaN */
+ if (IS_INF(src) && (src->sign != dest->sign))
+ fp_set_nan(dest);
+ return dest;
+ }
+ if (IS_INF(src)) {
+ fp_copy_ext(dest, src);
+ return dest;
+ }
+
+ if (IS_ZERO(dest)) {
+ if (IS_ZERO(src)) {
+ if (src->sign != dest->sign) {
+ if (FPDATA->rnd == FPCR_ROUND_RM)
+ dest->sign = 1;
+ else
+ dest->sign = 0;
+ }
+ } else
+ fp_copy_ext(dest, src);
+ return dest;
+ }
+
+ dest->lowmant = src->lowmant = 0;
+
+ if ((diff = dest->exp - src->exp) > 0)
+ fp_denormalize(src, diff);
+ else if ((diff = -diff) > 0)
+ fp_denormalize(dest, diff);
+
+ if (dest->sign == src->sign) {
+ if (fp_addmant(dest, src))
+ if (!fp_addcarry(dest))
+ return dest;
+ } else {
+ if (dest->mant.m64 < src->mant.m64) {
+ fp_submant(dest, src, dest);
+ dest->sign = !dest->sign;
+ } else
+ fp_submant(dest, dest, src);
+ }
+
+ return dest;
+}
+
+/* fp_fsub: Implements the kernel of the FSUB, FSSUB, and FDSUB
+ instructions.
+
+ Remember that the arguments are in assembler-syntax order! */
+
+struct fp_ext *
+fp_fsub(struct fp_ext *dest, struct fp_ext *src)
+{
+ dprint(PINSTR, "fsub ");
+
+ src->sign = !src->sign;
+ return fp_fadd(dest, src);
+}
+
+
+struct fp_ext *
+fp_fcmp(struct fp_ext *dest, struct fp_ext *src)
+{
+ dprint(PINSTR, "fcmp ");
+
+ FPDATA->temp[1] = *dest;
+ src->sign = !src->sign;
+ return fp_fadd(&FPDATA->temp[1], src);
+}
+
+struct fp_ext *
+fp_ftst(struct fp_ext *dest, struct fp_ext *src)
+{
+ dprint(PINSTR, "ftst\n");
+
+ (void)dest;
+
+ return src;
+}
+
+struct fp_ext *
+fp_fmul(struct fp_ext *dest, struct fp_ext *src)
+{
+ union fp_mant128 temp;
+ int exp;
+
+ dprint(PINSTR, "fmul\n");
+
+ fp_dyadic_check(dest, src);
+
+ /* calculate the correct sign now, as it's necessary for infinities */
+ dest->sign = src->sign ^ dest->sign;
+
+ /* Handle infinities */
+ if (IS_INF(dest)) {
+ if (IS_ZERO(src))
+ fp_set_nan(dest);
+ return dest;
+ }
+ if (IS_INF(src)) {
+ if (IS_ZERO(dest))
+ fp_set_nan(dest);
+ else
+ fp_copy_ext(dest, src);
+ return dest;
+ }
+
+ /* Of course, as we all know, zero * anything = zero. You may
+ not have known that it might be a positive or negative
+ zero... */
+ if (IS_ZERO(dest) || IS_ZERO(src)) {
+ dest->exp = 0;
+ dest->mant.m64 = 0;
+ dest->lowmant = 0;
+
+ return dest;
+ }
+
+ exp = dest->exp + src->exp - 0x3ffe;
+
+ /* shift up the mantissa for denormalized numbers,
+ so that the highest bit is set, this makes the
+ shift of the result below easier */
+ if ((long)dest->mant.m32[0] >= 0)
+ exp -= fp_overnormalize(dest);
+ if ((long)src->mant.m32[0] >= 0)
+ exp -= fp_overnormalize(src);
+
+ /* now, do a 64-bit multiply with expansion */
+ fp_multiplymant(&temp, dest, src);
+
+ /* normalize it back to 64 bits and stuff it back into the
+ destination struct */
+ if ((long)temp.m32[0] > 0) {
+ exp--;
+ fp_putmant128(dest, &temp, 1);
+ } else
+ fp_putmant128(dest, &temp, 0);
+
+ if (exp >= 0x7fff) {
+ fp_set_ovrflw(dest);
+ return dest;
+ }
+ dest->exp = exp;
+ if (exp < 0) {
+ fp_set_sr(FPSR_EXC_UNFL);
+ fp_denormalize(dest, -exp);
+ }
+
+ return dest;
+}
+
+/* fp_fdiv: Implements the "kernel" of the FDIV, FSDIV, FDDIV and
+ FSGLDIV instructions.
+
+ Note that the order of the operands is counter-intuitive: instead
+ of src / dest, the result is actually dest / src. */
+
+struct fp_ext *
+fp_fdiv(struct fp_ext *dest, struct fp_ext *src)
+{
+ union fp_mant128 temp;
+ int exp;
+
+ dprint(PINSTR, "fdiv\n");
+
+ fp_dyadic_check(dest, src);
+
+ /* calculate the correct sign now, as it's necessary for infinities */
+ dest->sign = src->sign ^ dest->sign;
+
+ /* Handle infinities */
+ if (IS_INF(dest)) {
+ /* infinity / infinity = NaN (quiet, as always) */
+ if (IS_INF(src))
+ fp_set_nan(dest);
+ /* infinity / anything else = infinity (with approprate sign) */
+ return dest;
+ }
+ if (IS_INF(src)) {
+ /* anything / infinity = zero (with appropriate sign) */
+ dest->exp = 0;
+ dest->mant.m64 = 0;
+ dest->lowmant = 0;
+
+ return dest;
+ }
+
+ /* zeroes */
+ if (IS_ZERO(dest)) {
+ /* zero / zero = NaN */
+ if (IS_ZERO(src))
+ fp_set_nan(dest);
+ /* zero / anything else = zero */
+ return dest;
+ }
+ if (IS_ZERO(src)) {
+ /* anything / zero = infinity (with appropriate sign) */
+ fp_set_sr(FPSR_EXC_DZ);
+ dest->exp = 0x7fff;
+ dest->mant.m64 = 0;
+
+ return dest;
+ }
+
+ exp = dest->exp - src->exp + 0x3fff;
+
+ /* shift up the mantissa for denormalized numbers,
+ so that the highest bit is set, this makes lots
+ of things below easier */
+ if ((long)dest->mant.m32[0] >= 0)
+ exp -= fp_overnormalize(dest);
+ if ((long)src->mant.m32[0] >= 0)
+ exp -= fp_overnormalize(src);
+
+ /* now, do the 64-bit divide */
+ fp_dividemant(&temp, dest, src);
+
+ /* normalize it back to 64 bits and stuff it back into the
+ destination struct */
+ if (!temp.m32[0]) {
+ exp--;
+ fp_putmant128(dest, &temp, 32);
+ } else
+ fp_putmant128(dest, &temp, 31);
+
+ if (exp >= 0x7fff) {
+ fp_set_ovrflw(dest);
+ return dest;
+ }
+ dest->exp = exp;
+ if (exp < 0) {
+ fp_set_sr(FPSR_EXC_UNFL);
+ fp_denormalize(dest, -exp);
+ }
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fsglmul(struct fp_ext *dest, struct fp_ext *src)
+{
+ int exp;
+
+ dprint(PINSTR, "fsglmul\n");
+
+ fp_dyadic_check(dest, src);
+
+ /* calculate the correct sign now, as it's necessary for infinities */
+ dest->sign = src->sign ^ dest->sign;
+
+ /* Handle infinities */
+ if (IS_INF(dest)) {
+ if (IS_ZERO(src))
+ fp_set_nan(dest);
+ return dest;
+ }
+ if (IS_INF(src)) {
+ if (IS_ZERO(dest))
+ fp_set_nan(dest);
+ else
+ fp_copy_ext(dest, src);
+ return dest;
+ }
+
+ /* Of course, as we all know, zero * anything = zero. You may
+ not have known that it might be a positive or negative
+ zero... */
+ if (IS_ZERO(dest) || IS_ZERO(src)) {
+ dest->exp = 0;
+ dest->mant.m64 = 0;
+ dest->lowmant = 0;
+
+ return dest;
+ }
+
+ exp = dest->exp + src->exp - 0x3ffe;
+
+ /* do a 32-bit multiply */
+ fp_mul64(dest->mant.m32[0], dest->mant.m32[1],
+ dest->mant.m32[0] & 0xffffff00,
+ src->mant.m32[0] & 0xffffff00);
+
+ if (exp >= 0x7fff) {
+ fp_set_ovrflw(dest);
+ return dest;
+ }
+ dest->exp = exp;
+ if (exp < 0) {
+ fp_set_sr(FPSR_EXC_UNFL);
+ fp_denormalize(dest, -exp);
+ }
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fsgldiv(struct fp_ext *dest, struct fp_ext *src)
+{
+ int exp;
+ unsigned long quot, rem;
+
+ dprint(PINSTR, "fsgldiv\n");
+
+ fp_dyadic_check(dest, src);
+
+ /* calculate the correct sign now, as it's necessary for infinities */
+ dest->sign = src->sign ^ dest->sign;
+
+ /* Handle infinities */
+ if (IS_INF(dest)) {
+ /* infinity / infinity = NaN (quiet, as always) */
+ if (IS_INF(src))
+ fp_set_nan(dest);
+ /* infinity / anything else = infinity (with approprate sign) */
+ return dest;
+ }
+ if (IS_INF(src)) {
+ /* anything / infinity = zero (with appropriate sign) */
+ dest->exp = 0;
+ dest->mant.m64 = 0;
+ dest->lowmant = 0;
+
+ return dest;
+ }
+
+ /* zeroes */
+ if (IS_ZERO(dest)) {
+ /* zero / zero = NaN */
+ if (IS_ZERO(src))
+ fp_set_nan(dest);
+ /* zero / anything else = zero */
+ return dest;
+ }
+ if (IS_ZERO(src)) {
+ /* anything / zero = infinity (with appropriate sign) */
+ fp_set_sr(FPSR_EXC_DZ);
+ dest->exp = 0x7fff;
+ dest->mant.m64 = 0;
+
+ return dest;
+ }
+
+ exp = dest->exp - src->exp + 0x3fff;
+
+ dest->mant.m32[0] &= 0xffffff00;
+ src->mant.m32[0] &= 0xffffff00;
+
+ /* do the 32-bit divide */
+ if (dest->mant.m32[0] >= src->mant.m32[0]) {
+ fp_sub64(dest->mant, src->mant);
+ fp_div64(quot, rem, dest->mant.m32[0], 0, src->mant.m32[0]);
+ dest->mant.m32[0] = 0x80000000 | (quot >> 1);
+ dest->mant.m32[1] = (quot & 1) | rem; /* only for rounding */
+ } else {
+ fp_div64(quot, rem, dest->mant.m32[0], 0, src->mant.m32[0]);
+ dest->mant.m32[0] = quot;
+ dest->mant.m32[1] = rem; /* only for rounding */
+ exp--;
+ }
+
+ if (exp >= 0x7fff) {
+ fp_set_ovrflw(dest);
+ return dest;
+ }
+ dest->exp = exp;
+ if (exp < 0) {
+ fp_set_sr(FPSR_EXC_UNFL);
+ fp_denormalize(dest, -exp);
+ }
+
+ return dest;
+}
+
+/* fp_roundint: Internal rounding function for use by several of these
+ emulated instructions.
+
+ This one rounds off the fractional part using the rounding mode
+ specified. */
+
+static void fp_roundint(struct fp_ext *dest, int mode)
+{
+ union fp_mant64 oldmant;
+ unsigned long mask;
+
+ if (!fp_normalize_ext(dest))
+ return;
+
+ /* infinities and zeroes */
+ if (IS_INF(dest) || IS_ZERO(dest))
+ return;
+
+ /* first truncate the lower bits */
+ oldmant = dest->mant;
+ switch (dest->exp) {
+ case 0 ... 0x3ffe:
+ dest->mant.m64 = 0;
+ break;
+ case 0x3fff ... 0x401e:
+ dest->mant.m32[0] &= 0xffffffffU << (0x401e - dest->exp);
+ dest->mant.m32[1] = 0;
+ if (oldmant.m64 == dest->mant.m64)
+ return;
+ break;
+ case 0x401f ... 0x403e:
+ dest->mant.m32[1] &= 0xffffffffU << (0x403e - dest->exp);
+ if (oldmant.m32[1] == dest->mant.m32[1])
+ return;
+ break;
+ default:
+ return;
+ }
+ fp_set_sr(FPSR_EXC_INEX2);
+
+ /* We might want to normalize upwards here... however, since
+ we know that this is only called on the output of fp_fdiv,
+ or with the input to fp_fint or fp_fintrz, and the inputs
+ to all these functions are either normal or denormalized
+ (no subnormals allowed!), there's really no need.
+
+ In the case of fp_fdiv, observe that 0x80000000 / 0xffff =
+ 0xffff8000, and the same holds for 128-bit / 64-bit. (i.e. the
+ smallest possible normal dividend and the largest possible normal
+ divisor will still produce a normal quotient, therefore, (normal
+ << 64) / normal is normal in all cases) */
+
+ switch (mode) {
+ case FPCR_ROUND_RN:
+ switch (dest->exp) {
+ case 0 ... 0x3ffd:
+ return;
+ case 0x3ffe:
+ /* As noted above, the input is always normal, so the
+ guard bit (bit 63) is always set. therefore, the
+ only case in which we will NOT round to 1.0 is when
+ the input is exactly 0.5. */
+ if (oldmant.m64 == (1ULL << 63))
+ return;
+ break;
+ case 0x3fff ... 0x401d:
+ mask = 1 << (0x401d - dest->exp);
+ if (!(oldmant.m32[0] & mask))
+ return;
+ if (oldmant.m32[0] & (mask << 1))
+ break;
+ if (!(oldmant.m32[0] << (dest->exp - 0x3ffd)) &&
+ !oldmant.m32[1])
+ return;
+ break;
+ case 0x401e:
+ if (!(oldmant.m32[1] >= 0))
+ return;
+ if (oldmant.m32[0] & 1)
+ break;
+ if (!(oldmant.m32[1] << 1))
+ return;
+ break;
+ case 0x401f ... 0x403d:
+ mask = 1 << (0x403d - dest->exp);
+ if (!(oldmant.m32[1] & mask))
+ return;
+ if (oldmant.m32[1] & (mask << 1))
+ break;
+ if (!(oldmant.m32[1] << (dest->exp - 0x401d)))
+ return;
+ break;
+ default:
+ return;
+ }
+ break;
+ case FPCR_ROUND_RZ:
+ return;
+ default:
+ if (dest->sign ^ (mode - FPCR_ROUND_RM))
+ break;
+ return;
+ }
+
+ switch (dest->exp) {
+ case 0 ... 0x3ffe:
+ dest->exp = 0x3fff;
+ dest->mant.m64 = 1ULL << 63;
+ break;
+ case 0x3fff ... 0x401e:
+ mask = 1 << (0x401e - dest->exp);
+ if (dest->mant.m32[0] += mask)
+ break;
+ dest->mant.m32[0] = 0x80000000;
+ dest->exp++;
+ break;
+ case 0x401f ... 0x403e:
+ mask = 1 << (0x403e - dest->exp);
+ if (dest->mant.m32[1] += mask)
+ break;
+ if (dest->mant.m32[0] += 1)
+ break;
+ dest->mant.m32[0] = 0x80000000;
+ dest->exp++;
+ break;
+ }
+}
+
+/* modrem_kernel: Implementation of the FREM and FMOD instructions
+ (which are exactly the same, except for the rounding used on the
+ intermediate value) */
+
+static struct fp_ext *
+modrem_kernel(struct fp_ext *dest, struct fp_ext *src, int mode)
+{
+ struct fp_ext tmp;
+
+ fp_dyadic_check(dest, src);
+
+ /* Infinities and zeros */
+ if (IS_INF(dest) || IS_ZERO(src)) {
+ fp_set_nan(dest);
+ return dest;
+ }
+ if (IS_ZERO(dest) || IS_INF(src))
+ return dest;
+
+ /* FIXME: there is almost certainly a smarter way to do this */
+ fp_copy_ext(&tmp, dest);
+ fp_fdiv(&tmp, src); /* NOTE: src might be modified */
+ fp_roundint(&tmp, mode);
+ fp_fmul(&tmp, src);
+ fp_fsub(dest, &tmp);
+
+ /* set the quotient byte */
+ fp_set_quotient((dest->mant.m64 & 0x7f) | (dest->sign << 7));
+ return dest;
+}
+
+/* fp_fmod: Implements the kernel of the FMOD instruction.
+
+ Again, the argument order is backwards. The result, as defined in
+ the Motorola manuals, is:
+
+ fmod(src,dest) = (dest - (src * floor(dest / src))) */
+
+struct fp_ext *
+fp_fmod(struct fp_ext *dest, struct fp_ext *src)
+{
+ dprint(PINSTR, "fmod\n");
+ return modrem_kernel(dest, src, FPCR_ROUND_RZ);
+}
+
+/* fp_frem: Implements the kernel of the FREM instruction.
+
+ frem(src,dest) = (dest - (src * round(dest / src)))
+ */
+
+struct fp_ext *
+fp_frem(struct fp_ext *dest, struct fp_ext *src)
+{
+ dprint(PINSTR, "frem\n");
+ return modrem_kernel(dest, src, FPCR_ROUND_RN);
+}
+
+struct fp_ext *
+fp_fint(struct fp_ext *dest, struct fp_ext *src)
+{
+ dprint(PINSTR, "fint\n");
+
+ fp_copy_ext(dest, src);
+
+ fp_roundint(dest, FPDATA->rnd);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fintrz(struct fp_ext *dest, struct fp_ext *src)
+{
+ dprint(PINSTR, "fintrz\n");
+
+ fp_copy_ext(dest, src);
+
+ fp_roundint(dest, FPCR_ROUND_RZ);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fscale(struct fp_ext *dest, struct fp_ext *src)
+{
+ int scale, oldround;
+
+ dprint(PINSTR, "fscale\n");
+
+ fp_dyadic_check(dest, src);
+
+ /* Infinities */
+ if (IS_INF(src)) {
+ fp_set_nan(dest);
+ return dest;
+ }
+ if (IS_INF(dest))
+ return dest;
+
+ /* zeroes */
+ if (IS_ZERO(src) || IS_ZERO(dest))
+ return dest;
+
+ /* Source exponent out of range */
+ if (src->exp >= 0x400c) {
+ fp_set_ovrflw(dest);
+ return dest;
+ }
+
+ /* src must be rounded with round to zero. */
+ oldround = FPDATA->rnd;
+ FPDATA->rnd = FPCR_ROUND_RZ;
+ scale = fp_conv_ext2long(src);
+ FPDATA->rnd = oldround;
+
+ /* new exponent */
+ scale += dest->exp;
+
+ if (scale >= 0x7fff) {
+ fp_set_ovrflw(dest);
+ } else if (scale <= 0) {
+ fp_set_sr(FPSR_EXC_UNFL);
+ fp_denormalize(dest, -scale);
+ } else
+ dest->exp = scale;
+
+ return dest;
+}
+
diff --git a/arch/m68k/math-emu/fp_arith.h b/arch/m68k/math-emu/fp_arith.h
new file mode 100644
index 000000000000..2cc3f846c393
--- /dev/null
+++ b/arch/m68k/math-emu/fp_arith.h
@@ -0,0 +1,52 @@
+/*
+
+ fp_arith.h: floating-point math routines for the Linux-m68k
+ floating point emulator.
+
+ Copyright (c) 1998 David Huggins-Daines.
+
+ Somewhat based on the AlphaLinux floating point emulator, by David
+ Mosberger-Tang.
+
+ You may copy, modify, and redistribute this file under the terms of
+ the GNU General Public License, version 2, or any later version, at
+ your convenience.
+
+ */
+
+#ifndef FP_ARITH_H
+#define FP_ARITH_H
+
+/* easy ones */
+struct fp_ext *
+fp_fabs(struct fp_ext *dest, struct fp_ext *src);
+struct fp_ext *
+fp_fneg(struct fp_ext *dest, struct fp_ext *src);
+
+/* straightforward arithmetic */
+struct fp_ext *
+fp_fadd(struct fp_ext *dest, struct fp_ext *src);
+struct fp_ext *
+fp_fsub(struct fp_ext *dest, struct fp_ext *src);
+struct fp_ext *
+fp_fcmp(struct fp_ext *dest, struct fp_ext *src);
+struct fp_ext *
+fp_ftst(struct fp_ext *dest, struct fp_ext *src);
+struct fp_ext *
+fp_fmul(struct fp_ext *dest, struct fp_ext *src);
+struct fp_ext *
+fp_fdiv(struct fp_ext *dest, struct fp_ext *src);
+
+/* ones that do rounding and integer conversions */
+struct fp_ext *
+fp_fmod(struct fp_ext *dest, struct fp_ext *src);
+struct fp_ext *
+fp_frem(struct fp_ext *dest, struct fp_ext *src);
+struct fp_ext *
+fp_fint(struct fp_ext *dest, struct fp_ext *src);
+struct fp_ext *
+fp_fintrz(struct fp_ext *dest, struct fp_ext *src);
+struct fp_ext *
+fp_fscale(struct fp_ext *dest, struct fp_ext *src);
+
+#endif /* FP_ARITH__H */
diff --git a/arch/m68k/math-emu/fp_cond.S b/arch/m68k/math-emu/fp_cond.S
new file mode 100644
index 000000000000..ddae8b1b8b83
--- /dev/null
+++ b/arch/m68k/math-emu/fp_cond.S
@@ -0,0 +1,334 @@
+/*
+ * fp_cond.S
+ *
+ * Copyright Roman Zippel, 1997. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * ALTERNATIVELY, this product may be distributed under the terms of
+ * the GNU General Public License, in which case the provisions of the GPL are
+ * required INSTEAD OF the above restrictions. (This clause is
+ * necessary due to a potential bad interaction between the GPL and
+ * the restrictions contained in a BSD-style copyright.)
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fp_emu.h"
+#include "fp_decode.h"
+
+ .globl fp_fscc, fp_fbccw, fp_fbccl
+
+#ifdef FPU_EMU_DEBUG
+fp_fnop:
+ printf PDECODE,"fnop\n"
+ jra fp_end
+#else
+#define fp_fnop fp_end
+#endif
+
+fp_fbccw:
+ tst.w %d2
+ jeq fp_fnop
+ printf PDECODE,"fbccw "
+ fp_get_pc %a0
+ lea (-2,%a0,%d2.w),%a0
+ jra 1f
+
+fp_fbccl:
+ printf PDECODE,"fbccl "
+ fp_get_pc %a0
+ move.l %d2,%d0
+ swap %d0
+ fp_get_instr_word %d0,fp_err_ua1
+ lea (-2,%a0,%d0.l),%a0
+1: printf PDECODE,"%x",1,%a0
+ move.l %d2,%d0
+ swap %d0
+ jsr fp_compute_cond
+ tst.l %d0
+ jeq 1f
+ fp_put_pc %a0,1
+1: printf PDECODE,"\n"
+ jra fp_end
+
+fp_fdbcc:
+ printf PDECODE,"fdbcc "
+ fp_get_pc %a1 | calculate new pc
+ fp_get_instr_word %d0,fp_err_ua1
+ add.w %d0,%a1
+ fp_decode_addr_reg
+ printf PDECODE,"d%d,%x\n",2,%d0,%a1
+ swap %d1 | test condition in %d1
+ tst.w %d1
+ jne 2f
+ move.l %d0,%d1
+ jsr fp_get_data_reg
+ subq.w #1,%d0
+ jcs 1f
+ fp_put_pc %a1,1
+1: jsr fp_put_data_reg
+2: jra fp_end
+
+| set flags for decode macros for fs<cc>
+do_fscc=1
+do_no_pc_mode=1
+
+fp_fscc:
+ printf PDECODE,"fscc "
+ move.l %d2,%d0
+ jsr fp_compute_cond
+ move.w %d0,%d1
+ swap %d1
+
+ | decode addressing mode
+ fp_decode_addr_mode
+
+ .long fp_data, fp_fdbcc
+ .long fp_indirect, fp_postinc
+ .long fp_predecr, fp_disp16
+ .long fp_extmode0, fp_extmode1
+
+ | addressing mode: data register direct
+fp_data:
+ fp_mode_data_direct
+ move.w %d0,%d1 | save register nr
+ jsr fp_get_data_reg
+ swap %d1
+ move.b %d1,%d0
+ swap %d1
+ jsr fp_put_data_reg
+ printf PDECODE,"\n"
+ jra fp_end
+
+fp_indirect:
+ fp_mode_addr_indirect
+ jra fp_do_scc
+
+fp_postinc:
+ fp_mode_addr_indirect_postinc
+ jra fp_do_scc
+
+fp_predecr:
+ fp_mode_addr_indirect_predec
+ jra fp_do_scc
+
+fp_disp16:
+ fp_mode_addr_indirect_disp16
+ jra fp_do_scc
+
+fp_extmode0:
+ fp_mode_addr_indirect_extmode0
+ jra fp_do_scc
+
+fp_extmode1:
+ bfextu %d2{#13,#3},%d0
+ jmp ([0f:w,%pc,%d0*4])
+
+ .align 4
+0:
+ .long fp_absolute_short, fp_absolute_long
+ .long fp_ill, fp_ill | NOTE: jump here to ftrap.x
+ .long fp_ill, fp_ill
+ .long fp_ill, fp_ill
+
+fp_absolute_short:
+ fp_mode_abs_short
+ jra fp_do_scc
+
+fp_absolute_long:
+ fp_mode_abs_long
+| jra fp_do_scc
+
+fp_do_scc:
+ swap %d1
+ putuser.b %d1,(%a0),fp_err_ua1,%a0
+ printf PDECODE,"\n"
+ jra fp_end
+
+
+#define tst_NAN btst #24,%d1
+#define tst_Z btst #26,%d1
+#define tst_N btst #27,%d1
+
+fp_compute_cond:
+ move.l (FPD_FPSR,FPDATA),%d1
+ btst #4,%d0
+ jeq 1f
+ tst_NAN
+ jeq 1f
+ bset #15,%d1
+ bset #7,%d1
+ move.l %d1,(FPD_FPSR,FPDATA)
+1: and.w #0xf,%d0
+ jmp ([0f:w,%pc,%d0.w*4])
+
+ .align 4
+0:
+ .long fp_f , fp_eq , fp_ogt, fp_oge
+ .long fp_olt, fp_ole, fp_ogl, fp_or
+ .long fp_un , fp_ueq, fp_ugt, fp_uge
+ .long fp_ult, fp_ule, fp_ne , fp_t
+
+fp_f:
+ moveq #0,%d0
+ rts
+
+fp_eq:
+ moveq #0,%d0
+ tst_Z
+ jeq 1f
+ moveq #-1,%d0
+1: rts
+
+fp_ogt:
+ moveq #0,%d0
+ tst_NAN
+ jne 1f
+ tst_Z
+ jne 1f
+ tst_N
+ jne 1f
+ moveq #-1,%d0
+1: rts
+
+fp_oge:
+ moveq #-1,%d0
+ tst_Z
+ jne 2f
+ tst_NAN
+ jne 1f
+ tst_N
+ jeq 2f
+1: moveq #0,%d0
+2: rts
+
+fp_olt:
+ moveq #0,%d0
+ tst_NAN
+ jne 1f
+ tst_Z
+ jne 1f
+ tst_N
+ jeq 1f
+ moveq #-1,%d0
+1: rts
+
+fp_ole:
+ moveq #-1,%d0
+ tst_Z
+ jne 2f
+ tst_NAN
+ jne 1f
+ tst_N
+ jne 2f
+1: moveq #0,%d0
+2: rts
+
+fp_ogl:
+ moveq #0,%d0
+ tst_NAN
+ jne 1f
+ tst_Z
+ jne 1f
+ moveq #-1,%d0
+1: rts
+
+fp_or:
+ moveq #0,%d0
+ tst_NAN
+ jne 1f
+ moveq #-1,%d0
+1: rts
+
+fp_un:
+ moveq #0,%d0
+ tst_NAN
+ jeq 1f
+ moveq #-1,%d0
+ rts
+
+fp_ueq:
+ moveq #-1,%d0
+ tst_NAN
+ jne 1f
+ tst_Z
+ jne 1f
+ moveq #0,%d0
+1: rts
+
+fp_ugt:
+ moveq #-1,%d0
+ tst_NAN
+ jne 2f
+ tst_N
+ jne 1f
+ tst_Z
+ jeq 2f
+1: moveq #0,%d0
+2: rts
+
+fp_uge:
+ moveq #-1,%d0
+ tst_NAN
+ jne 1f
+ tst_Z
+ jne 1f
+ tst_N
+ jeq 1f
+ moveq #0,%d0
+1: rts
+
+fp_ult:
+ moveq #-1,%d0
+ tst_NAN
+ jne 2f
+ tst_Z
+ jne 1f
+ tst_N
+ jne 2f
+1: moveq #0,%d0
+2: rts
+
+fp_ule:
+ moveq #-1,%d0
+ tst_NAN
+ jne 1f
+ tst_Z
+ jne 1f
+ tst_N
+ jne 1f
+ moveq #0,%d0
+1: rts
+
+fp_ne:
+ moveq #0,%d0
+ tst_Z
+ jne 1f
+ moveq #-1,%d0
+1: rts
+
+fp_t:
+ moveq #-1,%d0
+ rts
diff --git a/arch/m68k/math-emu/fp_decode.h b/arch/m68k/math-emu/fp_decode.h
new file mode 100644
index 000000000000..759679d9ab96
--- /dev/null
+++ b/arch/m68k/math-emu/fp_decode.h
@@ -0,0 +1,417 @@
+/*
+ * fp_decode.h
+ *
+ * Copyright Roman Zippel, 1997. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * ALTERNATIVELY, this product may be distributed under the terms of
+ * the GNU General Public License, in which case the provisions of the GPL are
+ * required INSTEAD OF the above restrictions. (This clause is
+ * necessary due to a potential bad interaction between the GPL and
+ * the restrictions contained in a BSD-style copyright.)
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _FP_DECODE_H
+#define _FP_DECODE_H
+
+/* These macros do the dirty work of the instr decoding, several variables
+ * can be defined in the source file to modify the work of these macros,
+ * currently the following variables are used:
+ * ...
+ * The register usage:
+ * d0 - will contain source operand for data direct mode,
+ * otherwise scratch register
+ * d1 - upper 16bit are reserved for caller
+ * lower 16bit may contain further arguments,
+ * is destroyed during decoding
+ * d2 - contains first two instruction words,
+ * first word will be used for extension word
+ * a0 - will point to source/dest operand for any indirect mode
+ * otherwise scratch register
+ * a1 - scratch register
+ * a2 - base addr to the task structure
+ *
+ * the current implementation doesn't check for every disallowed
+ * addressing mode (e.g. pc relative modes as destination), as long
+ * as it only means a new addressing mode, which should not appear
+ * in a program and that doesn't crash the emulation, I think it's
+ * not a problem to allow these modes.
+ */
+
+do_fmovem=0
+do_fmovem_cr=0
+do_no_pc_mode=0
+do_fscc=0
+
+| first decoding of the instr type
+| this separates the conditional instr
+.macro fp_decode_cond_instr_type
+ bfextu %d2{#8,#2},%d0
+ jmp ([0f:w,%pc,%d0*4])
+
+ .align 4
+0:
+| .long "f<op>","fscc/fdbcc"
+| .long "fbccw","fbccl"
+.endm
+
+| second decoding of the instr type
+| this separates most move instr
+.macro fp_decode_move_instr_type
+ bfextu %d2{#16,#3},%d0
+ jmp ([0f:w,%pc,%d0*4])
+
+ .align 4
+0:
+| .long "f<op> fpx,fpx","invalid instr"
+| .long "f<op> <ea>,fpx","fmove fpx,<ea>"
+| .long "fmovem <ea>,fpcr","fmovem <ea>,fpx"
+| .long "fmovem fpcr,<ea>","fmovem fpx,<ea>"
+.endm
+
+| extract the source specifier, specifies
+| either source fp register or data format
+.macro fp_decode_sourcespec
+ bfextu %d2{#19,#3},%d0
+.endm
+
+| decode destination format for fmove reg,ea
+.macro fp_decode_dest_format
+ bfextu %d2{#19,#3},%d0
+.endm
+
+| decode source register for fmove reg,ea
+.macro fp_decode_src_reg
+ bfextu %d2{#22,#3},%d0
+.endm
+
+| extract the addressing mode
+| it depends on the instr which of the modes is valid
+.macro fp_decode_addr_mode
+ bfextu %d2{#10,#3},%d0
+ jmp ([0f:w,%pc,%d0*4])
+
+ .align 4
+0:
+| .long "data register direct","addr register direct"
+| .long "addr register indirect"
+| .long "addr register indirect postincrement"
+| .long "addr register indirect predecrement"
+| .long "addr register + index16"
+| .long "extension mode1","extension mode2"
+.endm
+
+| extract the register for the addressing mode
+.macro fp_decode_addr_reg
+ bfextu %d2{#13,#3},%d0
+.endm
+
+| decode the 8bit diplacement from the brief extension word
+.macro fp_decode_disp8
+ move.b %d2,%d0
+ ext.w %d0
+.endm
+
+| decode the index of the brief/full extension word
+.macro fp_decode_index
+ bfextu %d2{#17,#3},%d0 | get the register nr
+ btst #15,%d2 | test for data/addr register
+ jne 1\@f
+ printf PDECODE,"d%d",1,%d0
+ jsr fp_get_data_reg
+ jra 2\@f
+1\@: printf PDECODE,"a%d",1,%d0
+ jsr fp_get_addr_reg
+ move.l %a0,%d0
+2\@:
+debug lea "'l'.w,%a0"
+ btst #11,%d2 | 16/32 bit size?
+ jne 3\@f
+debug lea "'w'.w,%a0"
+ ext.l %d0
+3\@: printf PDECODE,":%c",1,%a0
+ move.w %d2,%d1 | scale factor
+ rol.w #7,%d1
+ and.w #3,%d1
+debug move.l "%d1,-(%sp)"
+debug ext.l "%d1"
+ printf PDECODE,":%d",1,%d1
+debug move.l "(%sp)+,%d1"
+ lsl.l %d1,%d0
+.endm
+
+| decode the base displacement size
+.macro fp_decode_basedisp
+ bfextu %d2{#26,#2},%d0
+ jmp ([0f:w,%pc,%d0*4])
+
+ .align 4
+0:
+| .long "reserved","null displacement"
+| .long "word displacement","long displacement"
+.endm
+
+.macro fp_decode_outerdisp
+ bfextu %d2{#30,#2},%d0
+ jmp ([0f:w,%pc,%d0*4])
+
+ .align 4
+0:
+| .long "no memory indirect action/reserved","null outer displacement"
+| .long "word outer displacement","long outer displacement"
+.endm
+
+| get the extension word and test for brief or full extension type
+.macro fp_get_test_extword label
+ fp_get_instr_word %d2,fp_err_ua1
+ btst #8,%d2
+ jne \label
+.endm
+
+
+| test if %pc is the base register for the indirect addr mode
+.macro fp_test_basereg_d16 label
+ btst #20,%d2
+ jeq \label
+.endm
+
+| test if %pc is the base register for one of the extended modes
+.macro fp_test_basereg_ext label
+ btst #19,%d2
+ jeq \label
+.endm
+
+.macro fp_test_suppr_index label
+ btst #6,%d2
+ jne \label
+.endm
+
+
+| addressing mode: data register direct
+.macro fp_mode_data_direct
+ fp_decode_addr_reg
+ printf PDECODE,"d%d",1,%d0
+.endm
+
+| addressing mode: address register indirect
+.macro fp_mode_addr_indirect
+ fp_decode_addr_reg
+ printf PDECODE,"(a%d)",1,%d0
+ jsr fp_get_addr_reg
+.endm
+
+| adjust stack for byte moves from/to stack
+.macro fp_test_sp_byte_move
+ .if !do_fmovem
+ .if do_fscc
+ move.w #6,%d1
+ .endif
+ cmp.w #7,%d0
+ jne 1\@f
+ .if !do_fscc
+ cmp.w #6,%d1
+ jne 1\@f
+ .endif
+ move.w #4,%d1
+1\@:
+ .endif
+.endm
+
+| addressing mode: address register indirect with postincrement
+.macro fp_mode_addr_indirect_postinc
+ fp_decode_addr_reg
+ printf PDECODE,"(a%d)+",1,%d0
+ fp_test_sp_byte_move
+ jsr fp_get_addr_reg
+ move.l %a0,%a1 | save addr
+ .if do_fmovem
+ lea (%a0,%d1.w*4),%a0
+ .if !do_fmovem_cr
+ lea (%a0,%d1.w*8),%a0
+ .endif
+ .else
+ add.w (fp_datasize,%d1.w*2),%a0
+ .endif
+ jsr fp_put_addr_reg
+ move.l %a1,%a0
+.endm
+
+| addressing mode: address register indirect with predecrement
+.macro fp_mode_addr_indirect_predec
+ fp_decode_addr_reg
+ printf PDECODE,"-(a%d)",1,%d0
+ fp_test_sp_byte_move
+ jsr fp_get_addr_reg
+ .if do_fmovem
+ .if !do_fmovem_cr
+ lea (-12,%a0),%a1 | setup to addr of 1st reg to move
+ neg.w %d1
+ lea (%a0,%d1.w*4),%a0
+ add.w %d1,%d1
+ lea (%a0,%d1.w*4),%a0
+ jsr fp_put_addr_reg
+ move.l %a1,%a0
+ .else
+ neg.w %d1
+ lea (%a0,%d1.w*4),%a0
+ jsr fp_put_addr_reg
+ .endif
+ .else
+ sub.w (fp_datasize,%d1.w*2),%a0
+ jsr fp_put_addr_reg
+ .endif
+.endm
+
+| addressing mode: address register/programm counter indirect
+| with 16bit displacement
+.macro fp_mode_addr_indirect_disp16
+ .if !do_no_pc_mode
+ fp_test_basereg_d16 1f
+ printf PDECODE,"pc"
+ fp_get_pc %a0
+ jra 2f
+ .endif
+1: fp_decode_addr_reg
+ printf PDECODE,"a%d",1,%d0
+ jsr fp_get_addr_reg
+2: fp_get_instr_word %a1,fp_err_ua1
+ printf PDECODE,"@(%x)",1,%a1
+ add.l %a1,%a0
+.endm
+
+| perform preindex (if I/IS == 0xx and xx != 00)
+.macro fp_do_preindex
+ moveq #3,%d0
+ and.w %d2,%d0
+ jeq 1f
+ btst #2,%d2
+ jne 1f
+ printf PDECODE,")@("
+ getuser.l (%a1),%a1,fp_err_ua1,%a1
+debug jra "2f"
+1: printf PDECODE,","
+2:
+.endm
+
+| perform postindex (if I/IS == 1xx)
+.macro fp_do_postindex
+ btst #2,%d2
+ jeq 1f
+ printf PDECODE,")@("
+ getuser.l (%a1),%a1,fp_err_ua1,%a1
+debug jra "2f"
+1: printf PDECODE,","
+2:
+.endm
+
+| all other indirect addressing modes will finally end up here
+.macro fp_mode_addr_indirect_extmode0
+ .if !do_no_pc_mode
+ fp_test_basereg_ext 1f
+ printf PDECODE,"pc"
+ fp_get_pc %a0
+ jra 2f
+ .endif
+1: fp_decode_addr_reg
+ printf PDECODE,"a%d",1,%d0
+ jsr fp_get_addr_reg
+2: move.l %a0,%a1
+ swap %d2
+ fp_get_test_extword 3f
+ | addressing mode: address register/programm counter indirect
+ | with index and 8bit displacement
+ fp_decode_disp8
+debug ext.l "%d0"
+ printf PDECODE,"@(%x,",1,%d0
+ add.w %d0,%a1
+ fp_decode_index
+ add.l %d0,%a1
+ printf PDECODE,")"
+ jra 9f
+3: | addressing mode: address register/programm counter memory indirect
+ | with base and/or outer displacement
+ btst #7,%d2 | base register suppressed?
+ jeq 1f
+ printf PDECODE,"!"
+ sub.l %a1,%a1
+1: printf PDECODE,"@("
+ fp_decode_basedisp
+
+ .long fp_ill,1f
+ .long 2f,3f
+
+#ifdef FPU_EMU_DEBUG
+1: printf PDECODE,"0" | null base displacement
+ jra 1f
+#endif
+2: fp_get_instr_word %a0,fp_err_ua1 | 16bit base displacement
+ printf PDECODE,"%x:w",1,%a0
+ jra 4f
+3: fp_get_instr_long %a0,fp_err_ua1 | 32bit base displacement
+ printf PDECODE,"%x:l",1,%a0
+4: add.l %a0,%a1
+1:
+ fp_do_postindex
+ fp_test_suppr_index 1f
+ fp_decode_index
+ add.l %d0,%a1
+1: fp_do_preindex
+
+ fp_decode_outerdisp
+
+ .long 5f,1f
+ .long 2f,3f
+
+#ifdef FPU_EMU_DEBUG
+1: printf PDECODE,"0" | null outer displacement
+ jra 1f
+#endif
+2: fp_get_instr_word %a0,fp_err_ua1 | 16bit outer displacement
+ printf PDECODE,"%x:w",1,%a0
+ jra 4f
+3: fp_get_instr_long %a0,fp_err_ua1 | 32bit outer displacement
+ printf PDECODE,"%x:l",1,%a0
+4: add.l %a0,%a1
+1:
+5: printf PDECODE,")"
+9: move.l %a1,%a0
+ swap %d2
+.endm
+
+| get the absolute short address from user space
+.macro fp_mode_abs_short
+ fp_get_instr_word %a0,fp_err_ua1
+ printf PDECODE,"%x.w",1,%a0
+.endm
+
+| get the absolute long address from user space
+.macro fp_mode_abs_long
+ fp_get_instr_long %a0,fp_err_ua1
+ printf PDECODE,"%x.l",1,%a0
+.endm
+
+#endif /* _FP_DECODE_H */
diff --git a/arch/m68k/math-emu/fp_emu.h b/arch/m68k/math-emu/fp_emu.h
new file mode 100644
index 000000000000..1d6edc975d89
--- /dev/null
+++ b/arch/m68k/math-emu/fp_emu.h
@@ -0,0 +1,146 @@
+/*
+ * fp_emu.h
+ *
+ * Copyright Roman Zippel, 1997. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * ALTERNATIVELY, this product may be distributed under the terms of
+ * the GNU General Public License, in which case the provisions of the GPL are
+ * required INSTEAD OF the above restrictions. (This clause is
+ * necessary due to a potential bad interaction between the GPL and
+ * the restrictions contained in a BSD-style copyright.)
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _FP_EMU_H
+#define _FP_EMU_H
+
+#ifdef __ASSEMBLY__
+#include <asm/offsets.h>
+#endif
+#include <asm/math-emu.h>
+
+#ifndef __ASSEMBLY__
+
+#define IS_INF(a) ((a)->exp == 0x7fff)
+#define IS_ZERO(a) ((a)->mant.m64 == 0)
+
+
+#define fp_set_sr(bit) ({ \
+ FPDATA->fpsr |= 1 << (bit); \
+})
+
+#define fp_set_quotient(quotient) ({ \
+ FPDATA->fpsr &= 0xff00ffff; \
+ FPDATA->fpsr |= ((quotient) & 0xff) << 16; \
+})
+
+/* linkage for several useful functions */
+
+/* Normalize the extended struct, return 0 for a NaN */
+#define fp_normalize_ext(fpreg) ({ \
+ register struct fp_ext *reg asm ("a0") = fpreg; \
+ register int res asm ("d0"); \
+ \
+ asm volatile ("jsr fp_conv_ext2ext" \
+ : "=d" (res) : "a" (reg) \
+ : "a1", "d1", "d2", "memory"); \
+ res; \
+})
+
+#define fp_copy_ext(dest, src) ({ \
+ *dest = *src; \
+})
+
+#define fp_monadic_check(dest, src) ({ \
+ fp_copy_ext(dest, src); \
+ if (!fp_normalize_ext(dest)) \
+ return dest; \
+})
+
+#define fp_dyadic_check(dest, src) ({ \
+ if (!fp_normalize_ext(dest)) \
+ return dest; \
+ if (!fp_normalize_ext(src)) { \
+ fp_copy_ext(dest, src); \
+ return dest; \
+ } \
+})
+
+extern const struct fp_ext fp_QNaN;
+extern const struct fp_ext fp_Inf;
+
+#define fp_set_nan(dest) ({ \
+ fp_set_sr(FPSR_EXC_OPERR); \
+ *dest = fp_QNaN; \
+})
+
+/* TODO check rounding mode? */
+#define fp_set_ovrflw(dest) ({ \
+ fp_set_sr(FPSR_EXC_OVFL); \
+ dest->exp = 0x7fff; \
+ dest->mant.m64 = 0; \
+})
+
+#define fp_conv_ext2long(src) ({ \
+ register struct fp_ext *__src asm ("a0") = src; \
+ register int __res asm ("d0"); \
+ \
+ asm volatile ("jsr fp_conv_ext2long" \
+ : "=d" (__res) : "a" (__src) \
+ : "a1", "d1", "d2", "memory"); \
+ __res; \
+})
+
+#define fp_conv_long2ext(dest, src) ({ \
+ register struct fp_ext *__dest asm ("a0") = dest; \
+ register int __src asm ("d0") = src; \
+ \
+ asm volatile ("jsr fp_conv_ext2long" \
+ : : "d" (__src), "a" (__dest) \
+ : "a1", "d1", "d2", "memory"); \
+})
+
+#else /* __ASSEMBLY__ */
+
+/*
+ * set, reset or clear a bit in the fp status register
+ */
+.macro fp_set_sr bit
+ bset #(\bit&7),(FPD_FPSR+3-(\bit/8),FPDATA)
+.endm
+
+.macro fp_clr_sr bit
+ bclr #(\bit&7),(FPD_FPSR+3-(\bit/8),FPDATA)
+.endm
+
+.macro fp_tst_sr bit
+ btst #(\bit&7),(FPD_FPSR+3-(\bit/8),FPDATA)
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _FP_EMU_H */
diff --git a/arch/m68k/math-emu/fp_entry.S b/arch/m68k/math-emu/fp_entry.S
new file mode 100644
index 000000000000..5ec2d9101ea3
--- /dev/null
+++ b/arch/m68k/math-emu/fp_entry.S
@@ -0,0 +1,325 @@
+/*
+ * fp_emu.S
+ *
+ * Copyright Roman Zippel, 1997. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * ALTERNATIVELY, this product may be distributed under the terms of
+ * the GNU General Public License, in which case the provisions of the GPL are
+ * required INSTEAD OF the above restrictions. (This clause is
+ * necessary due to a potential bad interaction between the GPL and
+ * the restrictions contained in a BSD-style copyright.)
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/entry.h>
+
+#include "fp_emu.h"
+
+ .globl fpu_emu
+ .globl fp_debugprint
+ .globl fp_err_ua1,fp_err_ua2
+
+ .text
+fpu_emu:
+ SAVE_ALL_INT
+ GET_CURRENT(%d0)
+
+#if defined(CPU_M68020_OR_M68030) && defined(CPU_M68040_OR_M68060)
+ tst.l m68k_is040or060
+ jeq 1f
+#endif
+#if defined(CPU_M68040_OR_M68060)
+ move.l (FPS_PC2,%sp),(FPS_PC,%sp)
+#endif
+1:
+ | emulate the instruction
+ jsr fp_scan
+
+#if defined(CONFIG_M68060)
+#if !defined(CPU_M68060_ONLY)
+ btst #3,m68k_cputype+3
+ jeq 1f
+#endif
+ btst #7,(FPS_SR,%sp)
+ jne fp_sendtrace060
+#endif
+1:
+ | emulation successful?
+ tst.l %d0
+ jeq ret_from_exception
+
+ | send some signal to program here
+
+ jra ret_from_exception
+
+ | we jump here after an access error while trying to access
+ | user space, we correct stackpointer and send a SIGSEGV to
+ | the user process
+fp_err_ua2:
+ addq.l #4,%sp
+fp_err_ua1:
+ addq.l #4,%sp
+ move.l %a0,-(%sp)
+ pea SEGV_MAPERR
+ pea SIGSEGV
+ jsr fpemu_signal
+ add.w #12,%sp
+ jra ret_from_exception
+
+#if defined(CONFIG_M68060)
+ | send a trace signal if we are debugged
+ | it does not really belong here, but...
+fp_sendtrace060:
+ move.l (FPS_PC,%sp),-(%sp)
+ pea TRAP_TRACE
+ pea SIGTRAP
+ jsr fpemu_signal
+ add.w #12,%sp
+ jra ret_from_exception
+#endif
+
+ .globl fp_get_data_reg, fp_put_data_reg
+ .globl fp_get_addr_reg, fp_put_addr_reg
+
+ | Entry points to get/put a register. Some of them can be get/put
+ | directly, others are on the stack, as we read/write the stack
+ | directly here, these function may only be called from within
+ | instruction decoding, otherwise the stack pointer is incorrect
+ | and the stack gets corrupted.
+fp_get_data_reg:
+ jmp ([0f:w,%pc,%d0.w*4])
+
+ .align 4
+0:
+ .long fp_get_d0, fp_get_d1
+ .long fp_get_d2, fp_get_d3
+ .long fp_get_d4, fp_get_d5
+ .long fp_get_d6, fp_get_d7
+
+fp_get_d0:
+ move.l (PT_D0+8,%sp),%d0
+ printf PREGISTER,"{d0->%08x}",1,%d0
+ rts
+
+fp_get_d1:
+ move.l (PT_D1+8,%sp),%d0
+ printf PREGISTER,"{d1->%08x}",1,%d0
+ rts
+
+fp_get_d2:
+ move.l (PT_D2+8,%sp),%d0
+ printf PREGISTER,"{d2->%08x}",1,%d0
+ rts
+
+fp_get_d3:
+ move.l %d3,%d0
+ printf PREGISTER,"{d3->%08x}",1,%d0
+ rts
+
+fp_get_d4:
+ move.l %d4,%d0
+ printf PREGISTER,"{d4->%08x}",1,%d0
+ rts
+
+fp_get_d5:
+ move.l %d5,%d0
+ printf PREGISTER,"{d5->%08x}",1,%d0
+ rts
+
+fp_get_d6:
+ move.l %d6,%d0
+ printf PREGISTER,"{d6->%08x}",1,%d0
+ rts
+
+fp_get_d7:
+ move.l %d7,%d0
+ printf PREGISTER,"{d7->%08x}",1,%d0
+ rts
+
+fp_put_data_reg:
+ jmp ([0f:w,%pc,%d1.w*4])
+
+ .align 4
+0:
+ .long fp_put_d0, fp_put_d1
+ .long fp_put_d2, fp_put_d3
+ .long fp_put_d4, fp_put_d5
+ .long fp_put_d6, fp_put_d7
+
+fp_put_d0:
+ printf PREGISTER,"{d0<-%08x}",1,%d0
+ move.l %d0,(PT_D0+8,%sp)
+ rts
+
+fp_put_d1:
+ printf PREGISTER,"{d1<-%08x}",1,%d0
+ move.l %d0,(PT_D1+8,%sp)
+ rts
+
+fp_put_d2:
+ printf PREGISTER,"{d2<-%08x}",1,%d0
+ move.l %d0,(PT_D2+8,%sp)
+ rts
+
+fp_put_d3:
+ printf PREGISTER,"{d3<-%08x}",1,%d0
+| move.l %d0,%d3
+ move.l %d0,(PT_D3+8,%sp)
+ rts
+
+fp_put_d4:
+ printf PREGISTER,"{d4<-%08x}",1,%d0
+| move.l %d0,%d4
+ move.l %d0,(PT_D4+8,%sp)
+ rts
+
+fp_put_d5:
+ printf PREGISTER,"{d5<-%08x}",1,%d0
+| move.l %d0,%d5
+ move.l %d0,(PT_D5+8,%sp)
+ rts
+
+fp_put_d6:
+ printf PREGISTER,"{d6<-%08x}",1,%d0
+ move.l %d0,%d6
+ rts
+
+fp_put_d7:
+ printf PREGISTER,"{d7<-%08x}",1,%d0
+ move.l %d0,%d7
+ rts
+
+fp_get_addr_reg:
+ jmp ([0f:w,%pc,%d0.w*4])
+
+ .align 4
+0:
+ .long fp_get_a0, fp_get_a1
+ .long fp_get_a2, fp_get_a3
+ .long fp_get_a4, fp_get_a5
+ .long fp_get_a6, fp_get_a7
+
+fp_get_a0:
+ move.l (PT_A0+8,%sp),%a0
+ printf PREGISTER,"{a0->%08x}",1,%a0
+ rts
+
+fp_get_a1:
+ move.l (PT_A1+8,%sp),%a0
+ printf PREGISTER,"{a1->%08x}",1,%a0
+ rts
+
+fp_get_a2:
+ move.l (PT_A2+8,%sp),%a0
+ printf PREGISTER,"{a2->%08x}",1,%a0
+ rts
+
+fp_get_a3:
+ move.l %a3,%a0
+ printf PREGISTER,"{a3->%08x}",1,%a0
+ rts
+
+fp_get_a4:
+ move.l %a4,%a0
+ printf PREGISTER,"{a4->%08x}",1,%a0
+ rts
+
+fp_get_a5:
+ move.l %a5,%a0
+ printf PREGISTER,"{a5->%08x}",1,%a0
+ rts
+
+fp_get_a6:
+ move.l %a6,%a0
+ printf PREGISTER,"{a6->%08x}",1,%a0
+ rts
+
+fp_get_a7:
+ move.l %usp,%a0
+ printf PREGISTER,"{a7->%08x}",1,%a0
+ rts
+
+fp_put_addr_reg:
+ jmp ([0f:w,%pc,%d0.w*4])
+
+ .align 4
+0:
+ .long fp_put_a0, fp_put_a1
+ .long fp_put_a2, fp_put_a3
+ .long fp_put_a4, fp_put_a5
+ .long fp_put_a6, fp_put_a7
+
+fp_put_a0:
+ printf PREGISTER,"{a0<-%08x}",1,%a0
+ move.l %a0,(PT_A0+8,%sp)
+ rts
+
+fp_put_a1:
+ printf PREGISTER,"{a1<-%08x}",1,%a0
+ move.l %a0,(PT_A1+8,%sp)
+ rts
+
+fp_put_a2:
+ printf PREGISTER,"{a2<-%08x}",1,%a0
+ move.l %a0,(PT_A2+8,%sp)
+ rts
+
+fp_put_a3:
+ printf PREGISTER,"{a3<-%08x}",1,%a0
+ move.l %a0,%a3
+ rts
+
+fp_put_a4:
+ printf PREGISTER,"{a4<-%08x}",1,%a0
+ move.l %a0,%a4
+ rts
+
+fp_put_a5:
+ printf PREGISTER,"{a5<-%08x}",1,%a0
+ move.l %a0,%a5
+ rts
+
+fp_put_a6:
+ printf PREGISTER,"{a6<-%08x}",1,%a0
+ move.l %a0,%a6
+ rts
+
+fp_put_a7:
+ printf PREGISTER,"{a7<-%08x}",1,%a0
+ move.l %a0,%usp
+ rts
+
+ .data
+ .align 4
+
+fp_debugprint:
+| .long PMDECODE
+ .long PMINSTR+PMDECODE+PMCONV+PMNORM
+| .long PMCONV+PMNORM+PMINSTR
+| .long 0
diff --git a/arch/m68k/math-emu/fp_log.c b/arch/m68k/math-emu/fp_log.c
new file mode 100644
index 000000000000..87b4f0158560
--- /dev/null
+++ b/arch/m68k/math-emu/fp_log.c
@@ -0,0 +1,223 @@
+/*
+
+ fp_trig.c: floating-point math routines for the Linux-m68k
+ floating point emulator.
+
+ Copyright (c) 1998-1999 David Huggins-Daines / Roman Zippel.
+
+ I hereby give permission, free of charge, to copy, modify, and
+ redistribute this software, in source or binary form, provided that
+ the above copyright notice and the following disclaimer are included
+ in all such copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS", WITH ABSOLUTELY NO WARRANTY, REAL
+ OR IMPLIED.
+
+*/
+
+#include "fp_emu.h"
+
+static const struct fp_ext fp_one =
+{
+ .exp = 0x3fff,
+};
+
+extern struct fp_ext *fp_fadd(struct fp_ext *dest, const struct fp_ext *src);
+extern struct fp_ext *fp_fdiv(struct fp_ext *dest, const struct fp_ext *src);
+extern struct fp_ext *fp_fmul(struct fp_ext *dest, const struct fp_ext *src);
+
+struct fp_ext *
+fp_fsqrt(struct fp_ext *dest, struct fp_ext *src)
+{
+ struct fp_ext tmp, src2;
+ int i, exp;
+
+ dprint(PINSTR, "fsqrt\n");
+
+ fp_monadic_check(dest, src);
+
+ if (IS_ZERO(dest))
+ return dest;
+
+ if (dest->sign) {
+ fp_set_nan(dest);
+ return dest;
+ }
+ if (IS_INF(dest))
+ return dest;
+
+ /*
+ * sqrt(m) * 2^(p) , if e = 2*p
+ * sqrt(m*2^e) =
+ * sqrt(2*m) * 2^(p) , if e = 2*p + 1
+ *
+ * So we use the last bit of the exponent to decide wether to
+ * use the m or 2*m.
+ *
+ * Since only the fractional part of the mantissa is stored and
+ * the integer part is assumed to be one, we place a 1 or 2 into
+ * the fixed point representation.
+ */
+ exp = dest->exp;
+ dest->exp = 0x3FFF;
+ if (!(exp & 1)) /* lowest bit of exponent is set */
+ dest->exp++;
+ fp_copy_ext(&src2, dest);
+
+ /*
+ * The taylor row arround a for sqrt(x) is:
+ * sqrt(x) = sqrt(a) + 1/(2*sqrt(a))*(x-a) + R
+ * With a=1 this gives:
+ * sqrt(x) = 1 + 1/2*(x-1)
+ * = 1/2*(1+x)
+ */
+ fp_fadd(dest, &fp_one);
+ dest->exp--; /* * 1/2 */
+
+ /*
+ * We now apply the newton rule to the function
+ * f(x) := x^2 - r
+ * which has a null point on x = sqrt(r).
+ *
+ * It gives:
+ * x' := x - f(x)/f'(x)
+ * = x - (x^2 -r)/(2*x)
+ * = x - (x - r/x)/2
+ * = (2*x - x + r/x)/2
+ * = (x + r/x)/2
+ */
+ for (i = 0; i < 9; i++) {
+ fp_copy_ext(&tmp, &src2);
+
+ fp_fdiv(&tmp, dest);
+ fp_fadd(dest, &tmp);
+ dest->exp--;
+ }
+
+ dest->exp += (exp - 0x3FFF) / 2;
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fetoxm1(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fetoxm1\n");
+
+ fp_monadic_check(dest, src);
+
+ if (IS_ZERO(dest))
+ return dest;
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fetox(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fetox\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_ftwotox(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("ftwotox\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_ftentox(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("ftentox\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_flogn(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("flogn\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_flognp1(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("flognp1\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_flog10(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("flog10\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_flog2(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("flog2\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fgetexp(struct fp_ext *dest, struct fp_ext *src)
+{
+ dprint(PINSTR, "fgetexp\n");
+
+ fp_monadic_check(dest, src);
+
+ if (IS_INF(dest)) {
+ fp_set_nan(dest);
+ return dest;
+ }
+ if (IS_ZERO(dest))
+ return dest;
+
+ fp_conv_long2ext(dest, (int)dest->exp - 0x3FFF);
+
+ fp_normalize_ext(dest);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fgetman(struct fp_ext *dest, struct fp_ext *src)
+{
+ dprint(PINSTR, "fgetman\n");
+
+ fp_monadic_check(dest, src);
+
+ if (IS_ZERO(dest))
+ return dest;
+
+ if (IS_INF(dest))
+ return dest;
+
+ dest->exp = 0x3FFF;
+
+ return dest;
+}
+
diff --git a/arch/m68k/math-emu/fp_move.S b/arch/m68k/math-emu/fp_move.S
new file mode 100644
index 000000000000..71bdf83ba61a
--- /dev/null
+++ b/arch/m68k/math-emu/fp_move.S
@@ -0,0 +1,244 @@
+/*
+ * fp_move.S
+ *
+ * Copyright Roman Zippel, 1997. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * ALTERNATIVELY, this product may be distributed under the terms of
+ * the GNU General Public License, in which case the provisions of the GPL are
+ * required INSTEAD OF the above restrictions. (This clause is
+ * necessary due to a potential bad interaction between the GPL and
+ * the restrictions contained in a BSD-style copyright.)
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fp_emu.h"
+#include "fp_decode.h"
+
+do_no_pc_mode=1
+
+ .globl fp_fmove_fp2mem
+
+fp_fmove_fp2mem:
+ clr.b (2+FPD_FPSR,FPDATA)
+ fp_decode_dest_format
+ move.w %d0,%d1 | store data size twice in %d1
+ swap %d1 | one can be trashed below
+ move.w %d0,%d1
+#ifdef FPU_EMU_DEBUG
+ lea 0f,%a0
+ clr.l %d0
+ move.b (%a0,%d1.w),%d0
+ printf PDECODE,"fmove.%c ",1,%d0
+ fp_decode_src_reg
+ printf PDECODE,"fp%d,",1,%d0
+
+ .data
+0: .byte 'l','s','x','p','w','d','b','p'
+ .previous
+#endif
+
+ | encode addressing mode for dest
+ fp_decode_addr_mode
+
+ .long fp_data, fp_ill
+ .long fp_indirect, fp_postinc
+ .long fp_predecr, fp_disp16
+ .long fp_extmode0, fp_extmode1
+
+ | addressing mode: data register direct
+fp_data:
+ fp_mode_data_direct
+ move.w %d0,%d1
+ fp_decode_src_reg
+ fp_get_fp_reg
+ lea (FPD_TEMPFP1,FPDATA),%a1
+ move.l (%a0)+,(%a1)+
+ move.l (%a0)+,(%a1)+
+ move.l (%a0),(%a1)
+ lea (-8,%a1),%a0
+ swap %d1
+ move.l %d1,%d2
+ printf PDECODE,"\n"
+ jmp ([0f:w,%pc,%d1.w*4])
+
+ .align 4
+0:
+ .long fp_data_long, fp_data_single
+ .long fp_ill, fp_ill
+ .long fp_data_word, fp_ill
+ .long fp_data_byte, fp_ill
+
+fp_data_byte:
+ jsr fp_normalize_ext
+ jsr fp_conv_ext2byte
+ move.l %d0,%d1
+ swap %d2
+ move.w %d2,%d0
+ jsr fp_get_data_reg
+ move.b %d1,%d0
+ move.w %d2,%d1
+ jsr fp_put_data_reg
+ jra fp_final
+
+fp_data_word:
+ jsr fp_normalize_ext
+ jsr fp_conv_ext2short
+ move.l %d0,%d1
+ swap %d2
+ move.w %d2,%d0
+ jsr fp_get_data_reg
+ move.w %d1,%d0
+ move.l %d2,%d1
+ jsr fp_put_data_reg
+ jra fp_final
+
+fp_data_long:
+ jsr fp_normalize_ext
+ jsr fp_conv_ext2long
+ swap %d2
+ move.w %d2,%d1
+ jsr fp_put_data_reg
+ jra fp_final
+
+fp_data_single:
+ jsr fp_normalize_ext
+ jsr fp_conv_ext2single
+ swap %d2
+ move.w %d2,%d1
+ jsr fp_put_data_reg
+ jra fp_final
+
+ | addressing mode: address register indirect
+fp_indirect:
+ fp_mode_addr_indirect
+ jra fp_putdest
+
+ | addressing mode: address register indirect with postincrement
+fp_postinc:
+ fp_mode_addr_indirect_postinc
+ jra fp_putdest
+
+ | addressing mode: address register indirect with predecrement
+fp_predecr:
+ fp_mode_addr_indirect_predec
+ jra fp_putdest
+
+ | addressing mode: address register indirect with 16bit displacement
+fp_disp16:
+ fp_mode_addr_indirect_disp16
+ jra fp_putdest
+
+fp_extmode0:
+ fp_mode_addr_indirect_extmode0
+ jra fp_putdest
+
+fp_extmode1:
+ fp_decode_addr_reg
+ jmp ([0f:w,%pc,%d0*4])
+
+ .align 4
+0:
+ .long fp_abs_short, fp_abs_long
+ .long fp_ill, fp_ill
+ .long fp_ill, fp_ill
+ .long fp_ill, fp_ill
+
+fp_abs_short:
+ fp_mode_abs_short
+ jra fp_putdest
+
+fp_abs_long:
+ fp_mode_abs_long
+ jra fp_putdest
+
+fp_putdest:
+ move.l %a0,%a1
+ fp_decode_src_reg
+ move.l %d1,%d2 | save size
+ fp_get_fp_reg
+ printf PDECODE,"\n"
+ addq.l #8,%a0
+ move.l (%a0),-(%sp)
+ move.l -(%a0),-(%sp)
+ move.l -(%a0),-(%sp)
+ move.l %sp,%a0
+ jsr fp_normalize_ext
+
+ swap %d2
+ jmp ([0f:w,%pc,%d2.w*4])
+
+ .align 4
+0:
+ .long fp_format_long, fp_format_single
+ .long fp_format_extended, fp_format_packed
+ .long fp_format_word, fp_format_double
+ .long fp_format_byte, fp_format_packed
+
+fp_format_long:
+ jsr fp_conv_ext2long
+ putuser.l %d0,(%a1),fp_err_ua1,%a1
+ jra fp_finish_move
+
+fp_format_single:
+ jsr fp_conv_ext2single
+ putuser.l %d0,(%a1),fp_err_ua1,%a1
+ jra fp_finish_move
+
+fp_format_extended:
+ move.l (%a0)+,%d0
+ lsl.w #1,%d0
+ lsl.l #7,%d0
+ lsl.l #8,%d0
+ putuser.l %d0,(%a1)+,fp_err_ua1,%a1
+ move.l (%a0)+,%d0
+ putuser.l %d0,(%a1)+,fp_err_ua1,%a1
+ move.l (%a0),%d0
+ putuser.l %d0,(%a1),fp_err_ua1,%a1
+ jra fp_finish_move
+
+fp_format_packed:
+ /* not supported yet */
+ lea (12,%sp),%sp
+ jra fp_ill
+
+fp_format_word:
+ jsr fp_conv_ext2short
+ putuser.w %d0,(%a1),fp_err_ua1,%a1
+ jra fp_finish_move
+
+fp_format_double:
+ jsr fp_conv_ext2double
+ jra fp_finish_move
+
+fp_format_byte:
+ jsr fp_conv_ext2byte
+ putuser.b %d0,(%a1),fp_err_ua1,%a1
+| jra fp_finish_move
+
+fp_finish_move:
+ lea (12,%sp),%sp
+ jra fp_final
diff --git a/arch/m68k/math-emu/fp_movem.S b/arch/m68k/math-emu/fp_movem.S
new file mode 100644
index 000000000000..8354d39e6c47
--- /dev/null
+++ b/arch/m68k/math-emu/fp_movem.S
@@ -0,0 +1,368 @@
+/*
+ * fp_movem.S
+ *
+ * Copyright Roman Zippel, 1997. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * ALTERNATIVELY, this product may be distributed under the terms of
+ * the GNU General Public License, in which case the provisions of the GPL are
+ * required INSTEAD OF the above restrictions. (This clause is
+ * necessary due to a potential bad interaction between the GPL and
+ * the restrictions contained in a BSD-style copyright.)
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fp_emu.h"
+#include "fp_decode.h"
+
+| set flags for decode macros for fmovem
+do_fmovem=1
+
+ .globl fp_fmovem_fp, fp_fmovem_cr
+
+| %d1 contains the mask and count of the register list
+| for other register usage see fp_decode.h
+
+fp_fmovem_fp:
+ printf PDECODE,"fmovem.x "
+ | get register list and count them
+ btst #11,%d2
+ jne 1f
+ bfextu %d2{#24,#8},%d0 | static register list
+ jra 2f
+1: bfextu %d2{#25,#3},%d0 | dynamic register list
+ jsr fp_get_data_reg
+2: move.l %d0,%d1
+ swap %d1
+ jra 2f
+1: addq.w #1,%d1 | count the # of registers in
+2: lsr.b #1,%d0 | register list and keep it in %d1
+ jcs 1b
+ jne 2b
+ printf PDECODE,"#%08x",1,%d1
+#ifdef FPU_EMU_DEBUG
+ btst #12,%d2
+ jne 1f
+ printf PDECODE,"-" | decremental move
+ jra 2f
+1: printf PDECODE,"+" | incremental move
+2: btst #13,%d2
+ jeq 1f
+ printf PDECODE,"->" | fpu -> cpu
+ jra 2f
+1: printf PDECODE,"<-" | fpu <- cpu
+2:
+#endif
+
+ | decode address mode
+ fp_decode_addr_mode
+
+ .long fp_ill, fp_ill
+ .long fpr_indirect, fpr_postinc
+ .long fpr_predecr, fpr_disp16
+ .long fpr_extmode0, fpr_extmode1
+
+ | addressing mode: address register indirect
+fpr_indirect:
+ fp_mode_addr_indirect
+ jra fpr_do_movem
+
+ | addressing mode: address register indirect with postincrement
+fpr_postinc:
+ fp_mode_addr_indirect_postinc
+ jra fpr_do_movem
+
+fpr_predecr:
+ fp_mode_addr_indirect_predec
+ jra fpr_do_movem
+
+ | addressing mode: address register/programm counter indirect
+ | with 16bit displacement
+fpr_disp16:
+ fp_mode_addr_indirect_disp16
+ jra fpr_do_movem
+
+fpr_extmode0:
+ fp_mode_addr_indirect_extmode0
+ jra fpr_do_movem
+
+fpr_extmode1:
+ fp_decode_addr_reg
+ jmp ([0f:w,%pc,%d0*4])
+
+ .align 4
+0:
+ .long fpr_absolute_short, fpr_absolute_long
+ .long fpr_disp16, fpr_extmode0
+ .long fp_ill, fp_ill
+ .long fp_ill, fp_ill
+
+fpr_absolute_short:
+ fp_mode_abs_short
+ jra fpr_do_movem
+
+fpr_absolute_long:
+ fp_mode_abs_long
+| jra fpr_do_movem
+
+fpr_do_movem:
+ swap %d1 | get fpu register list
+ lea (FPD_FPREG,FPDATA),%a1
+ moveq #12,%d0
+ btst #12,%d2
+ jne 1f
+ lea (-12,%a1,%d0*8),%a1
+ neg.l %d0
+1: btst #13,%d2
+ jne 4f
+ | move register from memory into fpu
+ jra 3f
+1: printf PMOVEM,"(%p>%p)",2,%a0,%a1
+ getuser.l (%a0)+,%d2,fp_err_ua1,%a0
+ lsr.l #8,%d2
+ lsr.l #7,%d2
+ lsr.w #1,%d2
+ move.l %d2,(%a1)+
+ getuser.l (%a0)+,%d2,fp_err_ua1,%a0
+ move.l %d2,(%a1)+
+ getuser.l (%a0),%d2,fp_err_ua1,%a0
+ move.l %d2,(%a1)
+ subq.l #8,%a0
+ subq.l #8,%a1
+ add.l %d0,%a0
+2: add.l %d0,%a1
+3: lsl.b #1,%d1
+ jcs 1b
+ jne 2b
+ jra 5f
+ | move register from fpu into memory
+1: printf PMOVEM,"(%p>%p)",2,%a1,%a0
+ move.l (%a1)+,%d2
+ lsl.w #1,%d2
+ lsl.l #7,%d2
+ lsl.l #8,%d2
+ putuser.l %d2,(%a0)+,fp_err_ua1,%a0
+ move.l (%a1)+,%d2
+ putuser.l %d2,(%a0)+,fp_err_ua1,%a0
+ move.l (%a1),%d2
+ putuser.l %d2,(%a0),fp_err_ua1,%a0
+ subq.l #8,%a1
+ subq.l #8,%a0
+ add.l %d0,%a0
+2: add.l %d0,%a1
+4: lsl.b #1,%d1
+ jcs 1b
+ jne 2b
+5:
+ printf PDECODE,"\n"
+#if 0
+ lea (FPD_FPREG,FPDATA),%a0
+ printf PMOVEM,"fp:"
+ printx PMOVEM,%a0@(0)
+ printx PMOVEM,%a0@(12)
+ printf PMOVEM,"\n "
+ printx PMOVEM,%a0@(24)
+ printx PMOVEM,%a0@(36)
+ printf PMOVEM,"\n "
+ printx PMOVEM,%a0@(48)
+ printx PMOVEM,%a0@(60)
+ printf PMOVEM,"\n "
+ printx PMOVEM,%a0@(72)
+ printx PMOVEM,%a0@(84)
+ printf PMOVEM,"\n"
+#endif
+ jra fp_end
+
+| set flags for decode macros for fmovem control register
+do_fmovem=1
+do_fmovem_cr=1
+
+fp_fmovem_cr:
+ printf PDECODE,"fmovem.cr "
+ | get register list and count them
+ bfextu %d2{#19,#3},%d0
+ move.l %d0,%d1
+ swap %d1
+ jra 2f
+1: addq.w #1,%d1
+2: lsr.l #1,%d0
+ jcs 1b
+ jne 2b
+ printf PDECODE,"#%08x",1,%d1
+#ifdef FPU_EMU_DEBUG
+ btst #13,%d2
+ jeq 1f
+ printf PDECODE,"->" | fpu -> cpu
+ jra 2f
+1: printf PDECODE,"<-" | fpu <- cpu
+2:
+#endif
+
+ | decode address mode
+ fp_decode_addr_mode
+
+ .long fpc_data, fpc_addr
+ .long fpc_indirect, fpc_postinc
+ .long fpc_predecr, fpc_disp16
+ .long fpc_extmode0, fpc_extmode1
+
+fpc_data:
+ fp_mode_data_direct
+ move.w %d0,%d1
+ bfffo %d2{#19,#3},%d0
+ sub.w #19,%d0
+ lea (FPD_FPCR,FPDATA,%d0.w*4),%a1
+ btst #13,%d2
+ jne 1f
+ move.w %d1,%d0
+ jsr fp_get_data_reg
+ move.l %d0,(%a1)
+ jra fpc_movem_fin
+1: move.l (%a1),%d0
+ jsr fp_put_data_reg
+ jra fpc_movem_fin
+
+fpc_addr:
+ fp_decode_addr_reg
+ printf PDECODE,"a%d",1,%d0
+ btst #13,%d2
+ jne 1f
+ jsr fp_get_addr_reg
+ move.l %a0,(FPD_FPIAR,FPDATA)
+ jra fpc_movem_fin
+1: move.l (FPD_FPIAR,FPDATA),%a0
+ jsr fp_put_addr_reg
+ jra fpc_movem_fin
+
+fpc_indirect:
+ fp_mode_addr_indirect
+ jra fpc_do_movem
+
+fpc_postinc:
+ fp_mode_addr_indirect_postinc
+ jra fpc_do_movem
+
+fpc_predecr:
+ fp_mode_addr_indirect_predec
+ jra fpc_do_movem
+
+fpc_disp16:
+ fp_mode_addr_indirect_disp16
+ jra fpc_do_movem
+
+fpc_extmode0:
+ fp_mode_addr_indirect_extmode0
+ jra fpc_do_movem
+
+fpc_extmode1:
+ fp_decode_addr_reg
+ jmp ([0f:w,%pc,%d0*4])
+
+ .align 4
+0:
+ .long fpc_absolute_short, fpc_absolute_long
+ .long fpc_disp16, fpc_extmode0
+ .long fpc_immediate, fp_ill
+ .long fp_ill, fp_ill
+
+fpc_absolute_short:
+ fp_mode_abs_short
+ jra fpc_do_movem
+
+fpc_absolute_long:
+ fp_mode_abs_long
+ jra fpc_do_movem
+
+fpc_immediate:
+ fp_get_pc %a0
+ lea (%a0,%d1.w*4),%a1
+ fp_put_pc %a1
+ printf PDECODE,"#imm"
+| jra fpc_do_movem
+#if 0
+ swap %d1
+ lsl.l #5,%d1
+ lea (FPD_FPCR,FPDATA),%a0
+ jra 3f
+1: move.l %d0,(%a0)
+2: addq.l #4,%a0
+3: lsl.b #1,%d1
+ jcs 1b
+ jne 2b
+ jra fpc_movem_fin
+#endif
+
+fpc_do_movem:
+ swap %d1 | get fpu register list
+ lsl.l #5,%d1
+ lea (FPD_FPCR,FPDATA),%a1
+1: btst #13,%d2
+ jne 4f
+
+ | move register from memory into fpu
+ jra 3f
+1: printf PMOVEM,"(%p>%p)",2,%a0,%a1
+ getuser.l (%a0)+,%d0,fp_err_ua1,%a0
+ move.l %d0,(%a1)
+2: addq.l #4,%a1
+3: lsl.b #1,%d1
+ jcs 1b
+ jne 2b
+ jra fpc_movem_fin
+
+ | move register from fpu into memory
+1: printf PMOVEM,"(%p>%p)",2,%a1,%a0
+ move.l (%a1),%d0
+ putuser.l %d0,(%a0)+,fp_err_ua1,%a0
+2: addq.l #4,%a1
+4: lsl.b #1,%d1
+ jcs 1b
+ jne 2b
+
+fpc_movem_fin:
+ and.l #0x0000fff0,(FPD_FPCR,FPDATA)
+ and.l #0x0ffffff8,(FPD_FPSR,FPDATA)
+ move.l (FPD_FPCR,FPDATA),%d0
+ lsr.l #4,%d0
+ moveq #3,%d1
+ and.l %d0,%d1
+ move.w %d1,(FPD_RND,FPDATA)
+ lsr.l #2,%d0
+ moveq #3,%d1
+ and.l %d0,%d1
+ move.w %d1,(FPD_PREC,FPDATA)
+ printf PDECODE,"\n"
+#if 0
+ printf PMOVEM,"fpcr : %08x\n",1,FPDATA@(FPD_FPCR)
+ printf PMOVEM,"fpsr : %08x\n",1,FPDATA@(FPD_FPSR)
+ printf PMOVEM,"fpiar: %08x\n",1,FPDATA@(FPD_FPIAR)
+ clr.l %d0
+ move.w (FPD_PREC,FPDATA),%d0
+ printf PMOVEM,"prec : %04x\n",1,%d0
+ move.w (FPD_RND,FPDATA),%d0
+ printf PMOVEM,"rnd : %04x\n",1,%d0
+#endif
+ jra fp_end
diff --git a/arch/m68k/math-emu/fp_scan.S b/arch/m68k/math-emu/fp_scan.S
new file mode 100644
index 000000000000..e4146ed574db
--- /dev/null
+++ b/arch/m68k/math-emu/fp_scan.S
@@ -0,0 +1,478 @@
+/*
+ * fp_scan.S
+ *
+ * Copyright Roman Zippel, 1997. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * ALTERNATIVELY, this product may be distributed under the terms of
+ * the GNU General Public License, in which case the provisions of the GPL are
+ * required INSTEAD OF the above restrictions. (This clause is
+ * necessary due to a potential bad interaction between the GPL and
+ * the restrictions contained in a BSD-style copyright.)
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "fp_emu.h"
+#include "fp_decode.h"
+
+ .globl fp_scan, fp_datasize
+
+ .data
+
+| %d2 - first two instr words
+| %d1 - operand size
+
+/* operand formats are:
+
+ Long = 0, i.e. fmove.l
+ Single, i.e. fmove.s
+ Extended, i.e. fmove.x
+ Packed-BCD, i.e. fmove.p
+ Word, i.e. fmove.w
+ Double, i.e. fmove.d
+*/
+
+ .text
+
+| On entry:
+| FPDATA - base of emulated FPU registers
+
+fp_scan:
+| normal fpu instruction? (this excludes fsave/frestore)
+ fp_get_pc %a0
+ printf PDECODE,"%08x: ",1,%a0
+ getuser.b (%a0),%d0,fp_err_ua1,%a0
+#if 1
+ cmp.b #0xf2,%d0 | cpid = 1
+#else
+ cmp.b #0xfc,%d0 | cpid = 6
+#endif
+ jne fp_nonstd
+| first two instruction words are kept in %d2
+ getuser.l (%a0)+,%d2,fp_err_ua1,%a0
+ fp_put_pc %a0
+fp_decode_cond: | separate conditional instr
+ fp_decode_cond_instr_type
+
+ .long fp_decode_move, fp_fscc
+ .long fp_fbccw, fp_fbccl
+
+fp_decode_move: | separate move instr
+ fp_decode_move_instr_type
+
+ .long fp_fgen_fp, fp_ill
+ .long fp_fgen_ea, fp_fmove_fp2mem
+ .long fp_fmovem_cr, fp_fmovem_cr
+ .long fp_fmovem_fp, fp_fmovem_fp
+
+| now all arithmetic instr and a few move instr are left
+fp_fgen_fp: | source is a fpu register
+ clr.b (FPD_FPSR+2,FPDATA) | clear the exception byte
+ fp_decode_sourcespec
+ printf PDECODE,"f<op>.x fp%d",1,%d0
+ fp_get_fp_reg
+ lea (FPD_TEMPFP1,FPDATA),%a1 | copy src into a temp location
+ move.l (%a0)+,(%a1)+
+ move.l (%a0)+,(%a1)+
+ move.l (%a0),(%a1)
+ lea (-8,%a1),%a0
+ jra fp_getdest
+
+fp_fgen_ea: | source is <ea>
+ clr.b (FPD_FPSR+2,FPDATA) | clear the exception byte
+ | sort out fmovecr, keep data size in %d1
+ fp_decode_sourcespec
+ cmp.w #7,%d0
+ jeq fp_fmovecr
+ move.w %d0,%d1 | store data size twice in %d1
+ swap %d1 | one can be trashed below
+ move.w %d0,%d1
+#ifdef FPU_EMU_DEBUG
+ lea 0f,%a0
+ clr.l %d0
+ move.b (%a0,%d1.w),%d0
+ printf PDECODE,"f<op>.%c ",1,%d0
+
+ .data
+0: .byte 'l','s','x','p','w','d','b',0
+ .previous
+#endif
+
+/*
+ fp_getsource, fp_getdest
+
+ basically, we end up with a pointer to the source operand in
+ %a1, and a pointer to the destination operand in %a0. both
+ are, of course, 96-bit extended floating point numbers.
+*/
+
+fp_getsource:
+ | decode addressing mode for source
+ fp_decode_addr_mode
+
+ .long fp_data, fp_ill
+ .long fp_indirect, fp_postinc
+ .long fp_predecr, fp_disp16
+ .long fp_extmode0, fp_extmode1
+
+ | addressing mode: data register direct
+fp_data:
+ fp_mode_data_direct
+ jsr fp_get_data_reg
+ lea (FPD_TEMPFP1,FPDATA),%a0
+ jmp ([0f:w,%pc,%d1.w*4])
+
+ .align 4
+0:
+ .long fp_data_long, fp_data_single
+ .long fp_ill, fp_ill
+ .long fp_data_word, fp_ill
+ .long fp_data_byte, fp_ill
+
+ | data types that fit in an integer data register
+fp_data_byte:
+ extb.l %d0
+ jra fp_data_long
+
+fp_data_word:
+ ext.l %d0
+
+fp_data_long:
+ jsr fp_conv_long2ext
+ jra fp_getdest
+
+fp_data_single:
+ jsr fp_conv_single2ext
+ jra fp_getdest
+
+ | addressing mode: address register indirect
+fp_indirect:
+ fp_mode_addr_indirect
+ jra fp_fetchsource
+
+ | addressing mode: address register indirect with postincrement
+fp_postinc:
+ fp_mode_addr_indirect_postinc
+ jra fp_fetchsource
+
+ | addressing mode: address register indirect with predecrement
+fp_predecr:
+ fp_mode_addr_indirect_predec
+ jra fp_fetchsource
+
+ | addressing mode: address register/programm counter indirect
+ | with 16bit displacement
+fp_disp16:
+ fp_mode_addr_indirect_disp16
+ jra fp_fetchsource
+
+ | all other indirect addressing modes will finally end up here
+fp_extmode0:
+ fp_mode_addr_indirect_extmode0
+ jra fp_fetchsource
+
+| all pc relative addressing modes and immediate/absolute modes end up here
+| the first ones are sent to fp_extmode0 or fp_disp16
+| and only the latter are handled here
+fp_extmode1:
+ fp_decode_addr_reg
+ jmp ([0f:w,%pc,%d0*4])
+
+ .align 4
+0:
+ .long fp_abs_short, fp_abs_long
+ .long fp_disp16, fp_extmode0
+ .long fp_immediate, fp_ill
+ .long fp_ill, fp_ill
+
+ | addressing mode: absolute short
+fp_abs_short:
+ fp_mode_abs_short
+ jra fp_fetchsource
+
+ | addressing mode: absolute long
+fp_abs_long:
+ fp_mode_abs_long
+ jra fp_fetchsource
+
+ | addressing mode: immediate data
+fp_immediate:
+ printf PDECODE,"#"
+ fp_get_pc %a0
+ move.w (fp_datasize,%d1.w*2),%d0
+ addq.w #1,%d0
+ and.w #-2,%d0
+#ifdef FPU_EMU_DEBUG
+ movem.l %d0/%d1,-(%sp)
+ movel %a0,%a1
+ clr.l %d1
+ jra 2f
+1: getuser.b (%a1)+,%d1,fp_err_ua1,%a1
+ printf PDECODE,"%02x",1,%d1
+2: dbra %d0,1b
+ movem.l (%sp)+,%d0/%d1
+#endif
+ lea (%a0,%d0.w),%a1
+ fp_put_pc %a1
+| jra fp_fetchsource
+
+fp_fetchsource:
+ move.l %a0,%a1
+ swap %d1
+ lea (FPD_TEMPFP1,FPDATA),%a0
+ jmp ([0f:w,%pc,%d1.w*4])
+
+ .align 4
+0: .long fp_long, fp_single
+ .long fp_ext, fp_pack
+ .long fp_word, fp_double
+ .long fp_byte, fp_ill
+
+fp_long:
+ getuser.l (%a1),%d0,fp_err_ua1,%a1
+ jsr fp_conv_long2ext
+ jra fp_getdest
+
+fp_single:
+ getuser.l (%a1),%d0,fp_err_ua1,%a1
+ jsr fp_conv_single2ext
+ jra fp_getdest
+
+fp_ext:
+ getuser.l (%a1)+,%d0,fp_err_ua1,%a1
+ lsr.l #8,%d0
+ lsr.l #7,%d0
+ lsr.w #1,%d0
+ move.l %d0,(%a0)+
+ getuser.l (%a1)+,%d0,fp_err_ua1,%a1
+ move.l %d0,(%a0)+
+ getuser.l (%a1),%d0,fp_err_ua1,%a1
+ move.l %d0,(%a0)
+ subq.l #8,%a0
+ jra fp_getdest
+
+fp_pack:
+ /* not supported yet */
+ jra fp_ill
+
+fp_word:
+ getuser.w (%a1),%d0,fp_err_ua1,%a1
+ ext.l %d0
+ jsr fp_conv_long2ext
+ jra fp_getdest
+
+fp_double:
+ jsr fp_conv_double2ext
+ jra fp_getdest
+
+fp_byte:
+ getuser.b (%a1),%d0,fp_err_ua1,%a1
+ extb.l %d0
+ jsr fp_conv_long2ext
+| jra fp_getdest
+
+fp_getdest:
+ move.l %a0,%a1
+ bfextu %d2{#22,#3},%d0
+ printf PDECODE,",fp%d\n",1,%d0
+ fp_get_fp_reg
+ movem.l %a0/%a1,-(%sp)
+ pea fp_finalrounding
+ bfextu %d2{#25,#7},%d0
+ jmp ([0f:w,%pc,%d0*4])
+
+ .align 4
+0:
+ .long fp_fmove_mem2fp, fp_fint, fp_fsinh, fp_fintrz
+ .long fp_fsqrt, fp_ill, fp_flognp1, fp_ill
+ .long fp_fetoxm1, fp_ftanh, fp_fatan, fp_ill
+ .long fp_fasin, fp_fatanh, fp_fsin, fp_ftan
+ .long fp_fetox, fp_ftwotox, fp_ftentox, fp_ill
+ .long fp_flogn, fp_flog10, fp_flog2, fp_ill
+ .long fp_fabs, fp_fcosh, fp_fneg, fp_ill
+ .long fp_facos, fp_fcos, fp_fgetexp, fp_fgetman
+ .long fp_fdiv, fp_fmod, fp_fadd, fp_fmul
+ .long fpa_fsgldiv, fp_frem, fp_fscale, fpa_fsglmul
+ .long fp_fsub, fp_ill, fp_ill, fp_ill
+ .long fp_ill, fp_ill, fp_ill, fp_ill
+ .long fp_fsincos0, fp_fsincos1, fp_fsincos2, fp_fsincos3
+ .long fp_fsincos4, fp_fsincos5, fp_fsincos6, fp_fsincos7
+ .long fp_fcmp, fp_ill, fp_ftst, fp_ill
+ .long fp_ill, fp_ill, fp_ill, fp_ill
+ .long fp_fsmove, fp_fssqrt, fp_ill, fp_ill
+ .long fp_fdmove, fp_fdsqrt, fp_ill, fp_ill
+ .long fp_ill, fp_ill, fp_ill, fp_ill
+ .long fp_ill, fp_ill, fp_ill, fp_ill
+ .long fp_ill, fp_ill, fp_ill, fp_ill
+ .long fp_ill, fp_ill, fp_ill, fp_ill
+ .long fp_fsabs, fp_ill, fp_fsneg, fp_ill
+ .long fp_fdabs, fp_ill, fp_fdneg, fp_ill
+ .long fp_fsdiv, fp_ill, fp_fsadd, fp_fsmul
+ .long fp_fddiv, fp_ill, fp_fdadd, fp_fdmul
+ .long fp_fssub, fp_ill, fp_ill, fp_ill
+ .long fp_fdsub, fp_ill, fp_ill, fp_ill
+ .long fp_ill, fp_ill, fp_ill, fp_ill
+ .long fp_ill, fp_ill, fp_ill, fp_ill
+ .long fp_ill, fp_ill, fp_ill, fp_ill
+ .long fp_ill, fp_ill, fp_ill, fp_ill
+
+ | Instructions follow
+
+ | Move an (emulated) ROM constant
+fp_fmovecr:
+ bfextu %d2{#27,#5},%d0
+ printf PINSTR,"fp_fmovecr #%d",1,%d0
+ move.l %d0,%d1
+ add.l %d0,%d0
+ add.l %d1,%d0
+ lea (fp_constants,%d0*4),%a0
+ move.l #0x801cc0ff,%d0
+ addq.l #1,%d1
+ lsl.l %d1,%d0
+ jcc 1f
+ fp_set_sr FPSR_EXC_INEX2 | INEX2 exception
+1: moveq #-128,%d0 | continue with fmove
+ and.l %d0,%d2
+ jra fp_getdest
+
+ .data
+ .align 4
+fp_constants:
+ .long 0x00004000,0xc90fdaa2,0x2168c235 | pi
+ .extend 0,0,0,0,0,0,0,0,0,0
+ .long 0x00003ffd,0x9a209a84,0xfbcff798 | log10(2)
+ .long 0x00004000,0xadf85458,0xa2bb4a9a | e
+ .long 0x00003fff,0xb8aa3b29,0x5c17f0bc | log2(e)
+ .long 0x00003ffd,0xde5bd8a9,0x37287195 | log10(e)
+ .long 0x00000000,0x00000000,0x00000000 | 0.0
+ .long 0x00003ffe,0xb17217f7,0xd1cf79ac | 1n(2)
+ .long 0x00004000,0x935d8ddd,0xaaa8ac17 | 1n(10)
+ | read this as "1.0 * 2^0" - note the high bit in the mantissa
+ .long 0x00003fff,0x80000000,0x00000000 | 10^0
+ .long 0x00004002,0xa0000000,0x00000000 | 10^1
+ .long 0x00004005,0xc8000000,0x00000000 | 10^2
+ .long 0x0000400c,0x9c400000,0x00000000 | 10^4
+ .long 0x00004019,0xbebc2000,0x00000000 | 10^8
+ .long 0x00004034,0x8e1bc9bf,0x04000000 | 10^16
+ .long 0x00004069,0x9dc5ada8,0x2b70b59e | 10^32
+ .long 0x000040d3,0xc2781f49,0xffcfa6d5 | 10^64
+ .long 0x000041a8,0x93ba47c9,0x80e98ce0 | 10^128
+ .long 0x00004351,0xaa7eebfb,0x9df9de8e | 10^256
+ .long 0x000046a3,0xe319a0ae,0xa60e91c7 | 10^512
+ .long 0x00004d48,0xc9767586,0x81750c17 | 10^1024
+ .long 0x00005a92,0x9e8b3b5d,0xc53d5de5 | 10^2048
+ .long 0x00007525,0xc4605202,0x8a20979b | 10^4096
+ .previous
+
+fp_fmove_mem2fp:
+ printf PINSTR,"fmove %p,%p\n",2,%a0,%a1
+ move.l (%a1)+,(%a0)+
+ move.l (%a1)+,(%a0)+
+ move.l (%a1),(%a0)
+ subq.l #8,%a0
+ rts
+
+fpa_fsglmul:
+ move.l #fp_finalrounding_single_fast,(%sp)
+ jra fp_fsglmul
+
+fpa_fsgldiv:
+ move.l #fp_finalrounding_single_fast,(%sp)
+ jra fp_fsgldiv
+
+.macro fp_dosingleprec instr
+ printf PINSTR,"single "
+ move.l #fp_finalrounding_single,(%sp)
+ jra \instr
+.endm
+
+.macro fp_dodoubleprec instr
+ printf PINSTR,"double "
+ move.l #fp_finalrounding_double,(%sp)
+ jra \instr
+.endm
+
+fp_fsmove:
+ fp_dosingleprec fp_fmove_mem2fp
+
+fp_fssqrt:
+ fp_dosingleprec fp_fsqrt
+
+fp_fdmove:
+ fp_dodoubleprec fp_fmove_mem2fp
+
+fp_fdsqrt:
+ fp_dodoubleprec fp_fsqrt
+
+fp_fsabs:
+ fp_dosingleprec fp_fabs
+
+fp_fsneg:
+ fp_dosingleprec fp_fneg
+
+fp_fdabs:
+ fp_dodoubleprec fp_fabs
+
+fp_fdneg:
+ fp_dodoubleprec fp_fneg
+
+fp_fsdiv:
+ fp_dosingleprec fp_fdiv
+
+fp_fsadd:
+ fp_dosingleprec fp_fadd
+
+fp_fsmul:
+ fp_dosingleprec fp_fmul
+
+fp_fddiv:
+ fp_dodoubleprec fp_fdiv
+
+fp_fdadd:
+ fp_dodoubleprec fp_fadd
+
+fp_fdmul:
+ fp_dodoubleprec fp_fmul
+
+fp_fssub:
+ fp_dosingleprec fp_fsub
+
+fp_fdsub:
+ fp_dodoubleprec fp_fsub
+
+fp_nonstd:
+ fp_get_pc %a0
+ getuser.l (%a0),%d0,fp_err_ua1,%a0
+ printf ,"nonstd ((%08x)=%08x)\n",2,%a0,%d0
+ moveq #-1,%d0
+ rts
+
+ .data
+ .align 4
+
+ | data sizes corresponding to the operand formats
+fp_datasize:
+ .word 4, 4, 12, 12, 2, 8, 1, 0
diff --git a/arch/m68k/math-emu/fp_trig.c b/arch/m68k/math-emu/fp_trig.c
new file mode 100644
index 000000000000..6361d0784df2
--- /dev/null
+++ b/arch/m68k/math-emu/fp_trig.c
@@ -0,0 +1,183 @@
+/*
+
+ fp_trig.c: floating-point math routines for the Linux-m68k
+ floating point emulator.
+
+ Copyright (c) 1998-1999 David Huggins-Daines / Roman Zippel.
+
+ I hereby give permission, free of charge, to copy, modify, and
+ redistribute this software, in source or binary form, provided that
+ the above copyright notice and the following disclaimer are included
+ in all such copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS", WITH ABSOLUTELY NO WARRANTY, REAL
+ OR IMPLIED.
+
+*/
+
+#include "fp_emu.h"
+#include "fp_trig.h"
+
+struct fp_ext *
+fp_fsin(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fsin\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fcos(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fcos\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_ftan(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("ftan\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fasin(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fasin\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_facos(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("facos\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fatan(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fatan\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fsinh(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fsinh\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fcosh(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fcosh\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_ftanh(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("ftanh\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fatanh(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fatanh\n");
+
+ fp_monadic_check(dest, src);
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fsincos0(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fsincos0\n");
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fsincos1(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fsincos1\n");
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fsincos2(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fsincos2\n");
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fsincos3(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fsincos3\n");
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fsincos4(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fsincos4\n");
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fsincos5(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fsincos5\n");
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fsincos6(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fsincos6\n");
+
+ return dest;
+}
+
+struct fp_ext *
+fp_fsincos7(struct fp_ext *dest, struct fp_ext *src)
+{
+ uprint("fsincos7\n");
+
+ return dest;
+}
diff --git a/arch/m68k/math-emu/fp_trig.h b/arch/m68k/math-emu/fp_trig.h
new file mode 100644
index 000000000000..af8b247e9c98
--- /dev/null
+++ b/arch/m68k/math-emu/fp_trig.h
@@ -0,0 +1,32 @@
+/*
+
+ fp_trig.h: floating-point math routines for the Linux-m68k
+ floating point emulator.
+
+ Copyright (c) 1998 David Huggins-Daines.
+
+ I hereby give permission, free of charge, to copy, modify, and
+ redistribute this software, in source or binary form, provided that
+ the above copyright notice and the following disclaimer are included
+ in all such copies.
+
+ THIS SOFTWARE IS PROVIDED "AS IS", WITH ABSOLUTELY NO WARRANTY, REAL
+ OR IMPLIED.
+
+*/
+
+#ifndef FP_TRIG_H
+#define FP_TRIG_H
+
+#include "fp_emu.h"
+
+/* floating point trigonometric instructions:
+
+ the arguments to these are in the "internal" extended format, that
+ is, an "exploded" version of the 96-bit extended fp format used by
+ the 68881.
+
+ they return a status code, which should end up in %d0, if all goes
+ well. */
+
+#endif /* FP_TRIG__H */
diff --git a/arch/m68k/math-emu/fp_util.S b/arch/m68k/math-emu/fp_util.S
new file mode 100644
index 000000000000..a9f7f0129067
--- /dev/null
+++ b/arch/m68k/math-emu/fp_util.S
@@ -0,0 +1,1455 @@
+/*
+ * fp_util.S
+ *
+ * Copyright Roman Zippel, 1997. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * ALTERNATIVELY, this product may be distributed under the terms of
+ * the GNU General Public License, in which case the provisions of the GPL are
+ * required INSTEAD OF the above restrictions. (This clause is
+ * necessary due to a potential bad interaction between the GPL and
+ * the restrictions contained in a BSD-style copyright.)
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/config.h>
+#include "fp_emu.h"
+
+/*
+ * Here are lots of conversion and normalization functions mainly
+ * used by fp_scan.S
+ * Note that these functions are optimized for "normal" numbers,
+ * these are handled first and exit as fast as possible, this is
+ * especially important for fp_normalize_ext/fp_conv_ext2ext, as
+ * it's called very often.
+ * The register usage is optimized for fp_scan.S and which register
+ * is currently at that time unused, be careful if you want change
+ * something here. %d0 and %d1 is always usable, sometimes %d2 (or
+ * only the lower half) most function have to return the %a0
+ * unmodified, so that the caller can immediately reuse it.
+ */
+
+ .globl fp_ill, fp_end
+
+ | exits from fp_scan:
+ | illegal instruction
+fp_ill:
+ printf ,"fp_illegal\n"
+ rts
+ | completed instruction
+fp_end:
+ tst.l (TASK_MM-8,%a2)
+ jmi 1f
+ tst.l (TASK_MM-4,%a2)
+ jmi 1f
+ tst.l (TASK_MM,%a2)
+ jpl 2f
+1: printf ,"oops:%p,%p,%p\n",3,%a2@(TASK_MM-8),%a2@(TASK_MM-4),%a2@(TASK_MM)
+2: clr.l %d0
+ rts
+
+ .globl fp_conv_long2ext, fp_conv_single2ext
+ .globl fp_conv_double2ext, fp_conv_ext2ext
+ .globl fp_normalize_ext, fp_normalize_double
+ .globl fp_normalize_single, fp_normalize_single_fast
+ .globl fp_conv_ext2double, fp_conv_ext2single
+ .globl fp_conv_ext2long, fp_conv_ext2short
+ .globl fp_conv_ext2byte
+ .globl fp_finalrounding_single, fp_finalrounding_single_fast
+ .globl fp_finalrounding_double
+ .globl fp_finalrounding, fp_finaltest, fp_final
+
+/*
+ * First several conversion functions from a source operand
+ * into the extended format. Note, that only fp_conv_ext2ext
+ * normalizes the number and is always called after the other
+ * conversion functions, which only move the information into
+ * fp_ext structure.
+ */
+
+ | fp_conv_long2ext:
+ |
+ | args: %d0 = source (32-bit long)
+ | %a0 = destination (ptr to struct fp_ext)
+
+fp_conv_long2ext:
+ printf PCONV,"l2e: %p -> %p(",2,%d0,%a0
+ clr.l %d1 | sign defaults to zero
+ tst.l %d0
+ jeq fp_l2e_zero | is source zero?
+ jpl 1f | positive?
+ moveq #1,%d1
+ neg.l %d0
+1: swap %d1
+ move.w #0x3fff+31,%d1
+ move.l %d1,(%a0)+ | set sign / exp
+ move.l %d0,(%a0)+ | set mantissa
+ clr.l (%a0)
+ subq.l #8,%a0 | restore %a0
+ printx PCONV,%a0@
+ printf PCONV,")\n"
+ rts
+ | source is zero
+fp_l2e_zero:
+ clr.l (%a0)+
+ clr.l (%a0)+
+ clr.l (%a0)
+ subq.l #8,%a0
+ printx PCONV,%a0@
+ printf PCONV,")\n"
+ rts
+
+ | fp_conv_single2ext
+ | args: %d0 = source (single-precision fp value)
+ | %a0 = dest (struct fp_ext *)
+
+fp_conv_single2ext:
+ printf PCONV,"s2e: %p -> %p(",2,%d0,%a0
+ move.l %d0,%d1
+ lsl.l #8,%d0 | shift mantissa
+ lsr.l #8,%d1 | exponent / sign
+ lsr.l #7,%d1
+ lsr.w #8,%d1
+ jeq fp_s2e_small | zero / denormal?
+ cmp.w #0xff,%d1 | NaN / Inf?
+ jeq fp_s2e_large
+ bset #31,%d0 | set explizit bit
+ add.w #0x3fff-0x7f,%d1 | re-bias the exponent.
+9: move.l %d1,(%a0)+ | fp_ext.sign, fp_ext.exp
+ move.l %d0,(%a0)+ | high lword of fp_ext.mant
+ clr.l (%a0) | low lword = 0
+ subq.l #8,%a0
+ printx PCONV,%a0@
+ printf PCONV,")\n"
+ rts
+ | zeros and denormalized
+fp_s2e_small:
+ | exponent is zero, so explizit bit is already zero too
+ tst.l %d0
+ jeq 9b
+ move.w #0x4000-0x7f,%d1
+ jra 9b
+ | infinities and NAN
+fp_s2e_large:
+ bclr #31,%d0 | clear explizit bit
+ move.w #0x7fff,%d1
+ jra 9b
+
+fp_conv_double2ext:
+#ifdef FPU_EMU_DEBUG
+ getuser.l %a1@(0),%d0,fp_err_ua2,%a1
+ getuser.l %a1@(4),%d1,fp_err_ua2,%a1
+ printf PCONV,"d2e: %p%p -> %p(",3,%d0,%d1,%a0
+#endif
+ getuser.l (%a1)+,%d0,fp_err_ua2,%a1
+ move.l %d0,%d1
+ lsl.l #8,%d0 | shift high mantissa
+ lsl.l #3,%d0
+ lsr.l #8,%d1 | exponent / sign
+ lsr.l #7,%d1
+ lsr.w #5,%d1
+ jeq fp_d2e_small | zero / denormal?
+ cmp.w #0x7ff,%d1 | NaN / Inf?
+ jeq fp_d2e_large
+ bset #31,%d0 | set explizit bit
+ add.w #0x3fff-0x3ff,%d1 | re-bias the exponent.
+9: move.l %d1,(%a0)+ | fp_ext.sign, fp_ext.exp
+ move.l %d0,(%a0)+
+ getuser.l (%a1)+,%d0,fp_err_ua2,%a1
+ move.l %d0,%d1
+ lsl.l #8,%d0
+ lsl.l #3,%d0
+ move.l %d0,(%a0)
+ moveq #21,%d0
+ lsr.l %d0,%d1
+ or.l %d1,-(%a0)
+ subq.l #4,%a0
+ printx PCONV,%a0@
+ printf PCONV,")\n"
+ rts
+ | zeros and denormalized
+fp_d2e_small:
+ | exponent is zero, so explizit bit is already zero too
+ tst.l %d0
+ jeq 9b
+ move.w #0x4000-0x3ff,%d1
+ jra 9b
+ | infinities and NAN
+fp_d2e_large:
+ bclr #31,%d0 | clear explizit bit
+ move.w #0x7fff,%d1
+ jra 9b
+
+ | fp_conv_ext2ext:
+ | originally used to get longdouble from userspace, now it's
+ | called before arithmetic operations to make sure the number
+ | is normalized [maybe rename it?].
+ | args: %a0 = dest (struct fp_ext *)
+ | returns 0 in %d0 for a NaN, otherwise 1
+
+fp_conv_ext2ext:
+ printf PCONV,"e2e: %p(",1,%a0
+ printx PCONV,%a0@
+ printf PCONV,"), "
+ move.l (%a0)+,%d0
+ cmp.w #0x7fff,%d0 | Inf / NaN?
+ jeq fp_e2e_large
+ move.l (%a0),%d0
+ jpl fp_e2e_small | zero / denorm?
+ | The high bit is set, so normalization is irrelevant.
+fp_e2e_checkround:
+ subq.l #4,%a0
+#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
+ move.b (%a0),%d0
+ jne fp_e2e_round
+#endif
+ printf PCONV,"%p(",1,%a0
+ printx PCONV,%a0@
+ printf PCONV,")\n"
+ moveq #1,%d0
+ rts
+#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
+fp_e2e_round:
+ fp_set_sr FPSR_EXC_INEX2
+ clr.b (%a0)
+ move.w (FPD_RND,FPDATA),%d2
+ jne fp_e2e_roundother | %d2 == 0, round to nearest
+ tst.b %d0 | test guard bit
+ jpl 9f | zero is closer
+ btst #0,(11,%a0) | test lsb bit
+ jne fp_e2e_doroundup | round to infinity
+ lsl.b #1,%d0 | check low bits
+ jeq 9f | round to zero
+fp_e2e_doroundup:
+ addq.l #1,(8,%a0)
+ jcc 9f
+ addq.l #1,(4,%a0)
+ jcc 9f
+ move.w #0x8000,(4,%a0)
+ addq.w #1,(2,%a0)
+9: printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+fp_e2e_roundother:
+ subq.w #2,%d2
+ jcs 9b | %d2 < 2, round to zero
+ jhi 1f | %d2 > 2, round to +infinity
+ tst.b (1,%a0) | to -inf
+ jne fp_e2e_doroundup | negative, round to infinity
+ jra 9b | positive, round to zero
+1: tst.b (1,%a0) | to +inf
+ jeq fp_e2e_doroundup | positive, round to infinity
+ jra 9b | negative, round to zero
+#endif
+ | zeros and subnormals:
+ | try to normalize these anyway.
+fp_e2e_small:
+ jne fp_e2e_small1 | high lword zero?
+ move.l (4,%a0),%d0
+ jne fp_e2e_small2
+#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
+ clr.l %d0
+ move.b (-4,%a0),%d0
+ jne fp_e2e_small3
+#endif
+ | Genuine zero.
+ clr.w -(%a0)
+ subq.l #2,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ moveq #1,%d0
+ rts
+ | definitely subnormal, need to shift all 64 bits
+fp_e2e_small1:
+ bfffo %d0{#0,#32},%d1
+ move.w -(%a0),%d2
+ sub.w %d1,%d2
+ jcc 1f
+ | Pathologically small, denormalize.
+ add.w %d2,%d1
+ clr.w %d2
+1: move.w %d2,(%a0)+
+ move.w %d1,%d2
+ jeq fp_e2e_checkround
+ | fancy 64-bit double-shift begins here
+ lsl.l %d2,%d0
+ move.l %d0,(%a0)+
+ move.l (%a0),%d0
+ move.l %d0,%d1
+ lsl.l %d2,%d0
+ move.l %d0,(%a0)
+ neg.w %d2
+ and.w #0x1f,%d2
+ lsr.l %d2,%d1
+ or.l %d1,-(%a0)
+#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
+fp_e2e_extra1:
+ clr.l %d0
+ move.b (-4,%a0),%d0
+ neg.w %d2
+ add.w #24,%d2
+ jcc 1f
+ clr.b (-4,%a0)
+ lsl.l %d2,%d0
+ or.l %d0,(4,%a0)
+ jra fp_e2e_checkround
+1: addq.w #8,%d2
+ lsl.l %d2,%d0
+ move.b %d0,(-4,%a0)
+ lsr.l #8,%d0
+ or.l %d0,(4,%a0)
+#endif
+ jra fp_e2e_checkround
+ | pathologically small subnormal
+fp_e2e_small2:
+ bfffo %d0{#0,#32},%d1
+ add.w #32,%d1
+ move.w -(%a0),%d2
+ sub.w %d1,%d2
+ jcc 1f
+ | Beyond pathologically small, denormalize.
+ add.w %d2,%d1
+ clr.w %d2
+1: move.w %d2,(%a0)+
+ ext.l %d1
+ jeq fp_e2e_checkround
+ clr.l (4,%a0)
+ sub.w #32,%d2
+ jcs 1f
+ lsl.l %d1,%d0 | lower lword needs only to be shifted
+ move.l %d0,(%a0) | into the higher lword
+#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
+ clr.l %d0
+ move.b (-4,%a0),%d0
+ clr.b (-4,%a0)
+ neg.w %d1
+ add.w #32,%d1
+ bfins %d0,(%a0){%d1,#8}
+#endif
+ jra fp_e2e_checkround
+1: neg.w %d1 | lower lword is splitted between
+ bfins %d0,(%a0){%d1,#32} | higher and lower lword
+#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
+ jra fp_e2e_checkround
+#else
+ move.w %d1,%d2
+ jra fp_e2e_extra1
+ | These are extremely small numbers, that will mostly end up as zero
+ | anyway, so this is only important for correct rounding.
+fp_e2e_small3:
+ bfffo %d0{#24,#8},%d1
+ add.w #40,%d1
+ move.w -(%a0),%d2
+ sub.w %d1,%d2
+ jcc 1f
+ | Pathologically small, denormalize.
+ add.w %d2,%d1
+ clr.w %d2
+1: move.w %d2,(%a0)+
+ ext.l %d1
+ jeq fp_e2e_checkround
+ cmp.w #8,%d1
+ jcs 2f
+1: clr.b (-4,%a0)
+ sub.w #64,%d1
+ jcs 1f
+ add.w #24,%d1
+ lsl.l %d1,%d0
+ move.l %d0,(%a0)
+ jra fp_e2e_checkround
+1: neg.w %d1
+ bfins %d0,(%a0){%d1,#8}
+ jra fp_e2e_checkround
+2: lsl.l %d1,%d0
+ move.b %d0,(-4,%a0)
+ lsr.l #8,%d0
+ move.b %d0,(7,%a0)
+ jra fp_e2e_checkround
+#endif
+1: move.l %d0,%d1 | lower lword is splitted between
+ lsl.l %d2,%d0 | higher and lower lword
+ move.l %d0,(%a0)
+ move.l %d1,%d0
+ neg.w %d2
+ add.w #32,%d2
+ lsr.l %d2,%d0
+ move.l %d0,-(%a0)
+ jra fp_e2e_checkround
+ | Infinities and NaNs
+fp_e2e_large:
+ move.l (%a0)+,%d0
+ jne 3f
+1: tst.l (%a0)
+ jne 4f
+ moveq #1,%d0
+2: subq.l #8,%a0
+ printf PCONV,"%p(",1,%a0
+ printx PCONV,%a0@
+ printf PCONV,")\n"
+ rts
+ | we have maybe a NaN, shift off the highest bit
+3: lsl.l #1,%d0
+ jeq 1b
+ | we have a NaN, clear the return value
+4: clrl %d0
+ jra 2b
+
+
+/*
+ * Normalization functions. Call these on the output of general
+ * FP operators, and before any conversion into the destination
+ * formats. fp_normalize_ext has always to be called first, the
+ * following conversion functions expect an already normalized
+ * number.
+ */
+
+ | fp_normalize_ext:
+ | normalize an extended in extended (unpacked) format, basically
+ | it does the same as fp_conv_ext2ext, additionally it also does
+ | the necessary postprocessing checks.
+ | args: %a0 (struct fp_ext *)
+ | NOTE: it does _not_ modify %a0/%a1 and the upper word of %d2
+
+fp_normalize_ext:
+ printf PNORM,"ne: %p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,"), "
+ move.l (%a0)+,%d0
+ cmp.w #0x7fff,%d0 | Inf / NaN?
+ jeq fp_ne_large
+ move.l (%a0),%d0
+ jpl fp_ne_small | zero / denorm?
+ | The high bit is set, so normalization is irrelevant.
+fp_ne_checkround:
+ subq.l #4,%a0
+#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
+ move.b (%a0),%d0
+ jne fp_ne_round
+#endif
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
+fp_ne_round:
+ fp_set_sr FPSR_EXC_INEX2
+ clr.b (%a0)
+ move.w (FPD_RND,FPDATA),%d2
+ jne fp_ne_roundother | %d2 == 0, round to nearest
+ tst.b %d0 | test guard bit
+ jpl 9f | zero is closer
+ btst #0,(11,%a0) | test lsb bit
+ jne fp_ne_doroundup | round to infinity
+ lsl.b #1,%d0 | check low bits
+ jeq 9f | round to zero
+fp_ne_doroundup:
+ addq.l #1,(8,%a0)
+ jcc 9f
+ addq.l #1,(4,%a0)
+ jcc 9f
+ addq.w #1,(2,%a0)
+ move.w #0x8000,(4,%a0)
+9: printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+fp_ne_roundother:
+ subq.w #2,%d2
+ jcs 9b | %d2 < 2, round to zero
+ jhi 1f | %d2 > 2, round to +infinity
+ tst.b (1,%a0) | to -inf
+ jne fp_ne_doroundup | negative, round to infinity
+ jra 9b | positive, round to zero
+1: tst.b (1,%a0) | to +inf
+ jeq fp_ne_doroundup | positive, round to infinity
+ jra 9b | negative, round to zero
+#endif
+ | Zeros and subnormal numbers
+ | These are probably merely subnormal, rather than "denormalized"
+ | numbers, so we will try to make them normal again.
+fp_ne_small:
+ jne fp_ne_small1 | high lword zero?
+ move.l (4,%a0),%d0
+ jne fp_ne_small2
+#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
+ clr.l %d0
+ move.b (-4,%a0),%d0
+ jne fp_ne_small3
+#endif
+ | Genuine zero.
+ clr.w -(%a0)
+ subq.l #2,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+ | Subnormal.
+fp_ne_small1:
+ bfffo %d0{#0,#32},%d1
+ move.w -(%a0),%d2
+ sub.w %d1,%d2
+ jcc 1f
+ | Pathologically small, denormalize.
+ add.w %d2,%d1
+ clr.w %d2
+ fp_set_sr FPSR_EXC_UNFL
+1: move.w %d2,(%a0)+
+ move.w %d1,%d2
+ jeq fp_ne_checkround
+ | This is exactly the same 64-bit double shift as seen above.
+ lsl.l %d2,%d0
+ move.l %d0,(%a0)+
+ move.l (%a0),%d0
+ move.l %d0,%d1
+ lsl.l %d2,%d0
+ move.l %d0,(%a0)
+ neg.w %d2
+ and.w #0x1f,%d2
+ lsr.l %d2,%d1
+ or.l %d1,-(%a0)
+#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
+fp_ne_extra1:
+ clr.l %d0
+ move.b (-4,%a0),%d0
+ neg.w %d2
+ add.w #24,%d2
+ jcc 1f
+ clr.b (-4,%a0)
+ lsl.l %d2,%d0
+ or.l %d0,(4,%a0)
+ jra fp_ne_checkround
+1: addq.w #8,%d2
+ lsl.l %d2,%d0
+ move.b %d0,(-4,%a0)
+ lsr.l #8,%d0
+ or.l %d0,(4,%a0)
+#endif
+ jra fp_ne_checkround
+ | May or may not be subnormal, if so, only 32 bits to shift.
+fp_ne_small2:
+ bfffo %d0{#0,#32},%d1
+ add.w #32,%d1
+ move.w -(%a0),%d2
+ sub.w %d1,%d2
+ jcc 1f
+ | Beyond pathologically small, denormalize.
+ add.w %d2,%d1
+ clr.w %d2
+ fp_set_sr FPSR_EXC_UNFL
+1: move.w %d2,(%a0)+
+ ext.l %d1
+ jeq fp_ne_checkround
+ clr.l (4,%a0)
+ sub.w #32,%d1
+ jcs 1f
+ lsl.l %d1,%d0 | lower lword needs only to be shifted
+ move.l %d0,(%a0) | into the higher lword
+#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
+ clr.l %d0
+ move.b (-4,%a0),%d0
+ clr.b (-4,%a0)
+ neg.w %d1
+ add.w #32,%d1
+ bfins %d0,(%a0){%d1,#8}
+#endif
+ jra fp_ne_checkround
+1: neg.w %d1 | lower lword is splitted between
+ bfins %d0,(%a0){%d1,#32} | higher and lower lword
+#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
+ jra fp_ne_checkround
+#else
+ move.w %d1,%d2
+ jra fp_ne_extra1
+ | These are extremely small numbers, that will mostly end up as zero
+ | anyway, so this is only important for correct rounding.
+fp_ne_small3:
+ bfffo %d0{#24,#8},%d1
+ add.w #40,%d1
+ move.w -(%a0),%d2
+ sub.w %d1,%d2
+ jcc 1f
+ | Pathologically small, denormalize.
+ add.w %d2,%d1
+ clr.w %d2
+1: move.w %d2,(%a0)+
+ ext.l %d1
+ jeq fp_ne_checkround
+ cmp.w #8,%d1
+ jcs 2f
+1: clr.b (-4,%a0)
+ sub.w #64,%d1
+ jcs 1f
+ add.w #24,%d1
+ lsl.l %d1,%d0
+ move.l %d0,(%a0)
+ jra fp_ne_checkround
+1: neg.w %d1
+ bfins %d0,(%a0){%d1,#8}
+ jra fp_ne_checkround
+2: lsl.l %d1,%d0
+ move.b %d0,(-4,%a0)
+ lsr.l #8,%d0
+ move.b %d0,(7,%a0)
+ jra fp_ne_checkround
+#endif
+ | Infinities and NaNs, again, same as above.
+fp_ne_large:
+ move.l (%a0)+,%d0
+ jne 3f
+1: tst.l (%a0)
+ jne 4f
+2: subq.l #8,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+ | we have maybe a NaN, shift off the highest bit
+3: move.l %d0,%d1
+ lsl.l #1,%d1
+ jne 4f
+ clr.l (-4,%a0)
+ jra 1b
+ | we have a NaN, test if it is signaling
+4: bset #30,%d0
+ jne 2b
+ fp_set_sr FPSR_EXC_SNAN
+ move.l %d0,(-4,%a0)
+ jra 2b
+
+ | these next two do rounding as per the IEEE standard.
+ | values for the rounding modes appear to be:
+ | 0: Round to nearest
+ | 1: Round to zero
+ | 2: Round to -Infinity
+ | 3: Round to +Infinity
+ | both functions expect that fp_normalize was already
+ | called (and extended argument is already normalized
+ | as far as possible), these are used if there is different
+ | rounding precision is selected and before converting
+ | into single/double
+
+ | fp_normalize_double:
+ | normalize an extended with double (52-bit) precision
+ | args: %a0 (struct fp_ext *)
+
+fp_normalize_double:
+ printf PNORM,"nd: %p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,"), "
+ move.l (%a0)+,%d2
+ tst.w %d2
+ jeq fp_nd_zero | zero / denormalized
+ cmp.w #0x7fff,%d2
+ jeq fp_nd_huge | NaN / infinitive.
+ sub.w #0x4000-0x3ff,%d2 | will the exponent fit?
+ jcs fp_nd_small | too small.
+ cmp.w #0x7fe,%d2
+ jcc fp_nd_large | too big.
+ addq.l #4,%a0
+ move.l (%a0),%d0 | low lword of mantissa
+ | now, round off the low 11 bits.
+fp_nd_round:
+ moveq #21,%d1
+ lsl.l %d1,%d0 | keep 11 low bits.
+ jne fp_nd_checkround | Are they non-zero?
+ | nothing to do here
+9: subq.l #8,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+ | Be careful with the X bit! It contains the lsb
+ | from the shift above, it is needed for round to nearest.
+fp_nd_checkround:
+ fp_set_sr FPSR_EXC_INEX2 | INEX2 bit
+ and.w #0xf800,(2,%a0) | clear bits 0-10
+ move.w (FPD_RND,FPDATA),%d2 | rounding mode
+ jne 2f | %d2 == 0, round to nearest
+ tst.l %d0 | test guard bit
+ jpl 9b | zero is closer
+ | here we test the X bit by adding it to %d2
+ clr.w %d2 | first set z bit, addx only clears it
+ addx.w %d2,%d2 | test lsb bit
+ | IEEE754-specified "round to even" behaviour. If the guard
+ | bit is set, then the number is odd, so rounding works like
+ | in grade-school arithmetic (i.e. 1.5 rounds to 2.0)
+ | Otherwise, an equal distance rounds towards zero, so as not
+ | to produce an odd number. This is strange, but it is what
+ | the standard says.
+ jne fp_nd_doroundup | round to infinity
+ lsl.l #1,%d0 | check low bits
+ jeq 9b | round to zero
+fp_nd_doroundup:
+ | round (the mantissa, that is) towards infinity
+ add.l #0x800,(%a0)
+ jcc 9b | no overflow, good.
+ addq.l #1,-(%a0) | extend to high lword
+ jcc 1f | no overflow, good.
+ | Yow! we have managed to overflow the mantissa. Since this
+ | only happens when %d1 was 0xfffff800, it is now zero, so
+ | reset the high bit, and increment the exponent.
+ move.w #0x8000,(%a0)
+ addq.w #1,-(%a0)
+ cmp.w #0x43ff,(%a0)+ | exponent now overflown?
+ jeq fp_nd_large | yes, so make it infinity.
+1: subq.l #4,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+2: subq.w #2,%d2
+ jcs 9b | %d2 < 2, round to zero
+ jhi 3f | %d2 > 2, round to +infinity
+ | Round to +Inf or -Inf. High word of %d2 contains the
+ | sign of the number, by the way.
+ swap %d2 | to -inf
+ tst.b %d2
+ jne fp_nd_doroundup | negative, round to infinity
+ jra 9b | positive, round to zero
+3: swap %d2 | to +inf
+ tst.b %d2
+ jeq fp_nd_doroundup | positive, round to infinity
+ jra 9b | negative, round to zero
+ | Exponent underflow. Try to make a denormal, and set it to
+ | the smallest possible fraction if this fails.
+fp_nd_small:
+ fp_set_sr FPSR_EXC_UNFL | set UNFL bit
+ move.w #0x3c01,(-2,%a0) | 2**-1022
+ neg.w %d2 | degree of underflow
+ cmp.w #32,%d2 | single or double shift?
+ jcc 1f
+ | Again, another 64-bit double shift.
+ move.l (%a0),%d0
+ move.l %d0,%d1
+ lsr.l %d2,%d0
+ move.l %d0,(%a0)+
+ move.l (%a0),%d0
+ lsr.l %d2,%d0
+ neg.w %d2
+ add.w #32,%d2
+ lsl.l %d2,%d1
+ or.l %d1,%d0
+ move.l (%a0),%d1
+ move.l %d0,(%a0)
+ | Check to see if we shifted off any significant bits
+ lsl.l %d2,%d1
+ jeq fp_nd_round | Nope, round.
+ bset #0,%d0 | Yes, so set the "sticky bit".
+ jra fp_nd_round | Now, round.
+ | Another 64-bit single shift and store
+1: sub.w #32,%d2
+ cmp.w #32,%d2 | Do we really need to shift?
+ jcc 2f | No, the number is too small.
+ move.l (%a0),%d0
+ clr.l (%a0)+
+ move.l %d0,%d1
+ lsr.l %d2,%d0
+ neg.w %d2
+ add.w #32,%d2
+ | Again, check to see if we shifted off any significant bits.
+ tst.l (%a0)
+ jeq 1f
+ bset #0,%d0 | Sticky bit.
+1: move.l %d0,(%a0)
+ lsl.l %d2,%d1
+ jeq fp_nd_round
+ bset #0,%d0
+ jra fp_nd_round
+ | Sorry, the number is just too small.
+2: clr.l (%a0)+
+ clr.l (%a0)
+ moveq #1,%d0 | Smallest possible fraction,
+ jra fp_nd_round | round as desired.
+ | zero and denormalized
+fp_nd_zero:
+ tst.l (%a0)+
+ jne 1f
+ tst.l (%a0)
+ jne 1f
+ subq.l #8,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts | zero. nothing to do.
+ | These are not merely subnormal numbers, but true denormals,
+ | i.e. pathologically small (exponent is 2**-16383) numbers.
+ | It is clearly impossible for even a normal extended number
+ | with that exponent to fit into double precision, so just
+ | write these ones off as "too darn small".
+1: fp_set_sr FPSR_EXC_UNFL | Set UNFL bit
+ clr.l (%a0)
+ clr.l -(%a0)
+ move.w #0x3c01,-(%a0) | i.e. 2**-1022
+ addq.l #6,%a0
+ moveq #1,%d0
+ jra fp_nd_round | round.
+ | Exponent overflow. Just call it infinity.
+fp_nd_large:
+ move.w #0x7ff,%d0
+ and.w (6,%a0),%d0
+ jeq 1f
+ fp_set_sr FPSR_EXC_INEX2
+1: fp_set_sr FPSR_EXC_OVFL
+ move.w (FPD_RND,FPDATA),%d2
+ jne 3f | %d2 = 0 round to nearest
+1: move.w #0x7fff,(-2,%a0)
+ clr.l (%a0)+
+ clr.l (%a0)
+2: subq.l #8,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+3: subq.w #2,%d2
+ jcs 5f | %d2 < 2, round to zero
+ jhi 4f | %d2 > 2, round to +infinity
+ tst.b (-3,%a0) | to -inf
+ jne 1b
+ jra 5f
+4: tst.b (-3,%a0) | to +inf
+ jeq 1b
+5: move.w #0x43fe,(-2,%a0)
+ moveq #-1,%d0
+ move.l %d0,(%a0)+
+ move.w #0xf800,%d0
+ move.l %d0,(%a0)
+ jra 2b
+ | Infinities or NaNs
+fp_nd_huge:
+ subq.l #4,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+
+ | fp_normalize_single:
+ | normalize an extended with single (23-bit) precision
+ | args: %a0 (struct fp_ext *)
+
+fp_normalize_single:
+ printf PNORM,"ns: %p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,") "
+ addq.l #2,%a0
+ move.w (%a0)+,%d2
+ jeq fp_ns_zero | zero / denormalized
+ cmp.w #0x7fff,%d2
+ jeq fp_ns_huge | NaN / infinitive.
+ sub.w #0x4000-0x7f,%d2 | will the exponent fit?
+ jcs fp_ns_small | too small.
+ cmp.w #0xfe,%d2
+ jcc fp_ns_large | too big.
+ move.l (%a0)+,%d0 | get high lword of mantissa
+fp_ns_round:
+ tst.l (%a0) | check the low lword
+ jeq 1f
+ | Set a sticky bit if it is non-zero. This should only
+ | affect the rounding in what would otherwise be equal-
+ | distance situations, which is what we want it to do.
+ bset #0,%d0
+1: clr.l (%a0) | zap it from memory.
+ | now, round off the low 8 bits of the hi lword.
+ tst.b %d0 | 8 low bits.
+ jne fp_ns_checkround | Are they non-zero?
+ | nothing to do here
+ subq.l #8,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+fp_ns_checkround:
+ fp_set_sr FPSR_EXC_INEX2 | INEX2 bit
+ clr.b -(%a0) | clear low byte of high lword
+ subq.l #3,%a0
+ move.w (FPD_RND,FPDATA),%d2 | rounding mode
+ jne 2f | %d2 == 0, round to nearest
+ tst.b %d0 | test guard bit
+ jpl 9f | zero is closer
+ btst #8,%d0 | test lsb bit
+ | round to even behaviour, see above.
+ jne fp_ns_doroundup | round to infinity
+ lsl.b #1,%d0 | check low bits
+ jeq 9f | round to zero
+fp_ns_doroundup:
+ | round (the mantissa, that is) towards infinity
+ add.l #0x100,(%a0)
+ jcc 9f | no overflow, good.
+ | Overflow. This means that the %d1 was 0xffffff00, so it
+ | is now zero. We will set the mantissa to reflect this, and
+ | increment the exponent (checking for overflow there too)
+ move.w #0x8000,(%a0)
+ addq.w #1,-(%a0)
+ cmp.w #0x407f,(%a0)+ | exponent now overflown?
+ jeq fp_ns_large | yes, so make it infinity.
+9: subq.l #4,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+ | check nondefault rounding modes
+2: subq.w #2,%d2
+ jcs 9b | %d2 < 2, round to zero
+ jhi 3f | %d2 > 2, round to +infinity
+ tst.b (-3,%a0) | to -inf
+ jne fp_ns_doroundup | negative, round to infinity
+ jra 9b | positive, round to zero
+3: tst.b (-3,%a0) | to +inf
+ jeq fp_ns_doroundup | positive, round to infinity
+ jra 9b | negative, round to zero
+ | Exponent underflow. Try to make a denormal, and set it to
+ | the smallest possible fraction if this fails.
+fp_ns_small:
+ fp_set_sr FPSR_EXC_UNFL | set UNFL bit
+ move.w #0x3f81,(-2,%a0) | 2**-126
+ neg.w %d2 | degree of underflow
+ cmp.w #32,%d2 | single or double shift?
+ jcc 2f
+ | a 32-bit shift.
+ move.l (%a0),%d0
+ move.l %d0,%d1
+ lsr.l %d2,%d0
+ move.l %d0,(%a0)+
+ | Check to see if we shifted off any significant bits.
+ neg.w %d2
+ add.w #32,%d2
+ lsl.l %d2,%d1
+ jeq 1f
+ bset #0,%d0 | Sticky bit.
+ | Check the lower lword
+1: tst.l (%a0)
+ jeq fp_ns_round
+ clr (%a0)
+ bset #0,%d0 | Sticky bit.
+ jra fp_ns_round
+ | Sorry, the number is just too small.
+2: clr.l (%a0)+
+ clr.l (%a0)
+ moveq #1,%d0 | Smallest possible fraction,
+ jra fp_ns_round | round as desired.
+ | Exponent overflow. Just call it infinity.
+fp_ns_large:
+ tst.b (3,%a0)
+ jeq 1f
+ fp_set_sr FPSR_EXC_INEX2
+1: fp_set_sr FPSR_EXC_OVFL
+ move.w (FPD_RND,FPDATA),%d2
+ jne 3f | %d2 = 0 round to nearest
+1: move.w #0x7fff,(-2,%a0)
+ clr.l (%a0)+
+ clr.l (%a0)
+2: subq.l #8,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+3: subq.w #2,%d2
+ jcs 5f | %d2 < 2, round to zero
+ jhi 4f | %d2 > 2, round to +infinity
+ tst.b (-3,%a0) | to -inf
+ jne 1b
+ jra 5f
+4: tst.b (-3,%a0) | to +inf
+ jeq 1b
+5: move.w #0x407e,(-2,%a0)
+ move.l #0xffffff00,(%a0)+
+ clr.l (%a0)
+ jra 2b
+ | zero and denormalized
+fp_ns_zero:
+ tst.l (%a0)+
+ jne 1f
+ tst.l (%a0)
+ jne 1f
+ subq.l #8,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts | zero. nothing to do.
+ | These are not merely subnormal numbers, but true denormals,
+ | i.e. pathologically small (exponent is 2**-16383) numbers.
+ | It is clearly impossible for even a normal extended number
+ | with that exponent to fit into single precision, so just
+ | write these ones off as "too darn small".
+1: fp_set_sr FPSR_EXC_UNFL | Set UNFL bit
+ clr.l (%a0)
+ clr.l -(%a0)
+ move.w #0x3f81,-(%a0) | i.e. 2**-126
+ addq.l #6,%a0
+ moveq #1,%d0
+ jra fp_ns_round | round.
+ | Infinities or NaNs
+fp_ns_huge:
+ subq.l #4,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+
+ | fp_normalize_single_fast:
+ | normalize an extended with single (23-bit) precision
+ | this is only used by fsgldiv/fsgdlmul, where the
+ | operand is not completly normalized.
+ | args: %a0 (struct fp_ext *)
+
+fp_normalize_single_fast:
+ printf PNORM,"nsf: %p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,") "
+ addq.l #2,%a0
+ move.w (%a0)+,%d2
+ cmp.w #0x7fff,%d2
+ jeq fp_nsf_huge | NaN / infinitive.
+ move.l (%a0)+,%d0 | get high lword of mantissa
+fp_nsf_round:
+ tst.l (%a0) | check the low lword
+ jeq 1f
+ | Set a sticky bit if it is non-zero. This should only
+ | affect the rounding in what would otherwise be equal-
+ | distance situations, which is what we want it to do.
+ bset #0,%d0
+1: clr.l (%a0) | zap it from memory.
+ | now, round off the low 8 bits of the hi lword.
+ tst.b %d0 | 8 low bits.
+ jne fp_nsf_checkround | Are they non-zero?
+ | nothing to do here
+ subq.l #8,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+fp_nsf_checkround:
+ fp_set_sr FPSR_EXC_INEX2 | INEX2 bit
+ clr.b -(%a0) | clear low byte of high lword
+ subq.l #3,%a0
+ move.w (FPD_RND,FPDATA),%d2 | rounding mode
+ jne 2f | %d2 == 0, round to nearest
+ tst.b %d0 | test guard bit
+ jpl 9f | zero is closer
+ btst #8,%d0 | test lsb bit
+ | round to even behaviour, see above.
+ jne fp_nsf_doroundup | round to infinity
+ lsl.b #1,%d0 | check low bits
+ jeq 9f | round to zero
+fp_nsf_doroundup:
+ | round (the mantissa, that is) towards infinity
+ add.l #0x100,(%a0)
+ jcc 9f | no overflow, good.
+ | Overflow. This means that the %d1 was 0xffffff00, so it
+ | is now zero. We will set the mantissa to reflect this, and
+ | increment the exponent (checking for overflow there too)
+ move.w #0x8000,(%a0)
+ addq.w #1,-(%a0)
+ cmp.w #0x407f,(%a0)+ | exponent now overflown?
+ jeq fp_nsf_large | yes, so make it infinity.
+9: subq.l #4,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+ | check nondefault rounding modes
+2: subq.w #2,%d2
+ jcs 9b | %d2 < 2, round to zero
+ jhi 3f | %d2 > 2, round to +infinity
+ tst.b (-3,%a0) | to -inf
+ jne fp_nsf_doroundup | negative, round to infinity
+ jra 9b | positive, round to zero
+3: tst.b (-3,%a0) | to +inf
+ jeq fp_nsf_doroundup | positive, round to infinity
+ jra 9b | negative, round to zero
+ | Exponent overflow. Just call it infinity.
+fp_nsf_large:
+ tst.b (3,%a0)
+ jeq 1f
+ fp_set_sr FPSR_EXC_INEX2
+1: fp_set_sr FPSR_EXC_OVFL
+ move.w (FPD_RND,FPDATA),%d2
+ jne 3f | %d2 = 0 round to nearest
+1: move.w #0x7fff,(-2,%a0)
+ clr.l (%a0)+
+ clr.l (%a0)
+2: subq.l #8,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+3: subq.w #2,%d2
+ jcs 5f | %d2 < 2, round to zero
+ jhi 4f | %d2 > 2, round to +infinity
+ tst.b (-3,%a0) | to -inf
+ jne 1b
+ jra 5f
+4: tst.b (-3,%a0) | to +inf
+ jeq 1b
+5: move.w #0x407e,(-2,%a0)
+ move.l #0xffffff00,(%a0)+
+ clr.l (%a0)
+ jra 2b
+ | Infinities or NaNs
+fp_nsf_huge:
+ subq.l #4,%a0
+ printf PNORM,"%p(",1,%a0
+ printx PNORM,%a0@
+ printf PNORM,")\n"
+ rts
+
+ | conv_ext2int (macro):
+ | Generates a subroutine that converts an extended value to an
+ | integer of a given size, again, with the appropriate type of
+ | rounding.
+
+ | Macro arguments:
+ | s: size, as given in an assembly instruction.
+ | b: number of bits in that size.
+
+ | Subroutine arguments:
+ | %a0: source (struct fp_ext *)
+
+ | Returns the integer in %d0 (like it should)
+
+.macro conv_ext2int s,b
+ .set inf,(1<<(\b-1))-1 | i.e. MAXINT
+ printf PCONV,"e2i%d: %p(",2,#\b,%a0
+ printx PCONV,%a0@
+ printf PCONV,") "
+ addq.l #2,%a0
+ move.w (%a0)+,%d2 | exponent
+ jeq fp_e2i_zero\b | zero / denorm (== 0, here)
+ cmp.w #0x7fff,%d2
+ jeq fp_e2i_huge\b | Inf / NaN
+ sub.w #0x3ffe,%d2
+ jcs fp_e2i_small\b
+ cmp.w #\b,%d2
+ jhi fp_e2i_large\b
+ move.l (%a0),%d0
+ move.l %d0,%d1
+ lsl.l %d2,%d1
+ jne fp_e2i_round\b
+ tst.l (4,%a0)
+ jne fp_e2i_round\b
+ neg.w %d2
+ add.w #32,%d2
+ lsr.l %d2,%d0
+9: tst.w (-4,%a0)
+ jne 1f
+ tst.\s %d0
+ jmi fp_e2i_large\b
+ printf PCONV,"-> %p\n",1,%d0
+ rts
+1: neg.\s %d0
+ jeq 1f
+ jpl fp_e2i_large\b
+1: printf PCONV,"-> %p\n",1,%d0
+ rts
+fp_e2i_round\b:
+ fp_set_sr FPSR_EXC_INEX2 | INEX2 bit
+ neg.w %d2
+ add.w #32,%d2
+ .if \b>16
+ jeq 5f
+ .endif
+ lsr.l %d2,%d0
+ move.w (FPD_RND,FPDATA),%d2 | rounding mode
+ jne 2f | %d2 == 0, round to nearest
+ tst.l %d1 | test guard bit
+ jpl 9b | zero is closer
+ btst %d2,%d0 | test lsb bit (%d2 still 0)
+ jne fp_e2i_doroundup\b
+ lsl.l #1,%d1 | check low bits
+ jne fp_e2i_doroundup\b
+ tst.l (4,%a0)
+ jeq 9b
+fp_e2i_doroundup\b:
+ addq.l #1,%d0
+ jra 9b
+ | check nondefault rounding modes
+2: subq.w #2,%d2
+ jcs 9b | %d2 < 2, round to zero
+ jhi 3f | %d2 > 2, round to +infinity
+ tst.w (-4,%a0) | to -inf
+ jne fp_e2i_doroundup\b | negative, round to infinity
+ jra 9b | positive, round to zero
+3: tst.w (-4,%a0) | to +inf
+ jeq fp_e2i_doroundup\b | positive, round to infinity
+ jra 9b | negative, round to zero
+ | we are only want -2**127 get correctly rounded here,
+ | since the guard bit is in the lower lword.
+ | everything else ends up anyway as overflow.
+ .if \b>16
+5: move.w (FPD_RND,FPDATA),%d2 | rounding mode
+ jne 2b | %d2 == 0, round to nearest
+ move.l (4,%a0),%d1 | test guard bit
+ jpl 9b | zero is closer
+ lsl.l #1,%d1 | check low bits
+ jne fp_e2i_doroundup\b
+ jra 9b
+ .endif
+fp_e2i_zero\b:
+ clr.l %d0
+ tst.l (%a0)+
+ jne 1f
+ tst.l (%a0)
+ jeq 3f
+1: subq.l #4,%a0
+ fp_clr_sr FPSR_EXC_UNFL | fp_normalize_ext has set this bit
+fp_e2i_small\b:
+ fp_set_sr FPSR_EXC_INEX2
+ clr.l %d0
+ move.w (FPD_RND,FPDATA),%d2 | rounding mode
+ subq.w #2,%d2
+ jcs 3f | %d2 < 2, round to nearest/zero
+ jhi 2f | %d2 > 2, round to +infinity
+ tst.w (-4,%a0) | to -inf
+ jeq 3f
+ subq.\s #1,%d0
+ jra 3f
+2: tst.w (-4,%a0) | to +inf
+ jne 3f
+ addq.\s #1,%d0
+3: printf PCONV,"-> %p\n",1,%d0
+ rts
+fp_e2i_large\b:
+ fp_set_sr FPSR_EXC_OPERR
+ move.\s #inf,%d0
+ tst.w (-4,%a0)
+ jeq 1f
+ addq.\s #1,%d0
+1: printf PCONV,"-> %p\n",1,%d0
+ rts
+fp_e2i_huge\b:
+ move.\s (%a0),%d0
+ tst.l (%a0)
+ jne 1f
+ tst.l (%a0)
+ jeq fp_e2i_large\b
+ | fp_normalize_ext has set this bit already
+ | and made the number nonsignaling
+1: fp_tst_sr FPSR_EXC_SNAN
+ jne 1f
+ fp_set_sr FPSR_EXC_OPERR
+1: printf PCONV,"-> %p\n",1,%d0
+ rts
+.endm
+
+fp_conv_ext2long:
+ conv_ext2int l,32
+
+fp_conv_ext2short:
+ conv_ext2int w,16
+
+fp_conv_ext2byte:
+ conv_ext2int b,8
+
+fp_conv_ext2double:
+ jsr fp_normalize_double
+ printf PCONV,"e2d: %p(",1,%a0
+ printx PCONV,%a0@
+ printf PCONV,"), "
+ move.l (%a0)+,%d2
+ cmp.w #0x7fff,%d2
+ jne 1f
+ move.w #0x7ff,%d2
+ move.l (%a0)+,%d0
+ jra 2f
+1: sub.w #0x3fff-0x3ff,%d2
+ move.l (%a0)+,%d0
+ jmi 2f
+ clr.w %d2
+2: lsl.w #5,%d2
+ lsl.l #7,%d2
+ lsl.l #8,%d2
+ move.l %d0,%d1
+ lsl.l #1,%d0
+ lsr.l #4,%d0
+ lsr.l #8,%d0
+ or.l %d2,%d0
+ putuser.l %d0,(%a1)+,fp_err_ua2,%a1
+ moveq #21,%d0
+ lsl.l %d0,%d1
+ move.l (%a0),%d0
+ lsr.l #4,%d0
+ lsr.l #7,%d0
+ or.l %d1,%d0
+ putuser.l %d0,(%a1),fp_err_ua2,%a1
+#ifdef FPU_EMU_DEBUG
+ getuser.l %a1@(-4),%d0,fp_err_ua2,%a1
+ getuser.l %a1@(0),%d1,fp_err_ua2,%a1
+ printf PCONV,"%p(%08x%08x)\n",3,%a1,%d0,%d1
+#endif
+ rts
+
+fp_conv_ext2single:
+ jsr fp_normalize_single
+ printf PCONV,"e2s: %p(",1,%a0
+ printx PCONV,%a0@
+ printf PCONV,"), "
+ move.l (%a0)+,%d1
+ cmp.w #0x7fff,%d1
+ jne 1f
+ move.w #0xff,%d1
+ move.l (%a0)+,%d0
+ jra 2f
+1: sub.w #0x3fff-0x7f,%d1
+ move.l (%a0)+,%d0
+ jmi 2f
+ clr.w %d1
+2: lsl.w #8,%d1
+ lsl.l #7,%d1
+ lsl.l #8,%d1
+ bclr #31,%d0
+ lsr.l #8,%d0
+ or.l %d1,%d0
+ printf PCONV,"%08x\n",1,%d0
+ rts
+
+ | special return addresses for instr that
+ | encode the rounding precision in the opcode
+ | (e.g. fsmove,fdmove)
+
+fp_finalrounding_single:
+ addq.l #8,%sp
+ jsr fp_normalize_ext
+ jsr fp_normalize_single
+ jra fp_finaltest
+
+fp_finalrounding_single_fast:
+ addq.l #8,%sp
+ jsr fp_normalize_ext
+ jsr fp_normalize_single_fast
+ jra fp_finaltest
+
+fp_finalrounding_double:
+ addq.l #8,%sp
+ jsr fp_normalize_ext
+ jsr fp_normalize_double
+ jra fp_finaltest
+
+ | fp_finaltest:
+ | set the emulated status register based on the outcome of an
+ | emulated instruction.
+
+fp_finalrounding:
+ addq.l #8,%sp
+| printf ,"f: %p\n",1,%a0
+ jsr fp_normalize_ext
+ move.w (FPD_PREC,FPDATA),%d0
+ subq.w #1,%d0
+ jcs fp_finaltest
+ jne 1f
+ jsr fp_normalize_single
+ jra 2f
+1: jsr fp_normalize_double
+2:| printf ,"f: %p\n",1,%a0
+fp_finaltest:
+ | First, we do some of the obvious tests for the exception
+ | status byte and condition code bytes of fp_sr here, so that
+ | they do not have to be handled individually by every
+ | emulated instruction.
+ clr.l %d0
+ addq.l #1,%a0
+ tst.b (%a0)+ | sign
+ jeq 1f
+ bset #FPSR_CC_NEG-24,%d0 | N bit
+1: cmp.w #0x7fff,(%a0)+ | exponent
+ jeq 2f
+ | test for zero
+ moveq #FPSR_CC_Z-24,%d1
+ tst.l (%a0)+
+ jne 9f
+ tst.l (%a0)
+ jne 9f
+ jra 8f
+ | infinitiv and NAN
+2: moveq #FPSR_CC_NAN-24,%d1
+ move.l (%a0)+,%d2
+ lsl.l #1,%d2 | ignore high bit
+ jne 8f
+ tst.l (%a0)
+ jne 8f
+ moveq #FPSR_CC_INF-24,%d1
+8: bset %d1,%d0
+9: move.b %d0,(FPD_FPSR+0,FPDATA) | set condition test result
+ | move instructions enter here
+ | Here, we test things in the exception status byte, and set
+ | other things in the accrued exception byte accordingly.
+ | Emulated instructions can set various things in the former,
+ | as defined in fp_emu.h.
+fp_final:
+ move.l (FPD_FPSR,FPDATA),%d0
+#if 0
+ btst #FPSR_EXC_SNAN,%d0 | EXC_SNAN
+ jne 1f
+ btst #FPSR_EXC_OPERR,%d0 | EXC_OPERR
+ jeq 2f
+1: bset #FPSR_AEXC_IOP,%d0 | set IOP bit
+2: btst #FPSR_EXC_OVFL,%d0 | EXC_OVFL
+ jeq 1f
+ bset #FPSR_AEXC_OVFL,%d0 | set OVFL bit
+1: btst #FPSR_EXC_UNFL,%d0 | EXC_UNFL
+ jeq 1f
+ btst #FPSR_EXC_INEX2,%d0 | EXC_INEX2
+ jeq 1f
+ bset #FPSR_AEXC_UNFL,%d0 | set UNFL bit
+1: btst #FPSR_EXC_DZ,%d0 | EXC_INEX1
+ jeq 1f
+ bset #FPSR_AEXC_DZ,%d0 | set DZ bit
+1: btst #FPSR_EXC_OVFL,%d0 | EXC_OVFL
+ jne 1f
+ btst #FPSR_EXC_INEX2,%d0 | EXC_INEX2
+ jne 1f
+ btst #FPSR_EXC_INEX1,%d0 | EXC_INEX1
+ jeq 2f
+1: bset #FPSR_AEXC_INEX,%d0 | set INEX bit
+2: move.l %d0,(FPD_FPSR,FPDATA)
+#else
+ | same as above, greatly optimized, but untested (yet)
+ move.l %d0,%d2
+ lsr.l #5,%d0
+ move.l %d0,%d1
+ lsr.l #4,%d1
+ or.l %d0,%d1
+ and.b #0x08,%d1
+ move.l %d2,%d0
+ lsr.l #6,%d0
+ or.l %d1,%d0
+ move.l %d2,%d1
+ lsr.l #4,%d1
+ or.b #0xdf,%d1
+ and.b %d1,%d0
+ move.l %d2,%d1
+ lsr.l #7,%d1
+ and.b #0x80,%d1
+ or.b %d1,%d0
+ and.b #0xf8,%d0
+ or.b %d0,%d2
+ move.l %d2,(FPD_FPSR,FPDATA)
+#endif
+ move.b (FPD_FPSR+2,FPDATA),%d0
+ and.b (FPD_FPCR+2,FPDATA),%d0
+ jeq 1f
+ printf ,"send signal!!!\n"
+1: jra fp_end
diff --git a/arch/m68k/math-emu/multi_arith.h b/arch/m68k/math-emu/multi_arith.h
new file mode 100644
index 000000000000..02251e5afd89
--- /dev/null
+++ b/arch/m68k/math-emu/multi_arith.h
@@ -0,0 +1,819 @@
+/* multi_arith.h: multi-precision integer arithmetic functions, needed
+ to do extended-precision floating point.
+
+ (c) 1998 David Huggins-Daines.
+
+ Somewhat based on arch/alpha/math-emu/ieee-math.c, which is (c)
+ David Mosberger-Tang.
+
+ You may copy, modify, and redistribute this file under the terms of
+ the GNU General Public License, version 2, or any later version, at
+ your convenience. */
+
+/* Note:
+
+ These are not general multi-precision math routines. Rather, they
+ implement the subset of integer arithmetic that we need in order to
+ multiply, divide, and normalize 128-bit unsigned mantissae. */
+
+#ifndef MULTI_ARITH_H
+#define MULTI_ARITH_H
+
+#if 0 /* old code... */
+
+/* Unsigned only, because we don't need signs to multiply and divide. */
+typedef unsigned int int128[4];
+
+/* Word order */
+enum {
+ MSW128,
+ NMSW128,
+ NLSW128,
+ LSW128
+};
+
+/* big-endian */
+#define LO_WORD(ll) (((unsigned int *) &ll)[1])
+#define HI_WORD(ll) (((unsigned int *) &ll)[0])
+
+/* Convenience functions to stuff various integer values into int128s */
+
+static inline void zero128(int128 a)
+{
+ a[LSW128] = a[NLSW128] = a[NMSW128] = a[MSW128] = 0;
+}
+
+/* Human-readable word order in the arguments */
+static inline void set128(unsigned int i3, unsigned int i2, unsigned int i1,
+ unsigned int i0, int128 a)
+{
+ a[LSW128] = i0;
+ a[NLSW128] = i1;
+ a[NMSW128] = i2;
+ a[MSW128] = i3;
+}
+
+/* Convenience functions (for testing as well) */
+static inline void int64_to_128(unsigned long long src, int128 dest)
+{
+ dest[LSW128] = (unsigned int) src;
+ dest[NLSW128] = src >> 32;
+ dest[NMSW128] = dest[MSW128] = 0;
+}
+
+static inline void int128_to_64(const int128 src, unsigned long long *dest)
+{
+ *dest = src[LSW128] | (long long) src[NLSW128] << 32;
+}
+
+static inline void put_i128(const int128 a)
+{
+ printk("%08x %08x %08x %08x\n", a[MSW128], a[NMSW128],
+ a[NLSW128], a[LSW128]);
+}
+
+/* Internal shifters:
+
+ Note that these are only good for 0 < count < 32.
+ */
+
+static inline void _lsl128(unsigned int count, int128 a)
+{
+ a[MSW128] = (a[MSW128] << count) | (a[NMSW128] >> (32 - count));
+ a[NMSW128] = (a[NMSW128] << count) | (a[NLSW128] >> (32 - count));
+ a[NLSW128] = (a[NLSW128] << count) | (a[LSW128] >> (32 - count));
+ a[LSW128] <<= count;
+}
+
+static inline void _lsr128(unsigned int count, int128 a)
+{
+ a[LSW128] = (a[LSW128] >> count) | (a[NLSW128] << (32 - count));
+ a[NLSW128] = (a[NLSW128] >> count) | (a[NMSW128] << (32 - count));
+ a[NMSW128] = (a[NMSW128] >> count) | (a[MSW128] << (32 - count));
+ a[MSW128] >>= count;
+}
+
+/* Should be faster, one would hope */
+
+static inline void lslone128(int128 a)
+{
+ asm volatile ("lsl.l #1,%0\n"
+ "roxl.l #1,%1\n"
+ "roxl.l #1,%2\n"
+ "roxl.l #1,%3\n"
+ :
+ "=d" (a[LSW128]),
+ "=d"(a[NLSW128]),
+ "=d"(a[NMSW128]),
+ "=d"(a[MSW128])
+ :
+ "0"(a[LSW128]),
+ "1"(a[NLSW128]),
+ "2"(a[NMSW128]),
+ "3"(a[MSW128]));
+}
+
+static inline void lsrone128(int128 a)
+{
+ asm volatile ("lsr.l #1,%0\n"
+ "roxr.l #1,%1\n"
+ "roxr.l #1,%2\n"
+ "roxr.l #1,%3\n"
+ :
+ "=d" (a[MSW128]),
+ "=d"(a[NMSW128]),
+ "=d"(a[NLSW128]),
+ "=d"(a[LSW128])
+ :
+ "0"(a[MSW128]),
+ "1"(a[NMSW128]),
+ "2"(a[NLSW128]),
+ "3"(a[LSW128]));
+}
+
+/* Generalized 128-bit shifters:
+
+ These bit-shift to a multiple of 32, then move whole longwords. */
+
+static inline void lsl128(unsigned int count, int128 a)
+{
+ int wordcount, i;
+
+ if (count % 32)
+ _lsl128(count % 32, a);
+
+ if (0 == (wordcount = count / 32))
+ return;
+
+ /* argh, gak, endian-sensitive */
+ for (i = 0; i < 4 - wordcount; i++) {
+ a[i] = a[i + wordcount];
+ }
+ for (i = 3; i >= 4 - wordcount; --i) {
+ a[i] = 0;
+ }
+}
+
+static inline void lsr128(unsigned int count, int128 a)
+{
+ int wordcount, i;
+
+ if (count % 32)
+ _lsr128(count % 32, a);
+
+ if (0 == (wordcount = count / 32))
+ return;
+
+ for (i = 3; i >= wordcount; --i) {
+ a[i] = a[i - wordcount];
+ }
+ for (i = 0; i < wordcount; i++) {
+ a[i] = 0;
+ }
+}
+
+static inline int orl128(int a, int128 b)
+{
+ b[LSW128] |= a;
+}
+
+static inline int btsthi128(const int128 a)
+{
+ return a[MSW128] & 0x80000000;
+}
+
+/* test bits (numbered from 0 = LSB) up to and including "top" */
+static inline int bftestlo128(int top, const int128 a)
+{
+ int r = 0;
+
+ if (top > 31)
+ r |= a[LSW128];
+ if (top > 63)
+ r |= a[NLSW128];
+ if (top > 95)
+ r |= a[NMSW128];
+
+ r |= a[3 - (top / 32)] & ((1 << (top % 32 + 1)) - 1);
+
+ return (r != 0);
+}
+
+/* Aargh. We need these because GCC is broken */
+/* FIXME: do them in assembly, for goodness' sake! */
+static inline void mask64(int pos, unsigned long long *mask)
+{
+ *mask = 0;
+
+ if (pos < 32) {
+ LO_WORD(*mask) = (1 << pos) - 1;
+ return;
+ }
+ LO_WORD(*mask) = -1;
+ HI_WORD(*mask) = (1 << (pos - 32)) - 1;
+}
+
+static inline void bset64(int pos, unsigned long long *dest)
+{
+ /* This conditional will be optimized away. Thanks, GCC! */
+ if (pos < 32)
+ asm volatile ("bset %1,%0":"=m"
+ (LO_WORD(*dest)):"id"(pos));
+ else
+ asm volatile ("bset %1,%0":"=m"
+ (HI_WORD(*dest)):"id"(pos - 32));
+}
+
+static inline int btst64(int pos, unsigned long long dest)
+{
+ if (pos < 32)
+ return (0 != (LO_WORD(dest) & (1 << pos)));
+ else
+ return (0 != (HI_WORD(dest) & (1 << (pos - 32))));
+}
+
+static inline void lsl64(int count, unsigned long long *dest)
+{
+ if (count < 32) {
+ HI_WORD(*dest) = (HI_WORD(*dest) << count)
+ | (LO_WORD(*dest) >> count);
+ LO_WORD(*dest) <<= count;
+ return;
+ }
+ count -= 32;
+ HI_WORD(*dest) = LO_WORD(*dest) << count;
+ LO_WORD(*dest) = 0;
+}
+
+static inline void lsr64(int count, unsigned long long *dest)
+{
+ if (count < 32) {
+ LO_WORD(*dest) = (LO_WORD(*dest) >> count)
+ | (HI_WORD(*dest) << (32 - count));
+ HI_WORD(*dest) >>= count;
+ return;
+ }
+ count -= 32;
+ LO_WORD(*dest) = HI_WORD(*dest) >> count;
+ HI_WORD(*dest) = 0;
+}
+#endif
+
+static inline void fp_denormalize(struct fp_ext *reg, unsigned int cnt)
+{
+ reg->exp += cnt;
+
+ switch (cnt) {
+ case 0 ... 8:
+ reg->lowmant = reg->mant.m32[1] << (8 - cnt);
+ reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
+ (reg->mant.m32[0] << (32 - cnt));
+ reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
+ break;
+ case 9 ... 32:
+ reg->lowmant = reg->mant.m32[1] >> (cnt - 8);
+ if (reg->mant.m32[1] << (40 - cnt))
+ reg->lowmant |= 1;
+ reg->mant.m32[1] = (reg->mant.m32[1] >> cnt) |
+ (reg->mant.m32[0] << (32 - cnt));
+ reg->mant.m32[0] = reg->mant.m32[0] >> cnt;
+ break;
+ case 33 ... 39:
+ asm volatile ("bfextu %1{%2,#8},%0" : "=d" (reg->lowmant)
+ : "m" (reg->mant.m32[0]), "d" (64 - cnt));
+ if (reg->mant.m32[1] << (40 - cnt))
+ reg->lowmant |= 1;
+ reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
+ reg->mant.m32[0] = 0;
+ break;
+ case 40 ... 71:
+ reg->lowmant = reg->mant.m32[0] >> (cnt - 40);
+ if ((reg->mant.m32[0] << (72 - cnt)) || reg->mant.m32[1])
+ reg->lowmant |= 1;
+ reg->mant.m32[1] = reg->mant.m32[0] >> (cnt - 32);
+ reg->mant.m32[0] = 0;
+ break;
+ default:
+ reg->lowmant = reg->mant.m32[0] || reg->mant.m32[1];
+ reg->mant.m32[0] = 0;
+ reg->mant.m32[1] = 0;
+ break;
+ }
+}
+
+static inline int fp_overnormalize(struct fp_ext *reg)
+{
+ int shift;
+
+ if (reg->mant.m32[0]) {
+ asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[0]));
+ reg->mant.m32[0] = (reg->mant.m32[0] << shift) | (reg->mant.m32[1] >> (32 - shift));
+ reg->mant.m32[1] = (reg->mant.m32[1] << shift);
+ } else {
+ asm ("bfffo %1{#0,#32},%0" : "=d" (shift) : "dm" (reg->mant.m32[1]));
+ reg->mant.m32[0] = (reg->mant.m32[1] << shift);
+ reg->mant.m32[1] = 0;
+ shift += 32;
+ }
+
+ return shift;
+}
+
+static inline int fp_addmant(struct fp_ext *dest, struct fp_ext *src)
+{
+ int carry;
+
+ /* we assume here, gcc only insert move and a clr instr */
+ asm volatile ("add.b %1,%0" : "=d,g" (dest->lowmant)
+ : "g,d" (src->lowmant), "0,0" (dest->lowmant));
+ asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[1])
+ : "d" (src->mant.m32[1]), "0" (dest->mant.m32[1]));
+ asm volatile ("addx.l %1,%0" : "=d" (dest->mant.m32[0])
+ : "d" (src->mant.m32[0]), "0" (dest->mant.m32[0]));
+ asm volatile ("addx.l %0,%0" : "=d" (carry) : "0" (0));
+
+ return carry;
+}
+
+static inline int fp_addcarry(struct fp_ext *reg)
+{
+ if (++reg->exp == 0x7fff) {
+ if (reg->mant.m64)
+ fp_set_sr(FPSR_EXC_INEX2);
+ reg->mant.m64 = 0;
+ fp_set_sr(FPSR_EXC_OVFL);
+ return 0;
+ }
+ reg->lowmant = (reg->mant.m32[1] << 7) | (reg->lowmant ? 1 : 0);
+ reg->mant.m32[1] = (reg->mant.m32[1] >> 1) |
+ (reg->mant.m32[0] << 31);
+ reg->mant.m32[0] = (reg->mant.m32[0] >> 1) | 0x80000000;
+
+ return 1;
+}
+
+static inline void fp_submant(struct fp_ext *dest, struct fp_ext *src1,
+ struct fp_ext *src2)
+{
+ /* we assume here, gcc only insert move and a clr instr */
+ asm volatile ("sub.b %1,%0" : "=d,g" (dest->lowmant)
+ : "g,d" (src2->lowmant), "0,0" (src1->lowmant));
+ asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[1])
+ : "d" (src2->mant.m32[1]), "0" (src1->mant.m32[1]));
+ asm volatile ("subx.l %1,%0" : "=d" (dest->mant.m32[0])
+ : "d" (src2->mant.m32[0]), "0" (src1->mant.m32[0]));
+}
+
+#define fp_mul64(desth, destl, src1, src2) ({ \
+ asm ("mulu.l %2,%1:%0" : "=d" (destl), "=d" (desth) \
+ : "g" (src1), "0" (src2)); \
+})
+#define fp_div64(quot, rem, srch, srcl, div) \
+ asm ("divu.l %2,%1:%0" : "=d" (quot), "=d" (rem) \
+ : "dm" (div), "1" (srch), "0" (srcl))
+#define fp_add64(dest1, dest2, src1, src2) ({ \
+ asm ("add.l %1,%0" : "=d,dm" (dest2) \
+ : "dm,d" (src2), "0,0" (dest2)); \
+ asm ("addx.l %1,%0" : "=d" (dest1) \
+ : "d" (src1), "0" (dest1)); \
+})
+#define fp_addx96(dest, src) ({ \
+ /* we assume here, gcc only insert move and a clr instr */ \
+ asm volatile ("add.l %1,%0" : "=d,g" (dest->m32[2]) \
+ : "g,d" (temp.m32[1]), "0,0" (dest->m32[2])); \
+ asm volatile ("addx.l %1,%0" : "=d" (dest->m32[1]) \
+ : "d" (temp.m32[0]), "0" (dest->m32[1])); \
+ asm volatile ("addx.l %1,%0" : "=d" (dest->m32[0]) \
+ : "d" (0), "0" (dest->m32[0])); \
+})
+#define fp_sub64(dest, src) ({ \
+ asm ("sub.l %1,%0" : "=d,dm" (dest.m32[1]) \
+ : "dm,d" (src.m32[1]), "0,0" (dest.m32[1])); \
+ asm ("subx.l %1,%0" : "=d" (dest.m32[0]) \
+ : "d" (src.m32[0]), "0" (dest.m32[0])); \
+})
+#define fp_sub96c(dest, srch, srcm, srcl) ({ \
+ char carry; \
+ asm ("sub.l %1,%0" : "=d,dm" (dest.m32[2]) \
+ : "dm,d" (srcl), "0,0" (dest.m32[2])); \
+ asm ("subx.l %1,%0" : "=d" (dest.m32[1]) \
+ : "d" (srcm), "0" (dest.m32[1])); \
+ asm ("subx.l %2,%1; scs %0" : "=d" (carry), "=d" (dest.m32[0]) \
+ : "d" (srch), "1" (dest.m32[0])); \
+ carry; \
+})
+
+static inline void fp_multiplymant(union fp_mant128 *dest, struct fp_ext *src1,
+ struct fp_ext *src2)
+{
+ union fp_mant64 temp;
+
+ fp_mul64(dest->m32[0], dest->m32[1], src1->mant.m32[0], src2->mant.m32[0]);
+ fp_mul64(dest->m32[2], dest->m32[3], src1->mant.m32[1], src2->mant.m32[1]);
+
+ fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[0], src2->mant.m32[1]);
+ fp_addx96(dest, temp);
+
+ fp_mul64(temp.m32[0], temp.m32[1], src1->mant.m32[1], src2->mant.m32[0]);
+ fp_addx96(dest, temp);
+}
+
+static inline void fp_dividemant(union fp_mant128 *dest, struct fp_ext *src,
+ struct fp_ext *div)
+{
+ union fp_mant128 tmp;
+ union fp_mant64 tmp64;
+ unsigned long *mantp = dest->m32;
+ unsigned long fix, rem, first, dummy;
+ int i;
+
+ /* the algorithm below requires dest to be smaller than div,
+ but both have the high bit set */
+ if (src->mant.m64 >= div->mant.m64) {
+ fp_sub64(src->mant, div->mant);
+ *mantp = 1;
+ } else
+ *mantp = 0;
+ mantp++;
+
+ /* basic idea behind this algorithm: we can't divide two 64bit numbers
+ (AB/CD) directly, but we can calculate AB/C0, but this means this
+ quotient is off by C0/CD, so we have to multiply the first result
+ to fix the result, after that we have nearly the correct result
+ and only a few corrections are needed. */
+
+ /* C0/CD can be precalculated, but it's an 64bit division again, but
+ we can make it a bit easier, by dividing first through C so we get
+ 10/1D and now only a single shift and the value fits into 32bit. */
+ fix = 0x80000000;
+ dummy = div->mant.m32[1] / div->mant.m32[0] + 1;
+ dummy = (dummy >> 1) | fix;
+ fp_div64(fix, dummy, fix, 0, dummy);
+ fix--;
+
+ for (i = 0; i < 3; i++, mantp++) {
+ if (src->mant.m32[0] == div->mant.m32[0]) {
+ fp_div64(first, rem, 0, src->mant.m32[1], div->mant.m32[0]);
+
+ fp_mul64(*mantp, dummy, first, fix);
+ *mantp += fix;
+ } else {
+ fp_div64(first, rem, src->mant.m32[0], src->mant.m32[1], div->mant.m32[0]);
+
+ fp_mul64(*mantp, dummy, first, fix);
+ }
+
+ fp_mul64(tmp.m32[0], tmp.m32[1], div->mant.m32[0], first - *mantp);
+ fp_add64(tmp.m32[0], tmp.m32[1], 0, rem);
+ tmp.m32[2] = 0;
+
+ fp_mul64(tmp64.m32[0], tmp64.m32[1], *mantp, div->mant.m32[1]);
+ fp_sub96c(tmp, 0, tmp64.m32[0], tmp64.m32[1]);
+
+ src->mant.m32[0] = tmp.m32[1];
+ src->mant.m32[1] = tmp.m32[2];
+
+ while (!fp_sub96c(tmp, 0, div->mant.m32[0], div->mant.m32[1])) {
+ src->mant.m32[0] = tmp.m32[1];
+ src->mant.m32[1] = tmp.m32[2];
+ *mantp += 1;
+ }
+ }
+}
+
+#if 0
+static inline unsigned int fp_fls128(union fp_mant128 *src)
+{
+ unsigned long data;
+ unsigned int res, off;
+
+ if ((data = src->m32[0]))
+ off = 0;
+ else if ((data = src->m32[1]))
+ off = 32;
+ else if ((data = src->m32[2]))
+ off = 64;
+ else if ((data = src->m32[3]))
+ off = 96;
+ else
+ return 128;
+
+ asm ("bfffo %1{#0,#32},%0" : "=d" (res) : "dm" (data));
+ return res + off;
+}
+
+static inline void fp_shiftmant128(union fp_mant128 *src, int shift)
+{
+ unsigned long sticky;
+
+ switch (shift) {
+ case 0:
+ return;
+ case 1:
+ asm volatile ("lsl.l #1,%0"
+ : "=d" (src->m32[3]) : "0" (src->m32[3]));
+ asm volatile ("roxl.l #1,%0"
+ : "=d" (src->m32[2]) : "0" (src->m32[2]));
+ asm volatile ("roxl.l #1,%0"
+ : "=d" (src->m32[1]) : "0" (src->m32[1]));
+ asm volatile ("roxl.l #1,%0"
+ : "=d" (src->m32[0]) : "0" (src->m32[0]));
+ return;
+ case 2 ... 31:
+ src->m32[0] = (src->m32[0] << shift) | (src->m32[1] >> (32 - shift));
+ src->m32[1] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
+ src->m32[2] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
+ src->m32[3] = (src->m32[3] << shift);
+ return;
+ case 32 ... 63:
+ shift -= 32;
+ src->m32[0] = (src->m32[1] << shift) | (src->m32[2] >> (32 - shift));
+ src->m32[1] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
+ src->m32[2] = (src->m32[3] << shift);
+ src->m32[3] = 0;
+ return;
+ case 64 ... 95:
+ shift -= 64;
+ src->m32[0] = (src->m32[2] << shift) | (src->m32[3] >> (32 - shift));
+ src->m32[1] = (src->m32[3] << shift);
+ src->m32[2] = src->m32[3] = 0;
+ return;
+ case 96 ... 127:
+ shift -= 96;
+ src->m32[0] = (src->m32[3] << shift);
+ src->m32[1] = src->m32[2] = src->m32[3] = 0;
+ return;
+ case -31 ... -1:
+ shift = -shift;
+ sticky = 0;
+ if (src->m32[3] << (32 - shift))
+ sticky = 1;
+ src->m32[3] = (src->m32[3] >> shift) | (src->m32[2] << (32 - shift)) | sticky;
+ src->m32[2] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift));
+ src->m32[1] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
+ src->m32[0] = (src->m32[0] >> shift);
+ return;
+ case -63 ... -32:
+ shift = -shift - 32;
+ sticky = 0;
+ if ((src->m32[2] << (32 - shift)) || src->m32[3])
+ sticky = 1;
+ src->m32[3] = (src->m32[2] >> shift) | (src->m32[1] << (32 - shift)) | sticky;
+ src->m32[2] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift));
+ src->m32[1] = (src->m32[0] >> shift);
+ src->m32[0] = 0;
+ return;
+ case -95 ... -64:
+ shift = -shift - 64;
+ sticky = 0;
+ if ((src->m32[1] << (32 - shift)) || src->m32[2] || src->m32[3])
+ sticky = 1;
+ src->m32[3] = (src->m32[1] >> shift) | (src->m32[0] << (32 - shift)) | sticky;
+ src->m32[2] = (src->m32[0] >> shift);
+ src->m32[1] = src->m32[0] = 0;
+ return;
+ case -127 ... -96:
+ shift = -shift - 96;
+ sticky = 0;
+ if ((src->m32[0] << (32 - shift)) || src->m32[1] || src->m32[2] || src->m32[3])
+ sticky = 1;
+ src->m32[3] = (src->m32[0] >> shift) | sticky;
+ src->m32[2] = src->m32[1] = src->m32[0] = 0;
+ return;
+ }
+
+ if (shift < 0 && (src->m32[0] || src->m32[1] || src->m32[2] || src->m32[3]))
+ src->m32[3] = 1;
+ else
+ src->m32[3] = 0;
+ src->m32[2] = 0;
+ src->m32[1] = 0;
+ src->m32[0] = 0;
+}
+#endif
+
+static inline void fp_putmant128(struct fp_ext *dest, union fp_mant128 *src,
+ int shift)
+{
+ unsigned long tmp;
+
+ switch (shift) {
+ case 0:
+ dest->mant.m64 = src->m64[0];
+ dest->lowmant = src->m32[2] >> 24;
+ if (src->m32[3] || (src->m32[2] << 8))
+ dest->lowmant |= 1;
+ break;
+ case 1:
+ asm volatile ("lsl.l #1,%0"
+ : "=d" (tmp) : "0" (src->m32[2]));
+ asm volatile ("roxl.l #1,%0"
+ : "=d" (dest->mant.m32[1]) : "0" (src->m32[1]));
+ asm volatile ("roxl.l #1,%0"
+ : "=d" (dest->mant.m32[0]) : "0" (src->m32[0]));
+ dest->lowmant = tmp >> 24;
+ if (src->m32[3] || (tmp << 8))
+ dest->lowmant |= 1;
+ break;
+ case 31:
+ asm volatile ("lsr.l #1,%1; roxr.l #1,%0"
+ : "=d" (dest->mant.m32[0])
+ : "d" (src->m32[0]), "0" (src->m32[1]));
+ asm volatile ("roxr.l #1,%0"
+ : "=d" (dest->mant.m32[1]) : "0" (src->m32[2]));
+ asm volatile ("roxr.l #1,%0"
+ : "=d" (tmp) : "0" (src->m32[3]));
+ dest->lowmant = tmp >> 24;
+ if (src->m32[3] << 7)
+ dest->lowmant |= 1;
+ break;
+ case 32:
+ dest->mant.m32[0] = src->m32[1];
+ dest->mant.m32[1] = src->m32[2];
+ dest->lowmant = src->m32[3] >> 24;
+ if (src->m32[3] << 8)
+ dest->lowmant |= 1;
+ break;
+ }
+}
+
+#if 0 /* old code... */
+static inline int fls(unsigned int a)
+{
+ int r;
+
+ asm volatile ("bfffo %1{#0,#32},%0"
+ : "=d" (r) : "md" (a));
+ return r;
+}
+
+/* fls = "find last set" (cf. ffs(3)) */
+static inline int fls128(const int128 a)
+{
+ if (a[MSW128])
+ return fls(a[MSW128]);
+ if (a[NMSW128])
+ return fls(a[NMSW128]) + 32;
+ /* XXX: it probably never gets beyond this point in actual
+ use, but that's indicative of a more general problem in the
+ algorithm (i.e. as per the actual 68881 implementation, we
+ really only need at most 67 bits of precision [plus
+ overflow]) so I'm not going to fix it. */
+ if (a[NLSW128])
+ return fls(a[NLSW128]) + 64;
+ if (a[LSW128])
+ return fls(a[LSW128]) + 96;
+ else
+ return -1;
+}
+
+static inline int zerop128(const int128 a)
+{
+ return !(a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]);
+}
+
+static inline int nonzerop128(const int128 a)
+{
+ return (a[LSW128] | a[NLSW128] | a[NMSW128] | a[MSW128]);
+}
+
+/* Addition and subtraction */
+/* Do these in "pure" assembly, because "extended" asm is unmanageable
+ here */
+static inline void add128(const int128 a, int128 b)
+{
+ /* rotating carry flags */
+ unsigned int carry[2];
+
+ carry[0] = a[LSW128] > (0xffffffff - b[LSW128]);
+ b[LSW128] += a[LSW128];
+
+ carry[1] = a[NLSW128] > (0xffffffff - b[NLSW128] - carry[0]);
+ b[NLSW128] = a[NLSW128] + b[NLSW128] + carry[0];
+
+ carry[0] = a[NMSW128] > (0xffffffff - b[NMSW128] - carry[1]);
+ b[NMSW128] = a[NMSW128] + b[NMSW128] + carry[1];
+
+ b[MSW128] = a[MSW128] + b[MSW128] + carry[0];
+}
+
+/* Note: assembler semantics: "b -= a" */
+static inline void sub128(const int128 a, int128 b)
+{
+ /* rotating borrow flags */
+ unsigned int borrow[2];
+
+ borrow[0] = b[LSW128] < a[LSW128];
+ b[LSW128] -= a[LSW128];
+
+ borrow[1] = b[NLSW128] < a[NLSW128] + borrow[0];
+ b[NLSW128] = b[NLSW128] - a[NLSW128] - borrow[0];
+
+ borrow[0] = b[NMSW128] < a[NMSW128] + borrow[1];
+ b[NMSW128] = b[NMSW128] - a[NMSW128] - borrow[1];
+
+ b[MSW128] = b[MSW128] - a[MSW128] - borrow[0];
+}
+
+/* Poor man's 64-bit expanding multiply */
+static inline void mul64(unsigned long long a, unsigned long long b, int128 c)
+{
+ unsigned long long acc;
+ int128 acc128;
+
+ zero128(acc128);
+ zero128(c);
+
+ /* first the low words */
+ if (LO_WORD(a) && LO_WORD(b)) {
+ acc = (long long) LO_WORD(a) * LO_WORD(b);
+ c[NLSW128] = HI_WORD(acc);
+ c[LSW128] = LO_WORD(acc);
+ }
+ /* Next the high words */
+ if (HI_WORD(a) && HI_WORD(b)) {
+ acc = (long long) HI_WORD(a) * HI_WORD(b);
+ c[MSW128] = HI_WORD(acc);
+ c[NMSW128] = LO_WORD(acc);
+ }
+ /* The middle words */
+ if (LO_WORD(a) && HI_WORD(b)) {
+ acc = (long long) LO_WORD(a) * HI_WORD(b);
+ acc128[NMSW128] = HI_WORD(acc);
+ acc128[NLSW128] = LO_WORD(acc);
+ add128(acc128, c);
+ }
+ /* The first and last words */
+ if (HI_WORD(a) && LO_WORD(b)) {
+ acc = (long long) HI_WORD(a) * LO_WORD(b);
+ acc128[NMSW128] = HI_WORD(acc);
+ acc128[NLSW128] = LO_WORD(acc);
+ add128(acc128, c);
+ }
+}
+
+/* Note: unsigned */
+static inline int cmp128(int128 a, int128 b)
+{
+ if (a[MSW128] < b[MSW128])
+ return -1;
+ if (a[MSW128] > b[MSW128])
+ return 1;
+ if (a[NMSW128] < b[NMSW128])
+ return -1;
+ if (a[NMSW128] > b[NMSW128])
+ return 1;
+ if (a[NLSW128] < b[NLSW128])
+ return -1;
+ if (a[NLSW128] > b[NLSW128])
+ return 1;
+
+ return (signed) a[LSW128] - b[LSW128];
+}
+
+inline void div128(int128 a, int128 b, int128 c)
+{
+ int128 mask;
+
+ /* Algorithm:
+
+ Shift the divisor until it's at least as big as the
+ dividend, keeping track of the position to which we've
+ shifted it, i.e. the power of 2 which we've multiplied it
+ by.
+
+ Then, for this power of 2 (the mask), and every one smaller
+ than it, subtract the mask from the dividend and add it to
+ the quotient until the dividend is smaller than the raised
+ divisor. At this point, divide the dividend and the mask
+ by 2 (i.e. shift one place to the right). Lather, rinse,
+ and repeat, until there are no more powers of 2 left. */
+
+ /* FIXME: needless to say, there's room for improvement here too. */
+
+ /* Shift up */
+ /* XXX: since it just has to be "at least as big", we can
+ probably eliminate this horribly wasteful loop. I will
+ have to prove this first, though */
+ set128(0, 0, 0, 1, mask);
+ while (cmp128(b, a) < 0 && !btsthi128(b)) {
+ lslone128(b);
+ lslone128(mask);
+ }
+
+ /* Shift down */
+ zero128(c);
+ do {
+ if (cmp128(a, b) >= 0) {
+ sub128(b, a);
+ add128(mask, c);
+ }
+ lsrone128(mask);
+ lsrone128(b);
+ } while (nonzerop128(mask));
+
+ /* The remainder is in a... */
+}
+#endif
+
+#endif /* MULTI_ARITH_H */