From 77f8dd5add8f02253dcea1454c9d2c76d7c788a7 Mon Sep 17 00:00:00 2001 From: bellard Date: Wed, 5 Mar 2003 22:24:48 +0000 Subject: float fixes - added bsr/bsf support git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@23 c046a42c-6fe2-441c-8c8c-71466251a162 --- Makefile | 1 + TODO | 2 +- cpu-i386.h | 4 +- op-i386.c | 185 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- ops_template.h | 36 +++++++++++ translate-i386.c | 83 ++++++++++++++++++++----- 6 files changed, 285 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index a2498001a1..c0a2915bed 100644 --- a/Makefile +++ b/Makefile @@ -83,6 +83,7 @@ cpu-i386.h gemu.h op-i386.c syscall-i386.h translate-i386.c\ dis-asm.h gen-i386.h op-i386.h syscall.c\ dis-buf.c i386-dis.c opreg_template.h syscall_defs.h\ i386.ld ppc.ld\ +tests/Makefile\ tests/test-i386.c tests/test-i386-shift.h tests/test-i386.h\ tests/test-i386-muldiv.h\ tests/test2.c tests/hello.c tests/sha1.c tests/test1.c diff --git a/TODO b/TODO index 24eb3863f6..a12c384678 100644 --- a/TODO +++ b/TODO @@ -3,4 +3,4 @@ - threads - fix printf for doubles (fp87.c bug ?) 
- make it self runnable (use same trick as ld.so : include its own relocator and libc) -- better FPU comparisons (ucom/com) +- fix FPU exceptions (in particular: gen_op_fpush not before mem load) diff --git a/cpu-i386.h b/cpu-i386.h index c245466630..ae49fffd04 100644 --- a/cpu-i386.h +++ b/cpu-i386.h @@ -114,7 +114,7 @@ enum { }; #ifdef __i386__ -#define USE_X86LDOUBLE +//#define USE_X86LDOUBLE #endif #ifdef USE_X86LDOUBLE @@ -201,7 +201,7 @@ static inline void stl(void *ptr, int v) *(uint32_t *)ptr = v; } -static inline void stq(void *ptr, int v) +static inline void stq(void *ptr, uint64_t v) { *(uint64_t *)ptr = v; } diff --git a/op-i386.c b/op-i386.c index f7f1a9849e..88bdb0a41f 100644 --- a/op-i386.c +++ b/op-i386.c @@ -1055,7 +1055,7 @@ typedef union { #else -typedef { +typedef union { double d; #ifndef WORDS_BIGENDIAN struct { @@ -1119,6 +1119,31 @@ void OPPROTO op_fldl_ST0_A0(void) ST0 = ldfq((void *)A0); } +#ifdef USE_X86LDOUBLE +void OPPROTO op_fldt_ST0_A0(void) +{ + ST0 = *(long double *)A0; +} +#else +void helper_fldt_ST0_A0(void) +{ + CPU86_LDoubleU temp; + int upper, e; + /* exponent + sign word at offset 8 */ + upper = lduw((uint8_t *)A0 + 8); + /* XXX: handle overflow ? 
*/ + e = (upper & 0x7fff) - 16383 + EXPBIAS; /* exponent */ + e |= (upper >> 4) & 0x800; /* sign */ + temp.ll = ((ldq((void *)A0) >> 11) & ((1LL << 52) - 1)) | ((uint64_t)e << 52); /* mantissa bits 11..62, then sign/exponent in the top 12 bits */ + ST0 = temp.d; +} + +void OPPROTO op_fldt_ST0_A0(void) +{ + helper_fldt_ST0_A0(); +} +#endif + void OPPROTO op_fild_ST0_A0(void) { ST0 = (CPU86_LDouble)ldsw((void *)A0); @@ -1143,9 +1168,34 @@ void OPPROTO op_fsts_ST0_A0(void) void OPPROTO op_fstl_ST0_A0(void) { - ST0 = ldfq((void *)A0); + stfq((void *)A0, (double)ST0); } +#ifdef USE_X86LDOUBLE +void OPPROTO op_fstt_ST0_A0(void) +{ + *(long double *)A0 = ST0; +} +#else +void helper_fstt_ST0_A0(void) +{ + CPU86_LDoubleU temp; + int e; + temp.d = ST0; + /* mantissa, with bit 63 always set (XXX: also for a zero ST0) */ + stq((void *)A0, (MANTD(temp) << 11) | (1LL << 63)); + /* exponent + sign */ + e = EXPD(temp) - EXPBIAS + 16383; + e |= SIGND(temp) >> 16; + stw((uint8_t *)A0 + 8, e); +} + +void OPPROTO op_fstt_ST0_A0(void) +{ + helper_fstt_ST0_A0(); +} +#endif + void OPPROTO op_fist_ST0_A0(void) { int val; @@ -1167,6 +1217,103 @@ void OPPROTO op_fistll_ST0_A0(void) stq((void *)A0, val); } +/* BCD ops */ + +#define MUL10(iv) ( iv + iv + (iv << 3) ) + +void helper_fbld_ST0_A0(void) +{ + uint8_t *seg; + CPU86_LDouble fpsrcop; + int m32i; + unsigned int v; + + /* in this code, seg/m32i will be used as temporary ptr/int */ + seg = (uint8_t *)A0 + 8; + v = ldub(seg--); + /* XXX: raise exception */ + if (v != 0) + return; + v = ldub(seg--); + /* XXX: raise exception */ + if ((v & 0xf0) != 0) + return; + m32i = v; /* <-- d14 */ + v = ldub(seg--); + m32i = MUL10(m32i) + (v >> 4); /* <-- val * 10 + d13 */ + m32i = MUL10(m32i) + (v & 0xf); /* <-- val * 10 + d12 */ + v = ldub(seg--); + m32i = MUL10(m32i) + (v >> 4); /* <-- val * 10 + d11 */ + m32i = MUL10(m32i) + (v & 0xf); /* <-- val * 10 + d10 */ + v = ldub(seg--); + m32i = MUL10(m32i) + (v >> 4); /* <-- val * 10 + d9 */ + m32i = MUL10(m32i) + (v & 0xf); /* <-- val * 10 + d8 */ + fpsrcop = ((CPU86_LDouble)m32i) * 100000000.0; + + v = ldub(seg--); + 
m32i = (v >> 4); /* <-- d7 */ + m32i = MUL10(m32i) + (v & 0xf); /* <-- val * 10 + d6 */ + v = ldub(seg--); + m32i = MUL10(m32i) + (v >> 4); /* <-- val * 10 + d5 */ + m32i = MUL10(m32i) + (v & 0xf); /* <-- val * 10 + d4 */ + v = ldub(seg--); + m32i = MUL10(m32i) + (v >> 4); /* <-- val * 10 + d3 */ + m32i = MUL10(m32i) + (v & 0xf); /* <-- val * 10 + d2 */ + v = ldub(seg); + m32i = MUL10(m32i) + (v >> 4); /* <-- val * 10 + d1 */ + m32i = MUL10(m32i) + (v & 0xf); /* <-- val * 10 + d0 */ + fpsrcop += ((CPU86_LDouble)m32i); + if ( ldub(seg+9) & 0x80 ) + fpsrcop = -fpsrcop; + ST0 = fpsrcop; +} + +void OPPROTO op_fbld_ST0_A0(void) +{ + helper_fbld_ST0_A0(); +} + +void helper_fbst_ST0_A0(void) +{ + CPU86_LDouble fptemp; + CPU86_LDouble fpsrcop; + int v; + uint8_t *mem_ref, *mem_end; + + fpsrcop = rint(ST0); + mem_ref = (uint8_t *)A0; + mem_end = mem_ref + 8; + if ( fpsrcop < 0.0 ) { + stw(mem_end, 0x8000); + fpsrcop = -fpsrcop; + } else { + stw(mem_end, 0x0000); + } + while (mem_ref < mem_end) { + if (fpsrcop == 0.0) + break; + fptemp = floor(fpsrcop/10.0); + v = ((int)(fpsrcop - fptemp*10.0)); + if (fptemp == 0.0) { + stb(mem_ref++, v); + break; + } + fpsrcop = fptemp; + fptemp = floor(fpsrcop/10.0); + v |= (((int)(fpsrcop - fptemp*10.0)) << 4); + stb(mem_ref++, v); + fpsrcop = fptemp; + } + while (mem_ref < mem_end) { + stb(mem_ref++, 0); + } +} + +void OPPROTO op_fbst_ST0_A0(void) +{ + helper_fbst_ST0_A0(); +} + /* FPU move */ static inline void fpush(void) @@ -1244,6 +1391,17 @@ void OPPROTO op_fcom_ST0_FT0(void) FORCE_RET(); } +/* XXX: handle nans */ +void OPPROTO op_fucom_ST0_FT0(void) +{ + env->fpus &= (~0x4500); /* (C3,C2,C0) <-- 000 */ + if (ST0 < FT0) + env->fpus |= 0x100; /* (C3,C2,C0) <-- 001 */ + else if (ST0 == FT0) + env->fpus |= 0x4000; /* (C3,C2,C0) <-- 100 */ + FORCE_RET(); +} + void OPPROTO op_fadd_ST0_FT0(void) { ST0 += FT0; @@ -1321,7 +1479,7 @@ void OPPROTO op_fabs_ST0(void) ST0 = fabs(ST0); } -void OPPROTO op_fxam_ST0(void) +void helper_fxam_ST0(void) 
{ CPU86_LDoubleU temp; int expdif; @@ -1346,7 +1504,11 @@ void OPPROTO op_fxam_ST0(void) } else { env->fpus |= 0x400; } - FORCE_RET(); +} + +void OPPROTO op_fxam_ST0(void) +{ + helper_fxam_ST0(); } void OPPROTO op_fld1_ST0(void) @@ -1354,12 +1516,12 @@ void OPPROTO op_fld1_ST0(void) ST0 = *(CPU86_LDouble *)&f15rk[1]; } -void OPPROTO op_fld2t_ST0(void) +void OPPROTO op_fldl2t_ST0(void) { ST0 = *(CPU86_LDouble *)&f15rk[6]; } -void OPPROTO op_fld2e_ST0(void) +void OPPROTO op_fldl2e_ST0(void) { ST0 = *(CPU86_LDouble *)&f15rk[5]; } @@ -1681,6 +1843,13 @@ void OPPROTO op_fnstsw_A0(void) stw((void *)A0, fpus); } +void OPPROTO op_fnstsw_EAX(void) +{ + int fpus; + fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + EAX = (EAX & 0xffff0000) | fpus; +} + void OPPROTO op_fnstcw_A0(void) { stw((void *)A0, env->fpuc); @@ -1784,6 +1953,10 @@ int cpu_x86_exec(CPUX86State *env1) eflags & CC_P ? 'P' : '-', eflags & CC_C ? 'C' : '-' ); +#if 1 + fprintf(logfile, "ST0=%f ST1=%f ST2=%f ST3=%f\n", + (double)ST0, (double)ST1, (double)ST(2), (double)ST(3)); +#endif } #endif cpu_x86_gen_code(code_gen_buffer, &code_gen_size, (uint8_t *)env->pc); diff --git a/ops_template.h b/ops_template.h index ce92db097b..745c27d7e5 100644 --- a/ops_template.h +++ b/ops_template.h @@ -633,6 +633,42 @@ void OPPROTO glue(glue(op_btc, SUFFIX), _T0_T1_cc)(void) T0 ^= (1 << count); } +void OPPROTO glue(glue(op_bsf, SUFFIX), _T0_cc)(void) +{ + int res, count; + res = T0 & DATA_MASK; + if (res != 0) { + count = 0; + while ((res & 1) == 0) { + count++; + res >>= 1; + } + T0 = count; + CC_DST = 1; /* source != 0: ZF = 0 (assuming ZF is later derived as CC_DST == 0 -- TODO confirm) */ + } else { + CC_DST = 0; /* source == 0: ZF = 1, T0 left unchanged */ + } + FORCE_RET(); +} + +void OPPROTO glue(glue(op_bsr, SUFFIX), _T0_cc)(void) +{ + int res, count; + res = T0 & DATA_MASK; + if (res != 0) { + count = DATA_BITS - 1; + while ((res & SIGN_MASK) == 0) { + count--; + res <<= 1; + } + T0 = count; + CC_DST = 1; /* source != 0: ZF = 0 (assuming ZF is later derived as CC_DST == 0 -- TODO confirm) */ + } else { + CC_DST = 0; /* source == 0: ZF = 1, T0 left unchanged */ + } + FORCE_RET(); +} + #endif /* string operations */ 
diff --git a/translate-i386.c b/translate-i386.c index 69c769c198..5c1fc3dd7b 100644 --- a/translate-i386.c +++ b/translate-i386.c @@ -431,6 +431,17 @@ static GenOpFunc *gen_op_btx_T0_T1_cc[2][4] = { }, }; +static GenOpFunc *gen_op_bsx_T0_cc[2][2] = { + [0] = { + gen_op_bsfw_T0_cc, + gen_op_bsrw_T0_cc, + }, + [1] = { + gen_op_bsfl_T0_cc, + gen_op_bsrl_T0_cc, + }, +}; + static GenOpFunc *gen_op_lds_T0_A0[3] = { gen_op_ldsb_T0_A0, gen_op_ldsw_T0_A0, @@ -652,15 +663,16 @@ static GenOpFunc *gen_op_fp_arith_ST0_FT0[8] = { gen_op_fdivr_ST0_FT0, }; +/* NOTE the exception in "r" op ordering */ static GenOpFunc1 *gen_op_fp_arith_STN_ST0[8] = { gen_op_fadd_STN_ST0, gen_op_fmul_STN_ST0, NULL, NULL, - gen_op_fsub_STN_ST0, gen_op_fsubr_STN_ST0, - gen_op_fdiv_STN_ST0, + gen_op_fsub_STN_ST0, gen_op_fdivr_STN_ST0, + gen_op_fdiv_STN_ST0, }; static void gen_op(DisasContext *s1, int op, int ot, int d, int s) @@ -1866,13 +1878,25 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) case 0x0f: /* fnstcw mem */ gen_op_fnstcw_A0(); break; + case 0x1d: /* fldt mem */ + gen_op_fpush(); + gen_op_fldt_ST0_A0(); + break; + case 0x1f: /* fstpt mem */ + gen_op_fstt_ST0_A0(); + gen_op_fpop(); + break; case 0x2f: /* fnstsw mem */ gen_op_fnstsw_A0(); break; case 0x3c: /* fbld */ + gen_op_fpush(); + gen_op_fbld_ST0_A0(); /* BCD load into the freshly pushed ST0 */ + break; case 0x3e: /* fbstp */ - error("float BCD not hanlded"); - return -1; + gen_op_fbst_ST0_A0(); + gen_op_fpop(); + break; case 0x3d: /* fildll */ gen_op_fpush(); gen_op_fildll_ST0_A0(); @@ -1882,7 +1906,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) gen_op_fpop(); break; default: - error("unhandled memory FP [op=0x%02x]\n", op); + error("unhandled FPm [op=0x%02x]\n", op); return -1; } } else { @@ -1895,7 +1919,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) gen_op_fmov_ST0_STN((opreg + 1) & 7); break; case 0x09: /* fxchg sti */ - gen_op_fxchg_ST0_STN((opreg + 1) & 7); + gen_op_fxchg_ST0_STN(opreg); break; 
case 0x0a: /* grp d9/2 */ switch(rm) { @@ -1929,24 +1953,31 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) { switch(rm) { case 0: + gen_op_fpush(); gen_op_fld1_ST0(); break; case 1: - gen_op_fld2t_ST0(); + gen_op_fpush(); + gen_op_fldl2t_ST0(); break; case 2: - gen_op_fld2e_ST0(); + gen_op_fpush(); + gen_op_fldl2e_ST0(); break; case 3: + gen_op_fpush(); gen_op_fldpi_ST0(); break; case 4: + gen_op_fpush(); gen_op_fldlg2_ST0(); break; case 5: + gen_op_fpush(); gen_op_fldln2_ST0(); break; case 6: + gen_op_fpush(); gen_op_fldz_ST0(); break; default: @@ -2021,12 +2052,12 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) op1 = op & 7; if (op >= 0x20) { gen_op_fp_arith_STN_ST0[op1](opreg); + if (op >= 0x30) + gen_op_fpop(); } else { gen_op_fmov_FT0_STN(opreg); gen_op_fp_arith_ST0_FT0[op1](); } - if (op >= 0x30) - gen_op_fpop(); } break; case 0x02: /* fcom */ @@ -2042,7 +2073,7 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) switch(rm) { case 1: /* fucompp */ gen_op_fmov_FT0_STN(1); - gen_op_fcom_ST0_FT0(); + gen_op_fucom_ST0_FT0(); gen_op_fpop(); gen_op_fpop(); break; @@ -2057,6 +2088,15 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) gen_op_fmov_STN_ST0(opreg); gen_op_fpop(); break; + case 0x2c: /* fucom st(i) */ + gen_op_fmov_FT0_STN(opreg); + gen_op_fucom_ST0_FT0(); + break; + case 0x2d: /* fucomp st(i) */ + gen_op_fmov_FT0_STN(opreg); + gen_op_fucom_ST0_FT0(); + gen_op_fpop(); + break; case 0x33: /* de/3 */ switch(rm) { case 1: /* fcompp */ @@ -2071,18 +2111,16 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) break; case 0x3c: /* df/4 */ switch(rm) { -#if 0 case 0: - gen_insn3(OP_FNSTS, OR_EAX, OR_ZERO, OR_ZERO); + gen_op_fnstsw_EAX(); break; -#endif default: - error("unhandled FP df/4\n"); + error("unhandled FP %x df/4\n", rm); return -1; } break; default: - error("unhandled FP\n"); + error("unhandled FPr [op=0x%x]\n", op); return -1; } } @@ -2413,7 
+2451,18 @@ long disas_insn(DisasContext *s, uint8_t *pc_start, int *is_jmp_ptr) gen_op_mov_reg_T0[ot][rm](); } break; - + case 0x1bc: /* bsf */ + case 0x1bd: /* bsr */ + ot = dflag ? OT_LONG : OT_WORD; + modrm = ldub(s->pc++); + reg = (modrm >> 3) & 7; + gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0); + gen_op_bsx_T0_cc[ot - OT_WORD][b & 1](); + /* NOTE: we always write back the result. Intel doc says it is + undefined if T0 == 0 */ + gen_op_mov_reg_T0[ot][reg](); + s->cc_op = CC_OP_LOGICB + ot; + break; /************************/ /* misc */ case 0x90: /* nop */ -- cgit v1.2.1