From 87982401963ff686b0289abaf4455417eadcfdcb Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Fri, 4 Feb 2011 15:17:51 +0100 Subject: Set the right overflow bit for neon 32 and 64 bit saturating add/sub. Signed-off-by: Christophe Lyon Reviewed-by: Peter Maydell Signed-off-by: Aurelien Jarno (cherry picked from commit 72902672dc2ed6281cdb205259c1d52ecf01f6b2) --- target-arm/helpers.h | 12 ++++--- target-arm/neon_helper.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++ target-arm/op_helper.c | 49 -------------------------- target-arm/translate.c | 18 +++++----- 4 files changed, 105 insertions(+), 63 deletions(-) diff --git a/target-arm/helpers.h b/target-arm/helpers.h index b88ebaecc6..8a2564ec6f 100644 --- a/target-arm/helpers.h +++ b/target-arm/helpers.h @@ -137,10 +137,6 @@ DEF_HELPER_2(rsqrte_f32, f32, f32, env) DEF_HELPER_2(recpe_u32, i32, i32, env) DEF_HELPER_2(rsqrte_u32, i32, i32, env) DEF_HELPER_4(neon_tbl, i32, i32, i32, i32, i32) -DEF_HELPER_2(neon_add_saturate_u64, i64, i64, i64) -DEF_HELPER_2(neon_add_saturate_s64, i64, i64, i64) -DEF_HELPER_2(neon_sub_saturate_u64, i64, i64, i64) -DEF_HELPER_2(neon_sub_saturate_s64, i64, i64, i64) DEF_HELPER_2(add_cc, i32, i32, i32) DEF_HELPER_2(adc_cc, i32, i32, i32) @@ -160,10 +156,18 @@ DEF_HELPER_3(neon_qadd_u8, i32, env, i32, i32) DEF_HELPER_3(neon_qadd_s8, i32, env, i32, i32) DEF_HELPER_3(neon_qadd_u16, i32, env, i32, i32) DEF_HELPER_3(neon_qadd_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qadd_u32, i32, env, i32, i32) +DEF_HELPER_3(neon_qadd_s32, i32, env, i32, i32) DEF_HELPER_3(neon_qsub_u8, i32, env, i32, i32) DEF_HELPER_3(neon_qsub_s8, i32, env, i32, i32) DEF_HELPER_3(neon_qsub_u16, i32, env, i32, i32) DEF_HELPER_3(neon_qsub_s16, i32, env, i32, i32) +DEF_HELPER_3(neon_qsub_u32, i32, env, i32, i32) +DEF_HELPER_3(neon_qsub_s32, i32, env, i32, i32) +DEF_HELPER_3(neon_qadd_u64, i64, env, i64, i64) +DEF_HELPER_3(neon_qadd_s64, i64, env, i64, i64) +DEF_HELPER_3(neon_qsub_u64, i64, env, i64, i64) +DEF_HELPER_3(neon_qsub_s64, i64, env, i64, i64) DEF_HELPER_2(neon_hadd_s8, i32, i32, i32) DEF_HELPER_2(neon_hadd_u8, i32, i32, i32) diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c index fead1525c4..268af33a6c 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -198,6 +198,28 @@ NEON_VOP_ENV(qadd_u16, neon_u16, 2) #undef NEON_FN #undef NEON_USAT +uint32_t HELPER(neon_qadd_u32)(CPUState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a + b; + if (res < a) { + SET_QC(); + res = ~0; + } + return res; +} + +uint64_t HELPER(neon_qadd_u64)(CPUState *env, uint64_t src1, uint64_t src2) +{ + uint64_t res; + + res = src1 + src2; + if (res < src1) { + SET_QC(); + res = ~(uint64_t)0; + } + return res; +} + #define NEON_SSAT(dest, src1, src2, type) do { \ int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ if (tmp != (type)tmp) { \ @@ -218,6 +240,28 @@ NEON_VOP_ENV(qadd_s16, neon_s16, 2) #undef NEON_FN #undef NEON_SSAT +uint32_t HELPER(neon_qadd_s32)(CPUState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a + b; + if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) { + SET_QC(); + res = ~(((int32_t)a >> 31) ^ SIGNBIT); + } + return res; +} + +uint64_t HELPER(neon_qadd_s64)(CPUState *env, uint64_t src1, uint64_t src2) +{ + uint64_t res; + + res = src1 + src2; + if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) { + SET_QC(); + res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; + } + return res; +} + #define NEON_USAT(dest, src1, src2, type) do { \ uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ if (tmp != (type)tmp) { \ @@ -234,6 +278,29 @@ NEON_VOP_ENV(qsub_u16, neon_u16, 2) #undef NEON_FN #undef NEON_USAT +uint32_t HELPER(neon_qsub_u32)(CPUState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a - b; + if (res > a) { + SET_QC(); + res = 0; + } + return res; +} + +uint64_t HELPER(neon_qsub_u64)(CPUState *env, uint64_t src1, uint64_t src2) +{ + uint64_t res; + + if (src1 < src2) { + SET_QC(); + res = 0; + } else { + res = src1 - src2; + } + return res; +} + #define NEON_SSAT(dest, src1, src2, type) do { \ int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ if (tmp != (type)tmp) { \ @@ -254,6 +321,28 @@ NEON_VOP_ENV(qsub_s16, neon_s16, 2) #undef NEON_FN #undef NEON_SSAT +uint32_t HELPER(neon_qsub_s32)(CPUState *env, uint32_t a, uint32_t b) +{ + uint32_t res = a - b; + if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) { + SET_QC(); + res = ~(((int32_t)a >> 31) ^ SIGNBIT); + } + return res; +} + +uint64_t HELPER(neon_qsub_s64)(CPUState *env, uint64_t src1, uint64_t src2) +{ + uint64_t res; + + res = src1 - src2; + if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) { + SET_QC(); + res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; + } + return res; +} + #define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 NEON_VOP(hadd_s8, neon_s8, 4) NEON_VOP(hadd_u8, neon_u8, 4) diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c index 43baa63006..3de2610348 100644 --- a/target-arm/op_helper.c +++ b/target-arm/op_helper.c @@ -424,52 +424,3 @@ uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i) return ((uint32_t)x >> shift) | (x << (32 - shift)); } } - -uint64_t HELPER(neon_add_saturate_s64)(uint64_t src1, uint64_t src2) -{ - uint64_t res; - - res = src1 + src2; - if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) { - env->QF = 1; - res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; - } - return res; -} - -uint64_t HELPER(neon_add_saturate_u64)(uint64_t src1, uint64_t src2) -{ - uint64_t res; - - res = src1 + src2; - if (res < src1) { - env->QF = 1; - res = ~(uint64_t)0; - } - return res; -} - -uint64_t HELPER(neon_sub_saturate_s64)(uint64_t src1, uint64_t src2) -{ - uint64_t res; - - res = src1 - src2; - if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) { - env->QF = 1; - res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; - } - return res; -} - -uint64_t HELPER(neon_sub_saturate_u64)(uint64_t src1, uint64_t src2) -{ - uint64_t res; - - if (src1 < src2) { - env->QF = 1; - res = 0; - } else { - res = src1 - src2; - } - return res; -} diff --git a/target-arm/translate.c b/target-arm/translate.c index 92d0ef0199..514954300d 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -3539,12 +3539,6 @@ static inline void gen_neon_rsb(int size, TCGv t0, TCGv t1) #define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32 #define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32 -/* FIXME: This is wrong. They set the wrong overflow bit. */ -#define gen_helper_neon_qadd_s32(a, e, b, c) gen_helper_add_saturate(a, b, c) -#define gen_helper_neon_qadd_u32(a, e, b, c) gen_helper_add_usaturate(a, b, c) -#define gen_helper_neon_qsub_s32(a, e, b, c) gen_helper_sub_saturate(a, b, c) -#define gen_helper_neon_qsub_u32(a, e, b, c) gen_helper_sub_usaturate(a, b, c) - #define GEN_NEON_INTEGER_OP_ENV(name) do { \ switch ((size << 1) | u) { \ case 0: \ @@ -4233,16 +4227,20 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) switch (op) { case 1: /* VQADD */ if (u) { - gen_helper_neon_add_saturate_u64(CPU_V001); + gen_helper_neon_qadd_u64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); } else { - gen_helper_neon_add_saturate_s64(CPU_V001); + gen_helper_neon_qadd_s64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); } break; case 5: /* VQSUB */ if (u) { - gen_helper_neon_sub_saturate_u64(CPU_V001); + gen_helper_neon_qsub_u64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); } else { - gen_helper_neon_sub_saturate_s64(CPU_V001); + gen_helper_neon_qsub_s64(cpu_V0, cpu_env, + cpu_V0, cpu_V1); } break; case 8: /* VSHL */ -- cgit v1.2.1