From b41da4ebb2658c4abaaab01e64b9d0bb67dba003 Mon Sep 17 00:00:00 2001 From: Tom Musta Date: Wed, 12 Feb 2014 15:23:11 -0600 Subject: target-ppc: Altivec 2.07: Quadword Addition and Subtracation This patch adds the Vector Quadword Addition and Subtraction instructions introduced in Power ISA Version 2.07: - Vector Add Unsigned Quadword Modulo (vadduqm) - Vector Add & Write Carry Unsigned Quadword (vaddcuq) - Vector Add Extended Unsigned Quadword (vaddeuqm) - Vector Add Extended & Write Carry Unsigned Quadword (vaddecuq) - Vector Subtract Unsigned Quadword Modulo (vsubuqm) - Vector Subtract & Write Carry Unsigned Quadword (vsubcuq) - Vector Subtract Extended Unsigned Quadword (vsubeuqm) - Vector Subtract Extended & Write Carry Unsigned Quadword (vsubecuq) Signed-off-by: Tom Musta Signed-off-by: Alexander Graf --- target-ppc/helper.h | 8 +++ target-ppc/int_helper.c | 185 ++++++++++++++++++++++++++++++++++++++++++++++++ target-ppc/translate.c | 18 +++++ 3 files changed, 211 insertions(+) (limited to 'target-ppc') diff --git a/target-ppc/helper.h b/target-ppc/helper.h index 3201268736..14839305d7 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -211,6 +211,14 @@ DEF_HELPER_4(vadduws, void, env, avr, avr, avr) DEF_HELPER_4(vsububs, void, env, avr, avr, avr) DEF_HELPER_4(vsubuhs, void, env, avr, avr, avr) DEF_HELPER_4(vsubuws, void, env, avr, avr, avr) +DEF_HELPER_3(vadduqm, void, avr, avr, avr) +DEF_HELPER_4(vaddecuq, void, avr, avr, avr, avr) +DEF_HELPER_4(vaddeuqm, void, avr, avr, avr, avr) +DEF_HELPER_3(vaddcuq, void, avr, avr, avr) +DEF_HELPER_3(vsubuqm, void, avr, avr, avr) +DEF_HELPER_4(vsubecuq, void, avr, avr, avr, avr) +DEF_HELPER_4(vsubeuqm, void, avr, avr, avr, avr) +DEF_HELPER_3(vsubcuq, void, avr, avr, avr) DEF_HELPER_3(vrlb, void, avr, avr, avr) DEF_HELPER_3(vrlh, void, avr, avr, avr) DEF_HELPER_3(vrlw, void, avr, avr, avr) diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c index b4a7298523..72fb13ccfb 100644 --- a/target-ppc/int_helper.c +++ b/target-ppc/int_helper.c @@ -1568,6 +1568,191 @@ VGENERIC_DO(popcntd, u64) #undef VGENERIC_DO +#if defined(HOST_WORDS_BIGENDIAN) +#define QW_ONE { .u64 = { 0, 1 } } +#else +#define QW_ONE { .u64 = { 1, 0 } } +#endif + +#ifndef CONFIG_INT128 + +static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a) +{ + t->u64[0] = ~a.u64[0]; + t->u64[1] = ~a.u64[1]; +} + +static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b) +{ + if (a.u64[HI_IDX] < b.u64[HI_IDX]) { + return -1; + } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) { + return 1; + } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) { + return -1; + } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) { + return 1; + } else { + return 0; + } +} + +static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) +{ + t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX]; + t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] + + (~a.u64[LO_IDX] < b.u64[LO_IDX]); +} + +static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b) +{ + ppc_avr_t not_a; + t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX]; + t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] + + (~a.u64[LO_IDX] < b.u64[LO_IDX]); + avr_qw_not(¬_a, a); + return avr_qw_cmpu(not_a, b) < 0; +} + +#endif + +void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ +#ifdef CONFIG_INT128 + r->u128 = a->u128 + b->u128; +#else + avr_qw_add(r, *a, *b); +#endif +} + +void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ +#ifdef CONFIG_INT128 + r->u128 = a->u128 + b->u128 + (c->u128 & 1); +#else + + if (c->u64[LO_IDX] & 1) { + ppc_avr_t tmp; + + tmp.u64[HI_IDX] = 0; + tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1; + avr_qw_add(&tmp, *a, tmp); + avr_qw_add(r, tmp, *b); + } else { + avr_qw_add(r, *a, *b); + } +#endif +} + +void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ +#ifdef CONFIG_INT128 + r->u128 = (~a->u128 < b->u128); +#else + ppc_avr_t not_a; + + avr_qw_not(¬_a, *a); + + r->u64[HI_IDX] = 0; + r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0); +#endif +} + +void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ +#ifdef CONFIG_INT128 + int carry_out = (~a->u128 < b->u128); + if (!carry_out && (c->u128 & 1)) { + carry_out = ((a->u128 + b->u128 + 1) == 0) && + ((a->u128 != 0) || (b->u128 != 0)); + } + r->u128 = carry_out; +#else + + int carry_in = c->u64[LO_IDX] & 1; + int carry_out = 0; + ppc_avr_t tmp; + + carry_out = avr_qw_addc(&tmp, *a, *b); + + if (!carry_out && carry_in) { + ppc_avr_t one = QW_ONE; + carry_out = avr_qw_addc(&tmp, tmp, one); + } + r->u64[HI_IDX] = 0; + r->u64[LO_IDX] = carry_out; +#endif +} + +void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ +#ifdef CONFIG_INT128 + r->u128 = a->u128 - b->u128; +#else + ppc_avr_t tmp; + ppc_avr_t one = QW_ONE; + + avr_qw_not(&tmp, *b); + avr_qw_add(&tmp, *a, tmp); + avr_qw_add(r, tmp, one); +#endif +} + +void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ +#ifdef CONFIG_INT128 + r->u128 = a->u128 + ~b->u128 + (c->u128 & 1); +#else + ppc_avr_t tmp, sum; + + avr_qw_not(&tmp, *b); + avr_qw_add(&sum, *a, tmp); + + tmp.u64[HI_IDX] = 0; + tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1; + avr_qw_add(r, sum, tmp); +#endif +} + +void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ +#ifdef CONFIG_INT128 + r->u128 = (~a->u128 < ~b->u128) || + (a->u128 + ~b->u128 == (__uint128_t)-1); +#else + int carry = (avr_qw_cmpu(*a, *b) > 0); + if (!carry) { + ppc_avr_t tmp; + avr_qw_not(&tmp, *b); + avr_qw_add(&tmp, *a, tmp); + carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull)); + } + r->u64[HI_IDX] = 0; + r->u64[LO_IDX] = carry; +#endif +} + +void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ +#ifdef CONFIG_INT128 + r->u128 = + (~a->u128 < ~b->u128) || + ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1)); +#else + int carry_in = c->u64[LO_IDX] & 1; + int carry_out = (avr_qw_cmpu(*a, *b) > 0); + if (!carry_out && carry_in) { + ppc_avr_t tmp; + avr_qw_not(&tmp, *b); + avr_qw_add(&tmp, *a, tmp); + carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull)); + } + + r->u64[HI_IDX] = 0; + r->u64[LO_IDX] = carry_out; +#endif +} + #undef VECTOR_FOR_INORDER_I #undef HI_IDX diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 3d38a25cc4..c4d7f0f55a 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -7043,6 +7043,18 @@ GEN_VXFORM_ENV(vsubuws, 0, 26); GEN_VXFORM_ENV(vsubsbs, 0, 28); GEN_VXFORM_ENV(vsubshs, 0, 29); GEN_VXFORM_ENV(vsubsws, 0, 30); +GEN_VXFORM(vadduqm, 0, 4); +GEN_VXFORM(vaddcuq, 0, 5); +GEN_VXFORM3(vaddeuqm, 30, 0); +GEN_VXFORM3(vaddecuq, 30, 0); +GEN_VXFORM_DUAL(vaddeuqm, PPC_NONE, PPC2_ALTIVEC_207, \ + vaddecuq, PPC_NONE, PPC2_ALTIVEC_207) +GEN_VXFORM(vsubuqm, 0, 20); +GEN_VXFORM(vsubcuq, 0, 21); +GEN_VXFORM3(vsubeuqm, 31, 0); +GEN_VXFORM3(vsubecuq, 31, 0); +GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \ + vsubecuq, PPC_NONE, PPC2_ALTIVEC_207) GEN_VXFORM(vrlb, 2, 0); GEN_VXFORM(vrlh, 2, 1); GEN_VXFORM(vrlw, 2, 2); @@ -10488,6 +10500,12 @@ GEN_VXFORM(vsubuws, 0, 26), GEN_VXFORM(vsubsbs, 0, 28), GEN_VXFORM(vsubshs, 0, 29), GEN_VXFORM(vsubsws, 0, 30), +GEN_VXFORM_207(vadduqm, 0, 4), +GEN_VXFORM_207(vaddcuq, 0, 5), +GEN_VXFORM_DUAL(vaddeuqm, vaddecuq, 30, 0xFF, PPC_NONE, PPC2_ALTIVEC_207), +GEN_VXFORM_207(vsubuqm, 0, 20), +GEN_VXFORM_207(vsubcuq, 0, 21), +GEN_VXFORM_DUAL(vsubeuqm, vsubecuq, 31, 0xFF, PPC_NONE, PPC2_ALTIVEC_207), GEN_VXFORM(vrlb, 2, 0), GEN_VXFORM(vrlh, 2, 1), GEN_VXFORM(vrlw, 2, 2), -- cgit v1.2.1