diff --git a/target-ppc/op_helper.c b/target-ppc/op_helper.c
index 0a084bf..56fab9c 100644
--- a/target-ppc/op_helper.c
+++ b/target-ppc/op_helper.c
@@ -15,8 +15,9 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA
  */
+#include <string.h>
 #include "exec.h"
 #include "host-utils.h"
 #include "helper.h"
@@ -27,6 +28,13 @@
 //#define DEBUG_EXCEPTIONS
 //#define DEBUG_SOFTWARE_TLB
 
+#ifdef DEBUG_SOFTWARE_TLB
+#  define LOG_SWTLB(...) qemu_log(__VA_ARGS__)
+#else
+#  define LOG_SWTLB(...) do { } while (0)
+#endif
+
+
 /*****************************************************************************/
 /* Exceptions processing helpers */
@@ -46,45 +54,17 @@ void helper_raise_exception (uint32_t exception)
 }
 
 /*****************************************************************************/
-/* Registers load and stores */
-target_ulong helper_load_cr (void)
-{
-    return (env->crf[0] << 28) |
-           (env->crf[1] << 24) |
-           (env->crf[2] << 20) |
-           (env->crf[3] << 16) |
-           (env->crf[4] << 12) |
-           (env->crf[5] << 8) |
-           (env->crf[6] << 4) |
-           (env->crf[7] << 0);
-}
-
-void helper_store_cr (target_ulong val, uint32_t mask)
-{
-    int i, sh;
-
-    for (i = 0, sh = 7; i < 8; i++, sh--) {
-        if (mask & (1 << sh))
-            env->crf[i] = (val >> (sh * 4)) & 0xFUL;
-    }
-}
-
-/*****************************************************************************/
 /* SPR accesses */
 void helper_load_dump_spr (uint32_t sprn)
 {
-    if (loglevel != 0) {
-        fprintf(logfile, "Read SPR %d %03x => " ADDRX "\n",
+    qemu_log("Read SPR %d %03x => " ADDRX "\n",
             sprn, sprn, env->spr[sprn]);
-    }
 }
 
 void helper_store_dump_spr (uint32_t sprn)
 {
-    if (loglevel != 0) {
-        fprintf(logfile, "Write SPR %d %03x <= " ADDRX "\n",
+    qemu_log("Write SPR %d %03x <= " ADDRX "\n",
             sprn, sprn, env->spr[sprn]);
-    }
 }
 
 target_ulong helper_load_tbl (void)
@@ -181,10 +161,8 @@ void helper_store_hid0_601 (target_ulong val)
         env->hflags_nmsr &= ~(1 << MSR_LE);
         env->hflags_nmsr |= (1 << MSR_LE) & (((val >> 3) & 1) << MSR_LE);
         env->hflags |= env->hflags_nmsr;
-        if (loglevel != 0) {
-            fprintf(logfile, "%s: set endianness to %c => " ADDRX "\n",
+        qemu_log("%s: set endianness to %c => " ADDRX "\n",
                  __func__, val & 0x8 ? 
'l' : 'b', env->hflags); - } } env->spr[SPR_HID0] = (uint32_t)val; } @@ -338,9 +316,10 @@ void helper_stsw(target_ulong addr, uint32_t nb, uint32_t reg) addr = addr_add(addr, 4); } if (unlikely(nb > 0)) { - for (sh = 24; nb > 0; nb--, sh -= 8) + for (sh = 24; nb > 0; nb--, sh -= 8) { stb(addr, (env->gpr[reg] >> sh) & 0xFF); - addr = addr_add(addr, 1); + addr = addr_add(addr, 1); + } } } @@ -554,15 +533,6 @@ uint32_t helper_float64_to_float32(uint64_t arg) return f.l; } -static always_inline int fpisneg (float64 d) -{ - CPU_DoubleU u; - - u.d = d; - - return u.ll >> 63 != 0; -} - static always_inline int isden (float64 d) { CPU_DoubleU u; @@ -572,53 +542,13 @@ static always_inline int isden (float64 d) return ((u.ll >> 52) & 0x7FF) == 0; } -static always_inline int iszero (float64 d) -{ - CPU_DoubleU u; - - u.d = d; - - return (u.ll & ~0x8000000000000000ULL) == 0; -} - -static always_inline int isinfinity (float64 d) -{ - CPU_DoubleU u; - - u.d = d; - - return ((u.ll >> 52) & 0x7FF) == 0x7FF && - (u.ll & 0x000FFFFFFFFFFFFFULL) == 0; -} - -#ifdef CONFIG_SOFTFLOAT -static always_inline int isfinite (float64 d) -{ - CPU_DoubleU u; - - u.d = d; - - return (((u.ll >> 52) & 0x7FF) != 0x7FF); -} - -static always_inline int isnormal (float64 d) -{ - CPU_DoubleU u; - - u.d = d; - - uint32_t exp = (u.ll >> 52) & 0x7FF; - return ((0 < exp) && (exp < 0x7FF)); -} -#endif - uint32_t helper_compute_fprf (uint64_t arg, uint32_t set_fprf) { CPU_DoubleU farg; int isneg; int ret; farg.ll = arg; - isneg = fpisneg(farg.d); + isneg = float64_is_neg(farg.d); if (unlikely(float64_is_nan(farg.d))) { if (float64_is_signaling_nan(farg.d)) { /* Signaling NaN: flags are undefined */ @@ -627,14 +557,14 @@ uint32_t helper_compute_fprf (uint64_t arg, uint32_t set_fprf) /* Quiet NaN */ ret = 0x11; } - } else if (unlikely(isinfinity(farg.d))) { + } else if (unlikely(float64_is_infinity(farg.d))) { /* +/- infinity */ if (isneg) ret = 0x09; else ret = 0x05; } else { - if (iszero(farg.d)) { + if (float64_is_zero(farg.d)) { /* +/- zero */ if (isneg) ret = 0x12; @@ -671,15 +601,13 @@ static always_inline uint64_t fload_invalid_op_excp (int op) int ve; ve = fpscr_ve; - if (op & POWERPC_EXCP_FP_VXSNAN) { - /* Operation on signaling NaN */ + switch (op) { + case POWERPC_EXCP_FP_VXSNAN: env->fpscr |= 1 << FPSCR_VXSNAN; - } - if (op & POWERPC_EXCP_FP_VXSOFT) { - /* Software-defined condition */ + break; + case POWERPC_EXCP_FP_VXSOFT: env->fpscr |= 1 << FPSCR_VXSOFT; - } - switch (op & ~(POWERPC_EXCP_FP_VXSOFT | POWERPC_EXCP_FP_VXSNAN)) { + break; case POWERPC_EXCP_FP_VXISI: /* Magnitude subtraction of infinities */ env->fpscr |= 1 << FPSCR_VXISI; @@ -718,7 +646,7 @@ static always_inline uint64_t fload_invalid_op_excp (int op) env->fpscr &= ~((1 << FPSCR_FR) | (1 << FPSCR_FI)); if (ve == 0) { /* Set the result to quiet NaN */ - ret = UINT64_MAX; + ret = 0xFFF8000000000000ULL; env->fpscr &= ~(0xF << FPSCR_FPCC); env->fpscr |= 0x11 << FPSCR_FPCC; } @@ -729,7 +657,7 @@ static always_inline uint64_t fload_invalid_op_excp (int op) env->fpscr &= ~((1 << FPSCR_FR) | (1 << FPSCR_FI)); if (ve == 0) { /* Set the result to quiet NaN */ - ret = UINT64_MAX; + ret = 0xFFF8000000000000ULL; env->fpscr &= ~(0xF << FPSCR_FPCC); env->fpscr |= 0x11 << FPSCR_FPCC; } @@ -748,7 +676,7 @@ static always_inline uint64_t fload_invalid_op_excp (int op) return ret; } -static always_inline uint64_t float_zero_divide_excp (uint64_t arg1, uint64_t arg2) +static always_inline void float_zero_divide_excp (void) { env->fpscr |= 1 << FPSCR_ZX; env->fpscr &= ~((1 << 
FPSCR_FR) | (1 << FPSCR_FI)); @@ -761,12 +689,7 @@ static always_inline uint64_t float_zero_divide_excp (uint64_t arg1, uint64_t ar helper_raise_exception_err(POWERPC_EXCP_PROGRAM, POWERPC_EXCP_FP | POWERPC_EXCP_FP_ZX); } - } else { - /* Set the result to infinity */ - arg1 = ((arg1 ^ arg2) & 0x8000000000000000ULL); - arg1 |= 0x7FFULL << 52; } - return arg1; } static always_inline void float_overflow_excp (void) @@ -843,6 +766,24 @@ static always_inline void fpscr_set_rounding_mode (void) set_float_rounding_mode(rnd_type, &env->fp_status); } +void helper_fpscr_clrbit (uint32_t bit) +{ + int prev; + + prev = (env->fpscr >> bit) & 1; + env->fpscr &= ~(1 << bit); + if (prev == 1) { + switch (bit) { + case FPSCR_RN1: + case FPSCR_RN: + fpscr_set_rounding_mode(); + break; + default: + break; + } + } +} + void helper_fpscr_setbit (uint32_t bit) { int prev; @@ -968,9 +909,9 @@ void helper_store_fpscr (uint64_t arg, uint32_t mask) prev = env->fpscr; new = (uint32_t)arg; - new &= ~0x90000000; - new |= prev & 0x90000000; - for (i = 0; i < 7; i++) { + new &= ~0x60000000; + new |= prev & 0x60000000; + for (i = 0; i < 8; i++) { if (mask & (1 << i)) { env->fpscr &= ~(0xF << (4 * i)); env->fpscr |= new & (0xF << (4 * i)); @@ -1000,12 +941,17 @@ void helper_float_check_status (void) /* Differred floating-point exception after target FPR update */ if (msr_fe0 != 0 || msr_fe1 != 0) helper_raise_exception_err(env->exception_index, env->error_code); - } else if (env->fp_status.float_exception_flags & float_flag_overflow) { - float_overflow_excp(); - } else if (env->fp_status.float_exception_flags & float_flag_underflow) { - float_underflow_excp(); - } else if (env->fp_status.float_exception_flags & float_flag_inexact) { - float_inexact_excp(); + } else { + int status = get_float_exception_flags(&env->fp_status); + if (status & float_flag_divbyzero) { + float_zero_divide_excp(); + } else if (status & float_flag_overflow) { + float_overflow_excp(); + } else if (status & float_flag_underflow) { + float_underflow_excp(); + } else if (status & float_flag_inexact) { + float_inexact_excp(); + } } #else if (env->exception_index == POWERPC_EXCP_PROGRAM && @@ -1020,7 +966,7 @@ void helper_float_check_status (void) #ifdef CONFIG_SOFTFLOAT void helper_reset_fpstatus (void) { - env->fp_status.float_exception_flags = 0; + set_float_exception_flags(0, &env->fp_status); } #endif @@ -1036,12 +982,12 @@ uint64_t helper_fadd (uint64_t arg1, uint64_t arg2) float64_is_signaling_nan(farg2.d))) { /* sNaN addition */ farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN); - } else if (likely(isfinite(farg1.d) || isfinite(farg2.d) || - fpisneg(farg1.d) == fpisneg(farg2.d))) { - farg1.d = float64_add(farg1.d, farg2.d, &env->fp_status); - } else { + } else if (unlikely(float64_is_infinity(farg1.d) && float64_is_infinity(farg2.d) && + float64_is_neg(farg1.d) != float64_is_neg(farg2.d))) { /* Magnitude subtraction of infinities */ - farg1.ll == fload_invalid_op_excp(POWERPC_EXCP_FP_VXISI); + farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXISI); + } else { + farg1.d = float64_add(farg1.d, farg2.d, &env->fp_status); } #else farg1.d = float64_add(farg1.d, farg2.d, &env->fp_status); @@ -1062,12 +1008,12 @@ uint64_t helper_fsub (uint64_t arg1, uint64_t arg2) float64_is_signaling_nan(farg2.d))) { /* sNaN subtraction */ farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN); - } else if (likely(isfinite(farg1.d) || isfinite(farg2.d) || - fpisneg(farg1.d) != fpisneg(farg2.d))) { - farg1.d = float64_sub(farg1.d, farg2.d, &env->fp_status); 
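+        /* Note: for fsub the VXISI case is inf - inf with *equal* signs
+         * (magnitude subtraction); fadd above tests for opposite signs.
+         * Every other operand mix falls through to softfloat, whose
+         * accrued flags are examined later by helper_float_check_status(). */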
-    } else {
+    } else if (unlikely(float64_is_infinity(farg1.d) && float64_is_infinity(farg2.d) &&
+                        float64_is_neg(farg1.d) == float64_is_neg(farg2.d))) {
         /* Magnitude subtraction of infinities */
         farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXISI);
+    } else {
+        farg1.d = float64_sub(farg1.d, farg2.d, &env->fp_status);
     }
 }
 #else
@@ -1088,14 +1034,13 @@ uint64_t helper_fmul (uint64_t arg1, uint64_t arg2)
                  float64_is_signaling_nan(farg2.d))) {
         /* sNaN multiplication */
         farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN);
-    } else if (unlikely((isinfinity(farg1.d) && iszero(farg2.d)) ||
-                        (iszero(farg1.d) && isinfinity(farg2.d)))) {
+    } else if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) ||
+                        (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) {
         /* Multiplication of zero by infinity */
         farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXIMZ);
     } else {
         farg1.d = float64_mul(farg1.d, farg2.d, &env->fp_status);
     }
-}
 #else
     farg1.d = float64_mul(farg1.d, farg2.d, &env->fp_status);
 #endif
@@ -1114,17 +1059,12 @@ uint64_t helper_fdiv (uint64_t arg1, uint64_t arg2)
                  float64_is_signaling_nan(farg2.d))) {
         /* sNaN division */
         farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN);
-    } else if (unlikely(isinfinity(farg1.d) && isinfinity(farg2.d))) {
+    } else if (unlikely(float64_is_infinity(farg1.d) && float64_is_infinity(farg2.d))) {
         /* Division of infinity by infinity */
         farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXIDI);
-    } else if (unlikely(iszero(farg2.d))) {
-        if (iszero(farg1.d)) {
-            /* Division of zero by zero */
-            farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXZDZ);
-        } else {
-            /* Division by zero */
-            farg1.ll = float_zero_divide_excp(farg1.d, farg2.d);
-        }
+    } else if (unlikely(float64_is_zero(farg1.d) && float64_is_zero(farg2.d))) {
+        /* Division of zero by zero */
+        farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXZDZ);
     } else {
         farg1.d = float64_div(farg1.d, farg2.d, &env->fp_status);
     }
@@ -1174,7 +1114,7 @@ uint64_t helper_fctiw (uint64_t arg)
     if (unlikely(float64_is_signaling_nan(farg.d))) {
         /* sNaN conversion */
         farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN | POWERPC_EXCP_FP_VXCVI);
-    } else if (unlikely(float64_is_nan(farg.d) || isinfinity(farg.d))) {
+    } else if (unlikely(float64_is_nan(farg.d) || float64_is_infinity(farg.d))) {
         /* qNan / infinity conversion */
         farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXCVI);
     } else {
@@ -1198,7 +1138,7 @@ uint64_t helper_fctiwz (uint64_t arg)
     if (unlikely(float64_is_signaling_nan(farg.d))) {
         /* sNaN conversion */
         farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN | POWERPC_EXCP_FP_VXCVI);
-    } else if (unlikely(float64_is_nan(farg.d) || isinfinity(farg.d))) {
+    } else if (unlikely(float64_is_nan(farg.d) || float64_is_infinity(farg.d))) {
         /* qNan / infinity conversion */
         farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXCVI);
     } else {
@@ -1231,7 +1171,7 @@ uint64_t helper_fctid (uint64_t arg)
     if (unlikely(float64_is_signaling_nan(farg.d))) {
         /* sNaN conversion */
         farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN | POWERPC_EXCP_FP_VXCVI);
-    } else if (unlikely(float64_is_nan(farg.d) || isinfinity(farg.d))) {
+    } else if (unlikely(float64_is_nan(farg.d) || float64_is_infinity(farg.d))) {
         /* qNan / infinity conversion */
         farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXCVI);
     } else {
@@ -1249,7 +1189,7 @@ uint64_t helper_fctidz (uint64_t arg)
     if (unlikely(float64_is_signaling_nan(farg.d))) {
         /* sNaN conversion */
         farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN | 
POWERPC_EXCP_FP_VXCVI); - } else if (unlikely(float64_is_nan(farg.d) || isinfinity(farg.d))) { + } else if (unlikely(float64_is_nan(farg.d) || float64_is_infinity(farg.d))) { /* qNan / infinity conversion */ farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXCVI); } else { @@ -1268,7 +1208,7 @@ static always_inline uint64_t do_fri (uint64_t arg, int rounding_mode) if (unlikely(float64_is_signaling_nan(farg.d))) { /* sNaN round */ farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN | POWERPC_EXCP_FP_VXCVI); - } else if (unlikely(float64_is_nan(farg.d) || isinfinity(farg.d))) { + } else if (unlikely(float64_is_nan(farg.d) || float64_is_infinity(farg.d))) { /* qNan / infinity round */ farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXCVI); } else { @@ -1314,6 +1254,10 @@ uint64_t helper_fmadd (uint64_t arg1, uint64_t arg2, uint64_t arg3) float64_is_signaling_nan(farg3.d))) { /* sNaN operation */ farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN); + } else if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || + (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) { + /* Multiplication of zero by infinity */ + farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXIMZ); } else { #ifdef FLOAT128 /* This is the way the PowerPC specification defines it */ @@ -1322,9 +1266,15 @@ uint64_t helper_fmadd (uint64_t arg1, uint64_t arg2, uint64_t arg3) ft0_128 = float64_to_float128(farg1.d, &env->fp_status); ft1_128 = float64_to_float128(farg2.d, &env->fp_status); ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status); - ft1_128 = float64_to_float128(farg3.d, &env->fp_status); - ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status); - farg1.d = float128_to_float64(ft0_128, &env->fp_status); + if (unlikely(float128_is_infinity(ft0_128) && float64_is_infinity(farg3.d) && + float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) { + /* Magnitude subtraction of infinities */ + farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXISI); + } else { + ft1_128 = float64_to_float128(farg3.d, &env->fp_status); + ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status); + farg1.d = float128_to_float64(ft0_128, &env->fp_status); + } #else /* This is OK on x86 hosts */ farg1.d = (farg1.d * farg2.d) + farg3.d; @@ -1351,6 +1301,10 @@ uint64_t helper_fmsub (uint64_t arg1, uint64_t arg2, uint64_t arg3) float64_is_signaling_nan(farg3.d))) { /* sNaN operation */ farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN); + } else if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || + (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) { + /* Multiplication of zero by infinity */ + farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXIMZ); } else { #ifdef FLOAT128 /* This is the way the PowerPC specification defines it */ @@ -1359,9 +1313,15 @@ uint64_t helper_fmsub (uint64_t arg1, uint64_t arg2, uint64_t arg3) ft0_128 = float64_to_float128(farg1.d, &env->fp_status); ft1_128 = float64_to_float128(farg2.d, &env->fp_status); ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status); - ft1_128 = float64_to_float128(farg3.d, &env->fp_status); - ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status); - farg1.d = float128_to_float64(ft0_128, &env->fp_status); + if (unlikely(float128_is_infinity(ft0_128) && float64_is_infinity(farg3.d) && + float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) { + /* Magnitude subtraction of infinities */ + farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXISI); + } else { + ft1_128 = float64_to_float128(farg3.d, &env->fp_status); + 
ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status); + farg1.d = float128_to_float64(ft0_128, &env->fp_status); + } #else /* This is OK on x86 hosts */ farg1.d = (farg1.d * farg2.d) - farg3.d; @@ -1388,6 +1348,10 @@ uint64_t helper_fnmadd (uint64_t arg1, uint64_t arg2, uint64_t arg3) float64_is_signaling_nan(farg3.d))) { /* sNaN operation */ farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN); + } else if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || + (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) { + /* Multiplication of zero by infinity */ + farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXIMZ); } else { #if USE_PRECISE_EMULATION #ifdef FLOAT128 @@ -1397,9 +1361,15 @@ uint64_t helper_fnmadd (uint64_t arg1, uint64_t arg2, uint64_t arg3) ft0_128 = float64_to_float128(farg1.d, &env->fp_status); ft1_128 = float64_to_float128(farg2.d, &env->fp_status); ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status); - ft1_128 = float64_to_float128(farg3.d, &env->fp_status); - ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status); - farg1.d= float128_to_float64(ft0_128, &env->fp_status); + if (unlikely(float128_is_infinity(ft0_128) && float64_is_infinity(farg3.d) && + float128_is_neg(ft0_128) != float64_is_neg(farg3.d))) { + /* Magnitude subtraction of infinities */ + farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXISI); + } else { + ft1_128 = float64_to_float128(farg3.d, &env->fp_status); + ft0_128 = float128_add(ft0_128, ft1_128, &env->fp_status); + farg1.d = float128_to_float64(ft0_128, &env->fp_status); + } #else /* This is OK on x86 hosts */ farg1.d = (farg1.d * farg2.d) + farg3.d; @@ -1408,7 +1378,7 @@ uint64_t helper_fnmadd (uint64_t arg1, uint64_t arg2, uint64_t arg3) farg1.d = float64_mul(farg1.d, farg2.d, &env->fp_status); farg1.d = float64_add(farg1.d, farg3.d, &env->fp_status); #endif - if (likely(!isnan(farg1.d))) + if (likely(!float64_is_nan(farg1.d))) farg1.d = float64_chs(farg1.d); } return farg1.ll; @@ -1428,6 +1398,10 @@ uint64_t helper_fnmsub (uint64_t arg1, uint64_t arg2, uint64_t arg3) float64_is_signaling_nan(farg3.d))) { /* sNaN operation */ farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN); + } else if (unlikely((float64_is_infinity(farg1.d) && float64_is_zero(farg2.d)) || + (float64_is_zero(farg1.d) && float64_is_infinity(farg2.d)))) { + /* Multiplication of zero by infinity */ + farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXIMZ); } else { #if USE_PRECISE_EMULATION #ifdef FLOAT128 @@ -1437,9 +1411,15 @@ uint64_t helper_fnmsub (uint64_t arg1, uint64_t arg2, uint64_t arg3) ft0_128 = float64_to_float128(farg1.d, &env->fp_status); ft1_128 = float64_to_float128(farg2.d, &env->fp_status); ft0_128 = float128_mul(ft0_128, ft1_128, &env->fp_status); - ft1_128 = float64_to_float128(farg3.d, &env->fp_status); - ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status); - farg1.d = float128_to_float64(ft0_128, &env->fp_status); + if (unlikely(float128_is_infinity(ft0_128) && float64_is_infinity(farg3.d) && + float128_is_neg(ft0_128) == float64_is_neg(farg3.d))) { + /* Magnitude subtraction of infinities */ + farg1.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXISI); + } else { + ft1_128 = float64_to_float128(farg3.d, &env->fp_status); + ft0_128 = float128_sub(ft0_128, ft1_128, &env->fp_status); + farg1.d = float128_to_float64(ft0_128, &env->fp_status); + } #else /* This is OK on x86 hosts */ farg1.d = (farg1.d * farg2.d) - farg3.d; @@ -1448,7 +1428,7 @@ uint64_t helper_fnmsub (uint64_t arg1, uint64_t arg2, uint64_t 
arg3) farg1.d = float64_mul(farg1.d, farg2.d, &env->fp_status); farg1.d = float64_sub(farg1.d, farg3.d, &env->fp_status); #endif - if (likely(!isnan(farg1.d))) + if (likely(!float64_is_nan(farg1.d))) farg1.d = float64_chs(farg1.d); } return farg1.ll; @@ -1458,6 +1438,7 @@ uint64_t helper_fnmsub (uint64_t arg1, uint64_t arg2, uint64_t arg3) uint64_t helper_frsp (uint64_t arg) { CPU_DoubleU farg; + float32 f32; farg.ll = arg; #if USE_PRECISE_EMULATION @@ -1465,10 +1446,12 @@ uint64_t helper_frsp (uint64_t arg) /* sNaN square root */ farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN); } else { - fard.d = float64_to_float32(farg.d, &env->fp_status); + f32 = float64_to_float32(farg.d, &env->fp_status); + farg.d = float32_to_float64(f32, &env->fp_status); } #else - farg.d = float64_to_float32(farg.d, &env->fp_status); + f32 = float64_to_float32(farg.d, &env->fp_status); + farg.d = float32_to_float64(f32, &env->fp_status); #endif return farg.ll; } @@ -1482,7 +1465,7 @@ uint64_t helper_fsqrt (uint64_t arg) if (unlikely(float64_is_signaling_nan(farg.d))) { /* sNaN square root */ farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN); - } else if (unlikely(fpisneg(farg.d) && !iszero(farg.d))) { + } else if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) { /* Square root of a negative nonzero number */ farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSQRT); } else { @@ -1500,23 +1483,8 @@ uint64_t helper_fre (uint64_t arg) if (unlikely(float64_is_signaling_nan(farg.d))) { /* sNaN reciprocal */ farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN); - } else if (unlikely(iszero(farg.d))) { - /* Zero reciprocal */ - farg.ll = float_zero_divide_excp(1.0, farg.d); - } else if (likely(isnormal(farg.d))) { - farg.d = float64_div(1.0, farg.d, &env->fp_status); } else { - if (farg.ll == 0x8000000000000000ULL) { - farg.ll = 0xFFF0000000000000ULL; - } else if (farg.ll == 0x0000000000000000ULL) { - farg.ll = 0x7FF0000000000000ULL; - } else if (isnan(farg.d)) { - farg.ll = 0x7FF8000000000000ULL; - } else if (fpisneg(farg.d)) { - farg.ll = 0x8000000000000000ULL; - } else { - farg.ll = 0x0000000000000000ULL; - } + farg.d = float64_div(float64_one, farg.d, &env->fp_status); } return farg.d; } @@ -1525,33 +1493,16 @@ uint64_t helper_fre (uint64_t arg) uint64_t helper_fres (uint64_t arg) { CPU_DoubleU farg; + float32 f32; farg.ll = arg; if (unlikely(float64_is_signaling_nan(farg.d))) { /* sNaN reciprocal */ farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN); - } else if (unlikely(iszero(farg.d))) { - /* Zero reciprocal */ - farg.ll = float_zero_divide_excp(1.0, farg.d); - } else if (likely(isnormal(farg.d))) { -#if USE_PRECISE_EMULATION - farg.d = float64_div(1.0, farg.d, &env->fp_status); - farg.d = float64_to_float32(farg.d, &env->fp_status); -#else - farg.d = float32_div(1.0, farg.d, &env->fp_status); -#endif } else { - if (farg.ll == 0x8000000000000000ULL) { - farg.ll = 0xFFF0000000000000ULL; - } else if (farg.ll == 0x0000000000000000ULL) { - farg.ll = 0x7FF0000000000000ULL; - } else if (isnan(farg.d)) { - farg.ll = 0x7FF8000000000000ULL; - } else if (fpisneg(farg.d)) { - farg.ll = 0x8000000000000000ULL; - } else { - farg.ll = 0x0000000000000000ULL; - } + farg.d = float64_div(float64_one, farg.d, &env->fp_status); + f32 = float64_to_float32(farg.d, &env->fp_status); + farg.d = float32_to_float64(f32, &env->fp_status); } return farg.ll; } @@ -1560,29 +1511,20 @@ uint64_t helper_fres (uint64_t arg) uint64_t helper_frsqrte (uint64_t arg) { CPU_DoubleU farg; + float32 f32; farg.ll = arg; 
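+    /* The rewrite below computes 1/sqrt(x) at full double precision and
+     * rounds once through float32.  The architected frsqrte only promises
+     * an estimate with relative error <= 1/32, so an exact result is a
+     * legal (merely more accurate) implementation. */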
if (unlikely(float64_is_signaling_nan(farg.d))) { /* sNaN reciprocal square root */ farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN); - } else if (unlikely(fpisneg(farg.d) && !iszero(farg.d))) { + } else if (unlikely(float64_is_neg(farg.d) && !float64_is_zero(farg.d))) { /* Reciprocal square root of a negative nonzero number */ farg.ll = fload_invalid_op_excp(POWERPC_EXCP_FP_VXSQRT); - } else if (likely(isnormal(farg.d))) { - farg.d = float64_sqrt(farg.d, &env->fp_status); - farg.d = float32_div(1.0, farg.d, &env->fp_status); } else { - if (farg.ll == 0x8000000000000000ULL) { - farg.ll = 0xFFF0000000000000ULL; - } else if (farg.ll == 0x0000000000000000ULL) { - farg.ll = 0x7FF0000000000000ULL; - } else if (isnan(farg.d)) { - farg.ll |= 0x000FFFFFFFFFFFFFULL; - } else if (fpisneg(farg.d)) { - farg.ll = 0x7FF8000000000000ULL; - } else { - farg.ll = 0x0000000000000000ULL; - } + farg.d = float64_sqrt(farg.d, &env->fp_status); + farg.d = float64_div(float64_one, farg.d, &env->fp_status); + f32 = float64_to_float32(farg.d, &env->fp_status); + farg.d = float32_to_float64(f32, &env->fp_status); } return farg.ll; } @@ -1590,44 +1532,46 @@ uint64_t helper_frsqrte (uint64_t arg) /* fsel - fsel. */ uint64_t helper_fsel (uint64_t arg1, uint64_t arg2, uint64_t arg3) { - CPU_DoubleU farg1, farg2, farg3; + CPU_DoubleU farg1; farg1.ll = arg1; - farg2.ll = arg2; - farg3.ll = arg3; - if (!fpisneg(farg1.d) || iszero(farg1.d)) - return farg2.ll; + if ((!float64_is_neg(farg1.d) || float64_is_zero(farg1.d)) && !float64_is_nan(farg1.d)) + return arg2; else - return farg2.ll; + return arg3; } -uint32_t helper_fcmpu (uint64_t arg1, uint64_t arg2) +void helper_fcmpu (uint64_t arg1, uint64_t arg2, uint32_t crfD) { CPU_DoubleU farg1, farg2; uint32_t ret = 0; farg1.ll = arg1; farg2.ll = arg2; - if (unlikely(float64_is_signaling_nan(farg1.d) || - float64_is_signaling_nan(farg2.d))) { - /* sNaN comparison */ - fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN); + if (unlikely(float64_is_nan(farg1.d) || + float64_is_nan(farg2.d))) { + ret = 0x01UL; + } else if (float64_lt(farg1.d, farg2.d, &env->fp_status)) { + ret = 0x08UL; + } else if (!float64_le(farg1.d, farg2.d, &env->fp_status)) { + ret = 0x04UL; } else { - if (float64_lt(farg1.d, farg2.d, &env->fp_status)) { - ret = 0x08UL; - } else if (!float64_le(farg1.d, farg2.d, &env->fp_status)) { - ret = 0x04UL; - } else { - ret = 0x02UL; - } + ret = 0x02UL; } + env->fpscr &= ~(0x0F << FPSCR_FPRF); env->fpscr |= ret << FPSCR_FPRF; - return ret; + env->crf[crfD] = ret; + if (unlikely(ret == 0x01UL + && (float64_is_signaling_nan(farg1.d) || + float64_is_signaling_nan(farg2.d)))) { + /* sNaN comparison */ + fload_invalid_op_excp(POWERPC_EXCP_FP_VXSNAN); + } } -uint32_t helper_fcmpo (uint64_t arg1, uint64_t arg2) +void helper_fcmpo (uint64_t arg1, uint64_t arg2, uint32_t crfD) { CPU_DoubleU farg1, farg2; uint32_t ret = 0; @@ -1636,6 +1580,19 @@ uint32_t helper_fcmpo (uint64_t arg1, uint64_t arg2) if (unlikely(float64_is_nan(farg1.d) || float64_is_nan(farg2.d))) { + ret = 0x01UL; + } else if (float64_lt(farg1.d, farg2.d, &env->fp_status)) { + ret = 0x08UL; + } else if (!float64_le(farg1.d, farg2.d, &env->fp_status)) { + ret = 0x04UL; + } else { + ret = 0x02UL; + } + + env->fpscr &= ~(0x0F << FPSCR_FPRF); + env->fpscr |= ret << FPSCR_FPRF; + env->crf[crfD] = ret; + if (unlikely (ret == 0x01UL)) { if (float64_is_signaling_nan(farg1.d) || float64_is_signaling_nan(farg2.d)) { /* sNaN comparison */ @@ -1645,18 +1602,7 @@ uint32_t helper_fcmpo (uint64_t arg1, uint64_t arg2) /* qNaN 
comparison */
            fload_invalid_op_excp(POWERPC_EXCP_FP_VXVC);
        }
-    } else {
-        if (float64_lt(farg1.d, farg2.d, &env->fp_status)) {
-            ret = 0x08UL;
-        } else if (!float64_le(farg1.d, farg2.d, &env->fp_status)) {
-            ret = 0x04UL;
-        } else {
-            ret = 0x02UL;
-        }
     }
-    env->fpscr &= ~(0x0F << FPSCR_FPRF);
-    env->fpscr |= ret << FPSCR_FPRF;
-    return ret;
 }
 
 #if !defined (CONFIG_USER_ONLY)
@@ -1701,20 +1647,20 @@ static always_inline void do_rfi (target_ulong nip, target_ulong msr,
 void helper_rfi (void)
 {
     do_rfi(env->spr[SPR_SRR0], env->spr[SPR_SRR1],
-           ~((target_ulong)0xFFFF0000), 1);
+           ~((target_ulong)0x0), 1);
 }
 
 #if defined(TARGET_PPC64)
 void helper_rfid (void)
 {
     do_rfi(env->spr[SPR_SRR0], env->spr[SPR_SRR1],
-           ~((target_ulong)0xFFFF0000), 0);
+           ~((target_ulong)0x0), 0);
 }
 
 void helper_hrfid (void)
 {
     do_rfi(env->spr[SPR_HSRR0], env->spr[SPR_HSRR1],
-           ~((target_ulong)0xFFFF0000), 0);
+           ~((target_ulong)0x0), 0);
 }
 #endif
 #endif
@@ -1863,30 +1809,16 @@ void helper_rfsvc (void)
 /* 602 specific instructions */
 /* mfrom is the most crazy instruction ever seen, imho ! */
 /* Real implementation uses a ROM table. Do the same */
+/* Extremely decomposed:
+ *                      -arg / 256
+ * return 256 * log10(10           + 1.0) + 0.5
+ */
 #if !defined (CONFIG_USER_ONLY)
-#define USE_MFROM_ROM_TABLE
 target_ulong helper_602_mfrom (target_ulong arg)
 {
     if (likely(arg < 602)) {
-#if defined(USE_MFROM_ROM_TABLE)
 #include "mfrom_table.c"
         return mfrom_ROM_table[arg];
-#else
-        double d;
-        /* Extremely decomposed:
-         *                      -arg / 256
-         * return 256 * log10(10           + 1.0) + 0.5
-         */
-        d = arg;
-        d = float64_div(d, 256, &env->fp_status);
-        d = float64_chs(d);
-        d = exp10(d); // XXX: use float emulation function
-        d = float64_add(d, 1.0, &env->fp_status);
-        d = log10(d); // XXX: use float emulation function
-        d = float64_mul(d, 256, &env->fp_status);
-        d = float64_add(d, 0.5, &env->fp_status);
-        return float64_round_to_int(d, &env->fp_status);
-#endif
     } else {
         return 0;
     }
@@ -1902,15 +1834,11 @@ target_ulong helper_load_dcr (target_ulong dcrn)
     target_ulong val = 0;
 
     if (unlikely(env->dcr_env == NULL)) {
-        if (loglevel != 0) {
-            fprintf(logfile, "No DCR environment\n");
-        }
+        qemu_log("No DCR environment\n");
         helper_raise_exception_err(POWERPC_EXCP_PROGRAM,
                                    POWERPC_EXCP_INVAL | POWERPC_EXCP_INVAL_INVAL);
     } else if (unlikely(ppc_dcr_read(env->dcr_env, dcrn, &val) != 0)) {
-        if (loglevel != 0) {
-            fprintf(logfile, "DCR read error %d %03x\n", (int)dcrn, (int)dcrn);
-        }
+        qemu_log("DCR read error %d %03x\n", (int)dcrn, (int)dcrn);
         helper_raise_exception_err(POWERPC_EXCP_PROGRAM,
                                    POWERPC_EXCP_INVAL | POWERPC_EXCP_PRIV_REG);
     }
@@ -1920,15 +1848,11 @@ target_ulong helper_load_dcr (target_ulong dcrn)
 void helper_store_dcr (target_ulong dcrn, target_ulong val)
 {
     if (unlikely(env->dcr_env == NULL)) {
-        if (loglevel != 0) {
-            fprintf(logfile, "No DCR environment\n");
-        }
+        qemu_log("No DCR environment\n");
         helper_raise_exception_err(POWERPC_EXCP_PROGRAM,
                                    POWERPC_EXCP_INVAL | POWERPC_EXCP_INVAL_INVAL);
     } else if (unlikely(ppc_dcr_write(env->dcr_env, dcrn, val) != 0)) {
-        if (loglevel != 0) {
-            fprintf(logfile, "DCR write error %d %03x\n", (int)dcrn, (int)dcrn);
-        }
+        qemu_log("DCR write error %d %03x\n", (int)dcrn, (int)dcrn);
         helper_raise_exception_err(POWERPC_EXCP_PROGRAM,
                                    POWERPC_EXCP_INVAL | POWERPC_EXCP_PRIV_REG);
     }
 }
@@ -1997,6 +1921,1128 @@ target_ulong helper_dlmzb (target_ulong high, target_ulong low, uint32_t update_
 }
 
 /*****************************************************************************/
+/* Altivec extension helpers */
+#if defined(WORDS_BIGENDIAN)
+#define 
HI_IDX 0 +#define LO_IDX 1 +#else +#define HI_IDX 1 +#define LO_IDX 0 +#endif + +#if defined(WORDS_BIGENDIAN) +#define VECTOR_FOR_INORDER_I(index, element) \ + for (index = 0; index < ARRAY_SIZE(r->element); index++) +#else +#define VECTOR_FOR_INORDER_I(index, element) \ + for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--) +#endif + +/* If X is a NaN, store the corresponding QNaN into RESULT. Otherwise, + * execute the following block. */ +#define DO_HANDLE_NAN(result, x) \ + if (float32_is_nan(x) || float32_is_signaling_nan(x)) { \ + CPU_FloatU __f; \ + __f.f = x; \ + __f.l = __f.l | (1 << 22); /* Set QNaN bit. */ \ + result = __f.f; \ + } else + +#define HANDLE_NAN1(result, x) \ + DO_HANDLE_NAN(result, x) +#define HANDLE_NAN2(result, x, y) \ + DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y) +#define HANDLE_NAN3(result, x, y, z) \ + DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y) DO_HANDLE_NAN(result, z) + +/* Saturating arithmetic helpers. */ +#define SATCVT(from, to, from_type, to_type, min, max, use_min, use_max) \ + static always_inline to_type cvt##from##to (from_type x, int *sat) \ + { \ + to_type r; \ + if (use_min && x < min) { \ + r = min; \ + *sat = 1; \ + } else if (use_max && x > max) { \ + r = max; \ + *sat = 1; \ + } else { \ + r = x; \ + } \ + return r; \ + } +SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX, 1, 1) +SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX, 1, 1) +SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX, 1, 1) +SATCVT(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX, 0, 1) +SATCVT(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX, 0, 1) +SATCVT(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX, 0, 1) +SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX, 1, 1) +SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX, 1, 1) +SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX, 1, 1) +#undef SATCVT + +#define LVE(name, access, swap, element) \ + void helper_##name (ppc_avr_t *r, target_ulong addr) \ + { \ + size_t n_elems = ARRAY_SIZE(r->element); \ + int adjust = HI_IDX*(n_elems-1); \ + int sh = sizeof(r->element[0]) >> 1; \ + int index = (addr & 0xf) >> sh; \ + if(msr_le) { \ + r->element[LO_IDX ? index : (adjust - index)] = swap(access(addr)); \ + } else { \ + r->element[LO_IDX ? index : (adjust - index)] = access(addr); \ + } \ + } +#define I(x) (x) +LVE(lvebx, ldub, I, u8) +LVE(lvehx, lduw, bswap16, u16) +LVE(lvewx, ldl, bswap32, u32) +#undef I +#undef LVE + +void helper_lvsl (ppc_avr_t *r, target_ulong sh) +{ + int i, j = (sh & 0xf); + + VECTOR_FOR_INORDER_I (i, u8) { + r->u8[i] = j++; + } +} + +void helper_lvsr (ppc_avr_t *r, target_ulong sh) +{ + int i, j = 0x10 - (sh & 0xf); + + VECTOR_FOR_INORDER_I (i, u8) { + r->u8[i] = j++; + } +} + +#define STVE(name, access, swap, element) \ + void helper_##name (ppc_avr_t *r, target_ulong addr) \ + { \ + size_t n_elems = ARRAY_SIZE(r->element); \ + int adjust = HI_IDX*(n_elems-1); \ + int sh = sizeof(r->element[0]) >> 1; \ + int index = (addr & 0xf) >> sh; \ + if(msr_le) { \ + access(addr, swap(r->element[LO_IDX ? index : (adjust - index)])); \ + } else { \ + access(addr, r->element[LO_IDX ? 
index : (adjust - index)]); \ + } \ + } +#define I(x) (x) +STVE(stvebx, stb, I, u8) +STVE(stvehx, stw, bswap16, u16) +STVE(stvewx, stl, bswap32, u32) +#undef I +#undef LVE + +void helper_mtvscr (ppc_avr_t *r) +{ +#if defined(WORDS_BIGENDIAN) + env->vscr = r->u32[3]; +#else + env->vscr = r->u32[0]; +#endif + set_flush_to_zero(vscr_nj, &env->vec_status); +} + +void helper_vaddcuw (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->u32); i++) { + r->u32[i] = ~a->u32[i] < b->u32[i]; + } +} + +#define VARITH_DO(name, op, element) \ +void helper_v##name (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ +{ \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + r->element[i] = a->element[i] op b->element[i]; \ + } \ +} +#define VARITH(suffix, element) \ + VARITH_DO(add##suffix, +, element) \ + VARITH_DO(sub##suffix, -, element) +VARITH(ubm, u8) +VARITH(uhm, u16) +VARITH(uwm, u32) +#undef VARITH_DO +#undef VARITH + +#define VARITHFP(suffix, func) \ + void helper_v##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ + HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) { \ + r->f[i] = func(a->f[i], b->f[i], &env->vec_status); \ + } \ + } \ + } +VARITHFP(addfp, float32_add) +VARITHFP(subfp, float32_sub) +#undef VARITHFP + +#define VARITHSAT_CASE(type, op, cvt, element) \ + { \ + type result = (type)a->element[i] op (type)b->element[i]; \ + r->element[i] = cvt(result, &sat); \ + } + +#define VARITHSAT_DO(name, op, optype, cvt, element) \ + void helper_v##name (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int sat = 0; \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + switch (sizeof(r->element[0])) { \ + case 1: VARITHSAT_CASE(optype, op, cvt, element); break; \ + case 2: VARITHSAT_CASE(optype, op, cvt, element); break; \ + case 4: VARITHSAT_CASE(optype, op, cvt, element); break; \ + } \ + } \ + if (sat) { \ + env->vscr |= (1 << VSCR_SAT); \ + } \ + } +#define VARITHSAT_SIGNED(suffix, element, optype, cvt) \ + VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element) \ + VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element) +#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt) \ + VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element) \ + VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element) +VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb) +VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh) +VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw) +VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub) +VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh) +VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw) +#undef VARITHSAT_CASE +#undef VARITHSAT_DO +#undef VARITHSAT_SIGNED +#undef VARITHSAT_UNSIGNED + +#define VAVG_DO(name, element, etype) \ + void helper_v##name (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + etype x = (etype)a->element[i] + (etype)b->element[i] + 1; \ + r->element[i] = x >> 1; \ + } \ + } + +#define VAVG(type, signed_element, signed_type, unsigned_element, unsigned_type) \ + VAVG_DO(avgs##type, signed_element, signed_type) \ + VAVG_DO(avgu##type, unsigned_element, unsigned_type) +VAVG(b, s8, int16_t, u8, uint16_t) +VAVG(h, s16, int32_t, u16, uint32_t) +VAVG(w, s32, int64_t, u32, uint64_t) +#undef VAVG_DO +#undef VAVG + +#define VCF(suffix, cvt, element) \ + void helper_vcf##suffix (ppc_avr_t *r, ppc_avr_t *b, uint32_t uim) \ + { \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ + float32 t = cvt(b->element[i], &env->vec_status); \ + r->f[i] = 
float32_scalbn (t, -uim, &env->vec_status); \ + } \ + } +VCF(ux, uint32_to_float32, u32) +VCF(sx, int32_to_float32, s32) +#undef VCF + +#define VCMP_DO(suffix, compare, element, record) \ + void helper_vcmp##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + uint32_t ones = (uint32_t)-1; \ + uint32_t all = ones; \ + uint32_t none = 0; \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + uint32_t result = (a->element[i] compare b->element[i] ? ones : 0x0); \ + switch (sizeof (a->element[0])) { \ + case 4: r->u32[i] = result; break; \ + case 2: r->u16[i] = result; break; \ + case 1: r->u8[i] = result; break; \ + } \ + all &= result; \ + none |= result; \ + } \ + if (record) { \ + env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ + } \ + } +#define VCMP(suffix, compare, element) \ + VCMP_DO(suffix, compare, element, 0) \ + VCMP_DO(suffix##_dot, compare, element, 1) +VCMP(equb, ==, u8) +VCMP(equh, ==, u16) +VCMP(equw, ==, u32) +VCMP(gtub, >, u8) +VCMP(gtuh, >, u16) +VCMP(gtuw, >, u32) +VCMP(gtsb, >, s8) +VCMP(gtsh, >, s16) +VCMP(gtsw, >, s32) +#undef VCMP_DO +#undef VCMP + +#define VCMPFP_DO(suffix, compare, order, record) \ + void helper_vcmp##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + uint32_t ones = (uint32_t)-1; \ + uint32_t all = ones; \ + uint32_t none = 0; \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ + uint32_t result; \ + int rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status); \ + if (rel == float_relation_unordered) { \ + result = 0; \ + } else if (rel compare order) { \ + result = ones; \ + } else { \ + result = 0; \ + } \ + r->u32[i] = result; \ + all &= result; \ + none |= result; \ + } \ + if (record) { \ + env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1); \ + } \ + } +#define VCMPFP(suffix, compare, order) \ + VCMPFP_DO(suffix, compare, order, 0) \ + VCMPFP_DO(suffix##_dot, compare, order, 1) +VCMPFP(eqfp, ==, float_relation_equal) +VCMPFP(gefp, !=, float_relation_less) +VCMPFP(gtfp, ==, float_relation_greater) +#undef VCMPFP_DO +#undef VCMPFP + +static always_inline void vcmpbfp_internal (ppc_avr_t *r, ppc_avr_t *a, + ppc_avr_t *b, int record) +{ + int i; + int all_in = 0; + for (i = 0; i < ARRAY_SIZE(r->f); i++) { + int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status); + if (le_rel == float_relation_unordered) { + r->u32[i] = 0xc0000000; + /* ALL_IN does not need to be updated here. 
*/ + } else { + float32 bneg = float32_chs(b->f[i]); + int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status); + int le = le_rel != float_relation_greater; + int ge = ge_rel != float_relation_less; + r->u32[i] = ((!le) << 31) | ((!ge) << 30); + all_in |= (!le | !ge); + } + } + if (record) { + env->crf[6] = (all_in == 0) << 1; + } +} + +void helper_vcmpbfp (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + vcmpbfp_internal(r, a, b, 0); +} + +void helper_vcmpbfp_dot (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + vcmpbfp_internal(r, a, b, 1); +} + +#define VCT(suffix, satcvt, element) \ + void helper_vct##suffix (ppc_avr_t *r, ppc_avr_t *b, uint32_t uim) \ + { \ + int i; \ + int sat = 0; \ + float_status s = env->vec_status; \ + set_float_rounding_mode(float_round_to_zero, &s); \ + for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ + if (float32_is_nan(b->f[i]) || \ + float32_is_signaling_nan(b->f[i])) { \ + r->element[i] = 0; \ + } else { \ + float64 t = float32_to_float64(b->f[i], &s); \ + int64_t j; \ + t = float64_scalbn(t, uim, &s); \ + j = float64_to_int64(t, &s); \ + r->element[i] = satcvt(j, &sat); \ + } \ + } \ + if (sat) { \ + env->vscr |= (1 << VSCR_SAT); \ + } \ + } +VCT(uxs, cvtsduw, u32) +VCT(sxs, cvtsdsw, s32) +#undef VCT + +void helper_vmaddfp (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->f); i++) { + HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) { + /* Need to do the computation in higher precision and round + * once at the end. */ + float64 af, bf, cf, t; + af = float32_to_float64(a->f[i], &env->vec_status); + bf = float32_to_float64(b->f[i], &env->vec_status); + cf = float32_to_float64(c->f[i], &env->vec_status); + t = float64_mul(af, cf, &env->vec_status); + t = float64_add(t, bf, &env->vec_status); + r->f[i] = float64_to_float32(t, &env->vec_status); + } + } +} + +void helper_vmhaddshs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int sat = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(r->s16); i++) { + int32_t prod = a->s16[i] * b->s16[i]; + int32_t t = (int32_t)c->s16[i] + (prod >> 15); + r->s16[i] = cvtswsh (t, &sat); + } + + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + +void helper_vmhraddshs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int sat = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(r->s16); i++) { + int32_t prod = a->s16[i] * b->s16[i] + 0x00004000; + int32_t t = (int32_t)c->s16[i] + (prod >> 15); + r->s16[i] = cvtswsh (t, &sat); + } + + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + +#define VMINMAX_DO(name, compare, element) \ + void helper_v##name (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + if (a->element[i] compare b->element[i]) { \ + r->element[i] = b->element[i]; \ + } else { \ + r->element[i] = a->element[i]; \ + } \ + } \ + } +#define VMINMAX(suffix, element) \ + VMINMAX_DO(min##suffix, >, element) \ + VMINMAX_DO(max##suffix, <, element) +VMINMAX(sb, s8) +VMINMAX(sh, s16) +VMINMAX(sw, s32) +VMINMAX(ub, u8) +VMINMAX(uh, u16) +VMINMAX(uw, u32) +#undef VMINMAX_DO +#undef VMINMAX + +#define VMINMAXFP(suffix, rT, rF) \ + void helper_v##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ + HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) { \ + if (float32_lt_quiet(a->f[i], b->f[i], &env->vec_status)) { \ + r->f[i] = rT->f[i]; \ + } else { \ + r->f[i] = rF->f[i]; \ + } \ + } \ + } \ + } +VMINMAXFP(minfp, a, b) +VMINMAXFP(maxfp, b, a) 
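+/* Operand-order note: both instantiations reuse one quiet less-than
+ * compare.  Per element, VMINMAXFP(minfp, a, b) expands to
+ *
+ *     if (float32_lt_quiet(a->f[i], b->f[i], &env->vec_status))
+ *         r->f[i] = a->f[i];
+ *     else
+ *         r->f[i] = b->f[i];
+ *
+ * and (maxfp, b, a) swaps the selected operands; HANDLE_NAN2 has already
+ * short-circuited NaN inputs to a quiet NaN result before the compare. */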
+#undef VMINMAXFP + +void helper_vmladduhm (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->s16); i++) { + int32_t prod = a->s16[i] * b->s16[i]; + r->s16[i] = (int16_t) (prod + c->s16[i]); + } +} + +#define VMRG_DO(name, element, highp) \ + void helper_v##name (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + ppc_avr_t result; \ + int i; \ + size_t n_elems = ARRAY_SIZE(r->element); \ + for (i = 0; i < n_elems/2; i++) { \ + if (highp) { \ + result.element[i*2+HI_IDX] = a->element[i]; \ + result.element[i*2+LO_IDX] = b->element[i]; \ + } else { \ + result.element[n_elems - i*2 - (1+HI_IDX)] = b->element[n_elems - i - 1]; \ + result.element[n_elems - i*2 - (1+LO_IDX)] = a->element[n_elems - i - 1]; \ + } \ + } \ + *r = result; \ + } +#if defined(WORDS_BIGENDIAN) +#define MRGHI 0 +#define MRGLO 1 +#else +#define MRGHI 1 +#define MRGLO 0 +#endif +#define VMRG(suffix, element) \ + VMRG_DO(mrgl##suffix, element, MRGHI) \ + VMRG_DO(mrgh##suffix, element, MRGLO) +VMRG(b, u8) +VMRG(h, u16) +VMRG(w, u32) +#undef VMRG_DO +#undef VMRG +#undef MRGHI +#undef MRGLO + +void helper_vmsummbm (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int32_t prod[16]; + int i; + + for (i = 0; i < ARRAY_SIZE(r->s8); i++) { + prod[i] = (int32_t)a->s8[i] * b->u8[i]; + } + + VECTOR_FOR_INORDER_I(i, s32) { + r->s32[i] = c->s32[i] + prod[4*i] + prod[4*i+1] + prod[4*i+2] + prod[4*i+3]; + } +} + +void helper_vmsumshm (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int32_t prod[8]; + int i; + + for (i = 0; i < ARRAY_SIZE(r->s16); i++) { + prod[i] = a->s16[i] * b->s16[i]; + } + + VECTOR_FOR_INORDER_I(i, s32) { + r->s32[i] = c->s32[i] + prod[2*i] + prod[2*i+1]; + } +} + +void helper_vmsumshs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int32_t prod[8]; + int i; + int sat = 0; + + for (i = 0; i < ARRAY_SIZE(r->s16); i++) { + prod[i] = (int32_t)a->s16[i] * b->s16[i]; + } + + VECTOR_FOR_INORDER_I (i, s32) { + int64_t t = (int64_t)c->s32[i] + prod[2*i] + prod[2*i+1]; + r->u32[i] = cvtsdsw(t, &sat); + } + + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + +void helper_vmsumubm (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + uint16_t prod[16]; + int i; + + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { + prod[i] = a->u8[i] * b->u8[i]; + } + + VECTOR_FOR_INORDER_I(i, u32) { + r->u32[i] = c->u32[i] + prod[4*i] + prod[4*i+1] + prod[4*i+2] + prod[4*i+3]; + } +} + +void helper_vmsumuhm (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + uint32_t prod[8]; + int i; + + for (i = 0; i < ARRAY_SIZE(r->u16); i++) { + prod[i] = a->u16[i] * b->u16[i]; + } + + VECTOR_FOR_INORDER_I(i, u32) { + r->u32[i] = c->u32[i] + prod[2*i] + prod[2*i+1]; + } +} + +void helper_vmsumuhs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + uint32_t prod[8]; + int i; + int sat = 0; + + for (i = 0; i < ARRAY_SIZE(r->u16); i++) { + prod[i] = a->u16[i] * b->u16[i]; + } + + VECTOR_FOR_INORDER_I (i, s32) { + uint64_t t = (uint64_t)c->u32[i] + prod[2*i] + prod[2*i+1]; + r->u32[i] = cvtuduw(t, &sat); + } + + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + +#define VMUL_DO(name, mul_element, prod_element, evenp) \ + void helper_v##name (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + VECTOR_FOR_INORDER_I(i, prod_element) { \ + if (evenp) { \ + r->prod_element[i] = a->mul_element[i*2+HI_IDX] * b->mul_element[i*2+HI_IDX]; \ + } else { \ + r->prod_element[i] = a->mul_element[i*2+LO_IDX] * b->mul_element[i*2+LO_IDX]; \ + } \ + } \ + 
} +#define VMUL(suffix, mul_element, prod_element) \ + VMUL_DO(mule##suffix, mul_element, prod_element, 1) \ + VMUL_DO(mulo##suffix, mul_element, prod_element, 0) +VMUL(sb, s8, s16) +VMUL(sh, s16, s32) +VMUL(ub, u8, u16) +VMUL(uh, u16, u32) +#undef VMUL_DO +#undef VMUL + +void helper_vnmsubfp (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->f); i++) { + HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) { + /* Need to do the computation is higher precision and round + * once at the end. */ + float64 af, bf, cf, t; + af = float32_to_float64(a->f[i], &env->vec_status); + bf = float32_to_float64(b->f[i], &env->vec_status); + cf = float32_to_float64(c->f[i], &env->vec_status); + t = float64_mul(af, cf, &env->vec_status); + t = float64_sub(t, bf, &env->vec_status); + t = float64_chs(t); + r->f[i] = float64_to_float32(t, &env->vec_status); + } + } +} + +void helper_vperm (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + ppc_avr_t result; + int i; + VECTOR_FOR_INORDER_I (i, u8) { + int s = c->u8[i] & 0x1f; +#if defined(WORDS_BIGENDIAN) + int index = s & 0xf; +#else + int index = 15 - (s & 0xf); +#endif + if (s & 0x10) { + result.u8[i] = b->u8[index]; + } else { + result.u8[i] = a->u8[index]; + } + } + *r = result; +} + +#if defined(WORDS_BIGENDIAN) +#define PKBIG 1 +#else +#define PKBIG 0 +#endif +void helper_vpkpx (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i, j; + ppc_avr_t result; +#if defined(WORDS_BIGENDIAN) + const ppc_avr_t *x[2] = { a, b }; +#else + const ppc_avr_t *x[2] = { b, a }; +#endif + + VECTOR_FOR_INORDER_I (i, u64) { + VECTOR_FOR_INORDER_I (j, u32){ + uint32_t e = x[i]->u32[j]; + result.u16[4*i+j] = (((e >> 9) & 0xfc00) | + ((e >> 6) & 0x3e0) | + ((e >> 3) & 0x1f)); + } + } + *r = result; +} + +#define VPK(suffix, from, to, cvt, dosat) \ + void helper_vpk##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + int sat = 0; \ + ppc_avr_t result; \ + ppc_avr_t *a0 = PKBIG ? a : b; \ + ppc_avr_t *a1 = PKBIG ? 
b : a; \ + VECTOR_FOR_INORDER_I (i, from) { \ + result.to[i] = cvt(a0->from[i], &sat); \ + result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat); \ + } \ + *r = result; \ + if (dosat && sat) { \ + env->vscr |= (1 << VSCR_SAT); \ + } \ + } +#define I(x, y) (x) +VPK(shss, s16, s8, cvtshsb, 1) +VPK(shus, s16, u8, cvtshub, 1) +VPK(swss, s32, s16, cvtswsh, 1) +VPK(swus, s32, u16, cvtswuh, 1) +VPK(uhus, u16, u8, cvtuhub, 1) +VPK(uwus, u32, u16, cvtuwuh, 1) +VPK(uhum, u16, u8, I, 0) +VPK(uwum, u32, u16, I, 0) +#undef I +#undef VPK +#undef PKBIG + +void helper_vrefp (ppc_avr_t *r, ppc_avr_t *b) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->f); i++) { + HANDLE_NAN1(r->f[i], b->f[i]) { + r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status); + } + } +} + +#define VRFI(suffix, rounding) \ + void helper_vrfi##suffix (ppc_avr_t *r, ppc_avr_t *b) \ + { \ + int i; \ + float_status s = env->vec_status; \ + set_float_rounding_mode(rounding, &s); \ + for (i = 0; i < ARRAY_SIZE(r->f); i++) { \ + HANDLE_NAN1(r->f[i], b->f[i]) { \ + r->f[i] = float32_round_to_int (b->f[i], &s); \ + } \ + } \ + } +VRFI(n, float_round_nearest_even) +VRFI(m, float_round_down) +VRFI(p, float_round_up) +VRFI(z, float_round_to_zero) +#undef VRFI + +#define VROTATE(suffix, element) \ + void helper_vrl##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + unsigned int mask = ((1 << (3 + (sizeof (a->element[0]) >> 1))) - 1); \ + unsigned int shift = b->element[i] & mask; \ + r->element[i] = (a->element[i] << shift) | (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \ + } \ + } +VROTATE(b, u8) +VROTATE(h, u16) +VROTATE(w, u32) +#undef VROTATE + +void helper_vrsqrtefp (ppc_avr_t *r, ppc_avr_t *b) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->f); i++) { + HANDLE_NAN1(r->f[i], b->f[i]) { + float32 t = float32_sqrt(b->f[i], &env->vec_status); + r->f[i] = float32_div(float32_one, t, &env->vec_status); + } + } +} + +void helper_vsel (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) +{ + r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]); + r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]); +} + +void helper_vlogefp (ppc_avr_t *r, ppc_avr_t *b) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->f); i++) { + HANDLE_NAN1(r->f[i], b->f[i]) { + r->f[i] = float32_log2(b->f[i], &env->vec_status); + } + } +} + +#if defined(WORDS_BIGENDIAN) +#define LEFT 0 +#define RIGHT 1 +#else +#define LEFT 1 +#define RIGHT 0 +#endif +/* The specification says that the results are undefined if all of the + * shift counts are not identical. We check to make sure that they are + * to conform to what real hardware appears to do. 
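+ * (When the per-byte shift counts disagree, the loop below leaves the
+ * destination register unmodified, which is one legal reading of
+ * "undefined".)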
*/ +#define VSHIFT(suffix, leftp) \ + void helper_vs##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int shift = b->u8[LO_IDX*15] & 0x7; \ + int doit = 1; \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { \ + doit = doit && ((b->u8[i] & 0x7) == shift); \ + } \ + if (doit) { \ + if (shift == 0) { \ + *r = *a; \ + } else if (leftp) { \ + uint64_t carry = a->u64[LO_IDX] >> (64 - shift); \ + r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry; \ + r->u64[LO_IDX] = a->u64[LO_IDX] << shift; \ + } else { \ + uint64_t carry = a->u64[HI_IDX] << (64 - shift); \ + r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry; \ + r->u64[HI_IDX] = a->u64[HI_IDX] >> shift; \ + } \ + } \ + } +VSHIFT(l, LEFT) +VSHIFT(r, RIGHT) +#undef VSHIFT +#undef LEFT +#undef RIGHT + +#define VSL(suffix, element) \ + void helper_vsl##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + unsigned int mask = ((1 << (3 + (sizeof (a->element[0]) >> 1))) - 1); \ + unsigned int shift = b->element[i] & mask; \ + r->element[i] = a->element[i] << shift; \ + } \ + } +VSL(b, u8) +VSL(h, u16) +VSL(w, u32) +#undef VSL + +void helper_vsldoi (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift) +{ + int sh = shift & 0xf; + int i; + ppc_avr_t result; + +#if defined(WORDS_BIGENDIAN) + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { + int index = sh + i; + if (index > 0xf) { + result.u8[i] = b->u8[index-0x10]; + } else { + result.u8[i] = a->u8[index]; + } + } +#else + for (i = 0; i < ARRAY_SIZE(r->u8); i++) { + int index = (16 - sh) + i; + if (index > 0xf) { + result.u8[i] = a->u8[index-0x10]; + } else { + result.u8[i] = b->u8[index]; + } + } +#endif + *r = result; +} + +void helper_vslo (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf; + +#if defined (WORDS_BIGENDIAN) + memmove (&r->u8[0], &a->u8[sh], 16-sh); + memset (&r->u8[16-sh], 0, sh); +#else + memmove (&r->u8[sh], &a->u8[0], 16-sh); + memset (&r->u8[0], 0, sh); +#endif +} + +/* Experimental testing shows that hardware masks the immediate. 
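+ * (_SPLAT_MASKED reduces the index modulo the element count, so e.g.
+ * vspltw with an immediate of 5 selects word 1.)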
*/ +#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1)) +#if defined(WORDS_BIGENDIAN) +#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element) +#else +#define SPLAT_ELEMENT(element) (ARRAY_SIZE(r->element)-1 - _SPLAT_MASKED(element)) +#endif +#define VSPLT(suffix, element) \ + void helper_vsplt##suffix (ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \ + { \ + uint32_t s = b->element[SPLAT_ELEMENT(element)]; \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + r->element[i] = s; \ + } \ + } +VSPLT(b, u8) +VSPLT(h, u16) +VSPLT(w, u32) +#undef VSPLT +#undef SPLAT_ELEMENT +#undef _SPLAT_MASKED + +#define VSPLTI(suffix, element, splat_type) \ + void helper_vspltis##suffix (ppc_avr_t *r, uint32_t splat) \ + { \ + splat_type x = (int8_t)(splat << 3) >> 3; \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + r->element[i] = x; \ + } \ + } +VSPLTI(b, s8, int8_t) +VSPLTI(h, s16, int16_t) +VSPLTI(w, s32, int32_t) +#undef VSPLTI + +#define VSR(suffix, element) \ + void helper_vsr##suffix (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ + { \ + int i; \ + for (i = 0; i < ARRAY_SIZE(r->element); i++) { \ + unsigned int mask = ((1 << (3 + (sizeof (a->element[0]) >> 1))) - 1); \ + unsigned int shift = b->element[i] & mask; \ + r->element[i] = a->element[i] >> shift; \ + } \ + } +VSR(ab, s8) +VSR(ah, s16) +VSR(aw, s32) +VSR(b, u8) +VSR(h, u16) +VSR(w, u32) +#undef VSR + +void helper_vsro (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf; + +#if defined (WORDS_BIGENDIAN) + memmove (&r->u8[sh], &a->u8[0], 16-sh); + memset (&r->u8[0], 0, sh); +#else + memmove (&r->u8[0], &a->u8[sh], 16-sh); + memset (&r->u8[16-sh], 0, sh); +#endif +} + +void helper_vsubcuw (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i; + for (i = 0; i < ARRAY_SIZE(r->u32); i++) { + r->u32[i] = a->u32[i] >= b->u32[i]; + } +} + +void helper_vsumsws (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int64_t t; + int i, upper; + ppc_avr_t result; + int sat = 0; + +#if defined(WORDS_BIGENDIAN) + upper = ARRAY_SIZE(r->s32)-1; +#else + upper = 0; +#endif + t = (int64_t)b->s32[upper]; + for (i = 0; i < ARRAY_SIZE(r->s32); i++) { + t += a->s32[i]; + result.s32[i] = 0; + } + result.s32[upper] = cvtsdsw(t, &sat); + *r = result; + + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + +void helper_vsum2sws (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i, j, upper; + ppc_avr_t result; + int sat = 0; + +#if defined(WORDS_BIGENDIAN) + upper = 1; +#else + upper = 0; +#endif + for (i = 0; i < ARRAY_SIZE(r->u64); i++) { + int64_t t = (int64_t)b->s32[upper+i*2]; + result.u64[i] = 0; + for (j = 0; j < ARRAY_SIZE(r->u64); j++) { + t += a->s32[2*i+j]; + } + result.s32[upper+i*2] = cvtsdsw(t, &sat); + } + + *r = result; + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + +void helper_vsum4sbs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i, j; + int sat = 0; + + for (i = 0; i < ARRAY_SIZE(r->s32); i++) { + int64_t t = (int64_t)b->s32[i]; + for (j = 0; j < ARRAY_SIZE(r->s32); j++) { + t += a->s8[4*i+j]; + } + r->s32[i] = cvtsdsw(t, &sat); + } + + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + +void helper_vsum4shs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int sat = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(r->s32); i++) { + int64_t t = (int64_t)b->s32[i]; + t += a->s16[2*i] + a->s16[2*i+1]; + r->s32[i] = cvtsdsw(t, &sat); + } + + if (sat) { + env->vscr |= (1 << VSCR_SAT); + } +} + +void helper_vsum4ubs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) +{ + int i, j; + int 
+void helper_vsum4ubs (ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    int i, j;
+    int sat = 0;
+
+    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
+        uint64_t t = (uint64_t)b->u32[i];
+        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
+            t += a->u8[4*i+j];
+        }
+        r->u32[i] = cvtuduw(t, &sat);
+    }
+
+    if (sat) {
+        env->vscr |= (1 << VSCR_SAT);
+    }
+}
+
+#if defined(WORDS_BIGENDIAN)
+#define UPKHI 1
+#define UPKLO 0
+#else
+#define UPKHI 0
+#define UPKLO 1
+#endif
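+/* Editorial note, not part of the upstream patch: VUPKPX below expands
+ * 1:5:5:5 packed pixels into 8-bit A:R:G:B channels, with UPKHI/UPKLO
+ * selecting which half of the source vector is unpacked.  For example,
+ * e = 0xFFFF unpacks to 0xFF1F1F1F: alpha is forced to 0xff or 0, while
+ * the 5-bit colour channels are zero-extended rather than rescaled. */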
+#define VUPKPX(suffix, hi) \
+    void helper_vupk##suffix (ppc_avr_t *r, ppc_avr_t *b) \
+    { \
+        int i; \
+        ppc_avr_t result; \
+        for (i = 0; i < ARRAY_SIZE(r->u32); i++) { \
+            uint16_t e = b->u16[hi ? i : i+4]; \
+            uint8_t a = (e >> 15) ? 0xff : 0; \
+            uint8_t r = (e >> 10) & 0x1f; \
+            uint8_t g = (e >> 5) & 0x1f; \
+            uint8_t b = e & 0x1f; \
+            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b; \
+        } \
+        *r = result; \
+    }
+VUPKPX(lpx, UPKLO)
+VUPKPX(hpx, UPKHI)
+#undef VUPKPX
+
+#define VUPK(suffix, unpacked, packee, hi) \
+    void helper_vupk##suffix (ppc_avr_t *r, ppc_avr_t *b) \
+    { \
+        int i; \
+        ppc_avr_t result; \
+        if (hi) { \
+            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) { \
+                result.unpacked[i] = b->packee[i]; \
+            } \
+        } else { \
+            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); i++) { \
+                result.unpacked[i-ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
+            } \
+        } \
+        *r = result; \
+    }
+VUPK(hsb, s16, s8, UPKHI)
+VUPK(hsh, s32, s16, UPKHI)
+VUPK(lsb, s16, s8, UPKLO)
+VUPK(lsh, s32, s16, UPKLO)
+#undef VUPK
+#undef UPKHI
+#undef UPKLO
+
+#undef DO_HANDLE_NAN
+#undef HANDLE_NAN1
+#undef HANDLE_NAN2
+#undef HANDLE_NAN3
+#undef VECTOR_FOR_INORDER_I
+#undef HI_IDX
+#undef LO_IDX
+
+/*****************************************************************************/
 /* SPE extension helpers */
 /* Use a table to make this quicker */
 static uint8_t hbrev[16] = {
@@ -2045,7 +3091,7 @@ static always_inline uint32_t efscfsi (uint32_t val)
 {
     CPU_FloatU u;
 
-    u.f = int32_to_float32(val, &env->spe_status);
+    u.f = int32_to_float32(val, &env->vec_status);
 
     return u.l;
 }
@@ -2054,7 +3100,7 @@ static always_inline uint32_t efscfui (uint32_t val)
 {
     CPU_FloatU u;
 
-    u.f = uint32_to_float32(val, &env->spe_status);
+    u.f = uint32_to_float32(val, &env->vec_status);
 
     return u.l;
 }
@@ -2065,10 +3111,10 @@ static always_inline int32_t efsctsi (uint32_t val)
 
     u.l = val;
     /* NaN are not treated the same way IEEE 754 does */
-    if (unlikely(isnan(u.f)))
+    if (unlikely(float32_is_nan(u.f)))
         return 0;
 
-    return float32_to_int32(u.f, &env->spe_status);
+    return float32_to_int32(u.f, &env->vec_status);
 }
 
 static always_inline uint32_t efsctui (uint32_t val)
@@ -2077,10 +3123,10 @@
 
     u.l = val;
     /* NaN are not treated the same way IEEE 754 does */
-    if (unlikely(isnan(u.f)))
+    if (unlikely(float32_is_nan(u.f)))
         return 0;
 
-    return float32_to_uint32(u.f, &env->spe_status);
+    return float32_to_uint32(u.f, &env->vec_status);
 }
 
 static always_inline uint32_t efsctsiz (uint32_t val)
@@ -2089,10 +3135,10 @@
 
     u.l = val;
     /* NaN are not treated the same way IEEE 754 does */
-    if (unlikely(isnan(u.f)))
+    if (unlikely(float32_is_nan(u.f)))
         return 0;
 
-    return float32_to_int32_round_to_zero(u.f, &env->spe_status);
+    return float32_to_int32_round_to_zero(u.f, &env->vec_status);
 }
 
 static always_inline uint32_t efsctuiz (uint32_t val)
@@ -2101,10 +3147,10 @@
 
     u.l = val;
     /* NaN are not treated the same way IEEE 754 does */
-    if (unlikely(isnan(u.f)))
+    if (unlikely(float32_is_nan(u.f)))
         return 0;
 
-    return float32_to_uint32_round_to_zero(u.f, &env->spe_status);
+    return float32_to_uint32_round_to_zero(u.f, &env->vec_status);
 }
 
 static always_inline uint32_t efscfsf (uint32_t val)
@@ -2112,9 +3158,9 @@
     CPU_FloatU u;
     float32 tmp;
 
-    u.f = int32_to_float32(val, &env->spe_status);
-    tmp = int64_to_float32(1ULL << 32, &env->spe_status);
-    u.f = float32_div(u.f, tmp, &env->spe_status);
+    u.f = int32_to_float32(val, &env->vec_status);
+    tmp = int64_to_float32(1ULL << 32, &env->vec_status);
+    u.f = float32_div(u.f, tmp, &env->vec_status);
 
     return u.l;
 }
@@ -2124,9 +3170,9 @@ static always_inline uint32_t efscfuf (uint32_t val)
     CPU_FloatU u;
     float32 tmp;
 
-    u.f = uint32_to_float32(val, &env->spe_status);
-    tmp = uint64_to_float32(1ULL << 32, &env->spe_status);
-    u.f = float32_div(u.f, tmp, &env->spe_status);
+    u.f = uint32_to_float32(val, &env->vec_status);
+    tmp = uint64_to_float32(1ULL << 32, &env->vec_status);
+    u.f = float32_div(u.f, tmp, &env->vec_status);
 
     return u.l;
 }
@@ -2138,12 +3184,12 @@ static always_inline uint32_t efsctsf (uint32_t val)
 
     u.l = val;
     /* NaN are not treated the same way IEEE 754 does */
-    if (unlikely(isnan(u.f)))
+    if (unlikely(float32_is_nan(u.f)))
         return 0;
-    tmp = uint64_to_float32(1ULL << 32, &env->spe_status);
-    u.f = float32_mul(u.f, tmp, &env->spe_status);
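+    /* Editorial note, not part of the upstream patch: the fractional
+     * conversions scale by 2**32 (the 1ULL << 32 constant) around the
+     * integer conversion -- efsctsf multiplies before converting to an
+     * integer, while efscfsf above divides after converting from one. */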
+    tmp = uint64_to_float32(1ULL << 32, &env->vec_status);
+    u.f = float32_mul(u.f, tmp, &env->vec_status);
 
-    return float32_to_int32(u.f, &env->spe_status);
+    return float32_to_int32(u.f, &env->vec_status);
 }
 
 static always_inline uint32_t efsctuf (uint32_t val)
@@ -2153,12 +3199,12 @@
 
     u.l = val;
     /* NaN are not treated the same way IEEE 754 does */
-    if (unlikely(isnan(u.f)))
+    if (unlikely(float32_is_nan(u.f)))
        return 0;
-    tmp = uint64_to_float32(1ULL << 32, &env->spe_status);
-    u.f = float32_mul(u.f, tmp, &env->spe_status);
+    tmp = uint64_to_float32(1ULL << 32, &env->vec_status);
+    u.f = float32_mul(u.f, tmp, &env->vec_status);
 
-    return float32_to_uint32(u.f, &env->spe_status);
+    return float32_to_uint32(u.f, &env->vec_status);
 }
 
 #define HELPER_SPE_SINGLE_CONV(name) \
@@ -2220,7 +3266,7 @@ static always_inline uint32_t efsadd (uint32_t op1, uint32_t op2)
     CPU_FloatU u1, u2;
     u1.l = op1;
     u2.l = op2;
-    u1.f = float32_add(u1.f, u2.f, &env->spe_status);
+    u1.f = float32_add(u1.f, u2.f, &env->vec_status);
     return u1.l;
 }
@@ -2229,7 +3275,7 @@ static always_inline uint32_t efssub (uint32_t op1, uint32_t op2)
     CPU_FloatU u1, u2;
     u1.l = op1;
     u2.l = op2;
-    u1.f = float32_sub(u1.f, u2.f, &env->spe_status);
+    u1.f = float32_sub(u1.f, u2.f, &env->vec_status);
     return u1.l;
 }
@@ -2238,7 +3284,7 @@ static always_inline uint32_t efsmul (uint32_t op1, uint32_t op2)
     CPU_FloatU u1, u2;
     u1.l = op1;
     u2.l = op2;
-    u1.f = float32_mul(u1.f, u2.f, &env->spe_status);
+    u1.f = float32_mul(u1.f, u2.f, &env->vec_status);
     return u1.l;
 }
@@ -2247,7 +3293,7 @@ static always_inline uint32_t efsdiv (uint32_t op1, uint32_t op2)
     CPU_FloatU u1, u2;
     u1.l = op1;
     u2.l = op2;
-    u1.f = float32_div(u1.f, u2.f, &env->spe_status);
+    u1.f = float32_div(u1.f, u2.f, &env->vec_status);
     return u1.l;
 }
@@ -2286,7 +3332,7 @@ static always_inline uint32_t efststlt (uint32_t op1, uint32_t op2)
     CPU_FloatU u1, u2;
     u1.l = op1;
     u2.l = op2;
-    return float32_lt(u1.f, u2.f, &env->spe_status) ? 4 : 0;
+    return float32_lt(u1.f, u2.f, &env->vec_status) ? 4 : 0;
 }
 
 static always_inline uint32_t efststgt (uint32_t op1, uint32_t op2)
@@ -2294,7 +3340,7 @@
     CPU_FloatU u1, u2;
     u1.l = op1;
     u2.l = op2;
-    return float32_le(u1.f, u2.f, &env->spe_status) ? 0 : 4;
+    return float32_le(u1.f, u2.f, &env->vec_status) ? 0 : 4;
 }
 
 static always_inline uint32_t efststeq (uint32_t op1, uint32_t op2)
@@ -2302,7 +3348,7 @@
     CPU_FloatU u1, u2;
     u1.l = op1;
     u2.l = op2;
-    return float32_eq(u1.f, u2.f, &env->spe_status) ? 4 : 0;
+    return float32_eq(u1.f, u2.f, &env->vec_status) ? 4 : 0;
 }
 
 static always_inline uint32_t efscmplt (uint32_t op1, uint32_t op2)
@@ -2369,7 +3415,7 @@ uint64_t helper_efdcfsi (uint32_t val)
 {
     CPU_DoubleU u;
 
-    u.d = int32_to_float64(val, &env->spe_status);
+    u.d = int32_to_float64(val, &env->vec_status);
 
     return u.ll;
 }
@@ -2378,7 +3424,7 @@ uint64_t helper_efdcfsid (uint64_t val)
 {
     CPU_DoubleU u;
 
-    u.d = int64_to_float64(val, &env->spe_status);
+    u.d = int64_to_float64(val, &env->vec_status);
 
     return u.ll;
 }
@@ -2387,7 +3433,7 @@ uint64_t helper_efdcfui (uint32_t val)
 {
     CPU_DoubleU u;
 
-    u.d = uint32_to_float64(val, &env->spe_status);
+    u.d = uint32_to_float64(val, &env->vec_status);
 
     return u.ll;
 }
@@ -2396,7 +3442,7 @@ uint64_t helper_efdcfuid (uint64_t val)
 {
     CPU_DoubleU u;
 
-    u.d = uint64_to_float64(val, &env->spe_status);
+    u.d = uint64_to_float64(val, &env->vec_status);
 
     return u.ll;
 }
@@ -2407,10 +3453,10 @@ uint32_t helper_efdctsi (uint64_t val)
 
     u.ll = val;
     /* NaN are not treated the same way IEEE 754 does */
-    if (unlikely(isnan(u.d)))
+    if (unlikely(float64_is_nan(u.d)))
         return 0;
 
-    return float64_to_int32(u.d, &env->spe_status);
+    return float64_to_int32(u.d, &env->vec_status);
 }
 
 uint32_t helper_efdctui (uint64_t val)
@@ -2419,10 +3465,10 @@
 
     u.ll = val;
     /* NaN are not treated the same way IEEE 754 does */
-    if (unlikely(isnan(u.d)))
+    if (unlikely(float64_is_nan(u.d)))
         return 0;
 
-    return float64_to_uint32(u.d, &env->spe_status);
+    return float64_to_uint32(u.d, &env->vec_status);
 }
 
 uint32_t helper_efdctsiz (uint64_t val)
@@ -2431,10 +3477,10 @@
 
     u.ll = val;
     /* NaN are not treated the same way IEEE 754 does */
-    if (unlikely(isnan(u.d)))
+    if (unlikely(float64_is_nan(u.d)))
         return 0;
 
-    return float64_to_int32_round_to_zero(u.d, &env->spe_status);
+    return float64_to_int32_round_to_zero(u.d, &env->vec_status);
 }
 
 uint64_t helper_efdctsidz (uint64_t val)
@@ -2443,10 +3489,10 @@
 
     u.ll = val;
     /* NaN are not treated the same way IEEE 754 does */
-    if (unlikely(isnan(u.d)))
+    if (unlikely(float64_is_nan(u.d)))
         return 0;
 
-    return float64_to_int64_round_to_zero(u.d, &env->spe_status);
+    return float64_to_int64_round_to_zero(u.d, &env->vec_status);
 }
 
 uint32_t helper_efdctuiz (uint64_t val)
@@ -2455,10 +3501,10 @@
 
     u.ll = val;
     /* NaN are not treated the same way IEEE 754 does */
-    if (unlikely(isnan(u.d)))
+    if (unlikely(float64_is_nan(u.d)))
         return 0;
 
-    return float64_to_uint32_round_to_zero(u.d, &env->spe_status);
+    return float64_to_uint32_round_to_zero(u.d, &env->vec_status);
 }
 
 uint64_t helper_efdctuidz (uint64_t val)
@@ -2467,10 +3513,10 @@
 
     u.ll = val;
     /* NaN are not treated the same way IEEE 754 does */
-    if (unlikely(isnan(u.d)))
+    if (unlikely(float64_is_nan(u.d)))
         return 0;
 
-    return float64_to_uint64_round_to_zero(u.d, &env->spe_status);
+    return float64_to_uint64_round_to_zero(u.d, &env->vec_status);
 }
 
 uint64_t helper_efdcfsf (uint32_t val)
@@ -2478,9 +3524,9 @@
     CPU_DoubleU u;
     float64 tmp;
 
-    u.d = int32_to_float64(val, &env->spe_status);
-    tmp = int64_to_float64(1ULL << 32, &env->spe_status);
-    u.d = float64_div(u.d, tmp, &env->spe_status);
+    u.d = int32_to_float64(val, &env->vec_status);
+    tmp = int64_to_float64(1ULL << 32, &env->vec_status);
+    u.d = float64_div(u.d, tmp, &env->vec_status);
 
     return u.ll;
 }
@@ -2490,9 +3536,9 @@ uint64_t helper_efdcfuf (uint32_t val)
     CPU_DoubleU u;
     float64 tmp;
 
-    u.d = uint32_to_float64(val, &env->spe_status);
-    tmp = int64_to_float64(1ULL << 32, &env->spe_status);
-    u.d = float64_div(u.d, tmp, &env->spe_status);
+    u.d = uint32_to_float64(val, &env->vec_status);
+    tmp = int64_to_float64(1ULL << 32, &env->vec_status);
+    u.d = float64_div(u.d, tmp, &env->vec_status);
 
     return u.ll;
 }
@@ -2504,12 +3550,12 @@ uint32_t helper_efdctsf (uint64_t val)
 
     u.ll = val;
     /* NaN are not treated the same way IEEE 754 does */
-    if (unlikely(isnan(u.d)))
+    if (unlikely(float64_is_nan(u.d)))
         return 0;
-    tmp = uint64_to_float64(1ULL << 32, &env->spe_status);
-    u.d = float64_mul(u.d, tmp, &env->spe_status);
+    tmp = uint64_to_float64(1ULL << 32, &env->vec_status);
+    u.d = float64_mul(u.d, tmp, &env->vec_status);
 
-    return float64_to_int32(u.d, &env->spe_status);
+    return float64_to_int32(u.d, &env->vec_status);
 }
 
 uint32_t helper_efdctuf (uint64_t val)
@@ -2519,12 +3565,12 @@
 
     u.ll = val;
     /* NaN are not treated the same way IEEE 754 does */
-    if (unlikely(isnan(u.d)))
+    if (unlikely(float64_is_nan(u.d)))
         return 0;
-    tmp = uint64_to_float64(1ULL << 32, &env->spe_status);
-    u.d = float64_mul(u.d, tmp, &env->spe_status);
+    tmp = uint64_to_float64(1ULL << 32, &env->vec_status);
+    u.d = float64_mul(u.d, tmp, &env->vec_status);
 
-    return float64_to_uint32(u.d, &env->spe_status);
+    return float64_to_uint32(u.d, &env->vec_status);
 }
 
 uint32_t helper_efscfd (uint64_t val)
@@ -2533,7 +3579,7 @@
     CPU_DoubleU u1;
     CPU_FloatU u2;
 
     u1.ll = val;
-    u2.f = float64_to_float32(u1.d, &env->spe_status);
+    u2.f = float64_to_float32(u1.d, &env->vec_status);
 
     return u2.l;
 }
@@ -2544,7 +3590,7 @@ uint64_t helper_efdcfs (uint32_t val)
     CPU_FloatU u1;
 
     u1.l = val;
-    u2.d = float32_to_float64(u1.f, &env->spe_status);
+    u2.d = float32_to_float64(u1.f, &env->vec_status);
 
     return u2.ll;
 }
@@ -2555,7 +3601,7 @@ uint64_t helper_efdadd (uint64_t op1, uint64_t op2)
     CPU_DoubleU u1, u2;
     u1.ll = op1;
     u2.ll = op2;
-    u1.d = float64_add(u1.d, u2.d, &env->spe_status);
+    u1.d = float64_add(u1.d, u2.d, &env->vec_status);
     return u1.ll;
 }
@@ -2564,7 +3610,7 @@ uint64_t helper_efdsub (uint64_t op1, uint64_t op2)
     CPU_DoubleU u1, u2;
     u1.ll = op1;
     u2.ll = op2;
-    u1.d = float64_sub(u1.d, u2.d, &env->spe_status);
+    u1.d = float64_sub(u1.d, u2.d, &env->vec_status);
     return u1.ll;
 }
@@ -2573,7 +3619,7 @@ uint64_t helper_efdmul (uint64_t op1, uint64_t op2)
     CPU_DoubleU u1, u2;
     u1.ll = op1;
     u2.ll = op2;
-    u1.d = float64_mul(u1.d, u2.d, &env->spe_status);
+    u1.d = float64_mul(u1.d, u2.d, &env->vec_status);
     return u1.ll;
 }
@@ -2582,7 +3628,7 @@ uint64_t helper_efddiv (uint64_t op1, uint64_t op2)
     CPU_DoubleU u1, u2;
     u1.ll = op1;
     u2.ll = op2;
-    u1.d = float64_div(u1.d, u2.d, &env->spe_status);
+    u1.d = float64_div(u1.d, u2.d, &env->vec_status);
     return u1.ll;
 }
@@ -2592,7 +3638,7 @@ uint32_t helper_efdtstlt (uint64_t op1, uint64_t op2)
     CPU_DoubleU u1, u2;
     u1.ll = op1;
     u2.ll = op2;
-    return float64_lt(u1.d, u2.d, &env->spe_status) ? 4 : 0;
+    return float64_lt(u1.d, u2.d, &env->vec_status) ? 4 : 0;
 }
 
 uint32_t helper_efdtstgt (uint64_t op1, uint64_t op2)
@@ -2600,7 +3646,7 @@
     CPU_DoubleU u1, u2;
     u1.ll = op1;
     u2.ll = op2;
-    return float64_le(u1.d, u2.d, &env->spe_status) ? 0 : 4;
+    return float64_le(u1.d, u2.d, &env->vec_status) ? 0 : 4;
 }
 
 uint32_t helper_efdtsteq (uint64_t op1, uint64_t op2)
@@ -2608,7 +3654,7 @@
     CPU_DoubleU u1, u2;
     u1.ll = op1;
     u2.ll = op2;
-    return float64_eq(u1.d, u2.d, &env->spe_status) ? 4 : 0;
+    return float64_eq(u1.d, u2.d, &env->vec_status) ? 4 : 0;
 }
 
 uint32_t helper_efdcmplt (uint64_t op1, uint64_t op2)
@@ -2682,6 +3728,10 @@ void tlb_fill (target_ulong addr, int is_write, int mmu_idx, void *retaddr)
 
 /* Segment registers load and store */
 target_ulong helper_load_sr (target_ulong sr_num)
 {
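+    /* Editorial note, not part of the upstream patch: 64-bit MMU models
+     * have no architected segment registers, so the read is delegated to
+     * ppc_load_sr() rather than returning env->sr[] directly. */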
+#if defined(TARGET_PPC64)
+    if (env->mmu_model & POWERPC_MMU_64)
+        return ppc_load_sr(env, sr_num);
+#endif
     return env->sr[sr_num];
 }
 
@@ -2697,9 +3747,9 @@ target_ulong helper_load_slb (target_ulong slb_nr)
     return ppc_load_slb(env, slb_nr);
 }
 
-void helper_store_slb (target_ulong slb_nr, target_ulong rs)
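+/* Editorial note, not part of the upstream patch: the rename below
+ * (slb_nr -> rb) suggests the helper now receives the raw RB operand of
+ * slbmte and lets ppc_store_slb() extract the entry index itself. */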
+void helper_store_slb (target_ulong rb, target_ulong rs)
 {
-    ppc_store_slb(env, slb_nr, rs);
+    ppc_store_slb(env, rb, rs);
 }
 
 void helper_slbia (void)
@@ -2741,13 +3791,9 @@ static void do_6xx_tlb (target_ulong new_EPN, int is_code)
         EPN = env->spr[SPR_DMISS];
     }
     way = (env->spr[SPR_SRR1] >> 17) & 1;
-#if defined (DEBUG_SOFTWARE_TLB)
-    if (loglevel != 0) {
-        fprintf(logfile, "%s: EPN " ADDRX " " ADDRX " PTE0 " ADDRX
+    LOG_SWTLB("%s: EPN " ADDRX " " ADDRX " PTE0 " ADDRX
               " PTE1 " ADDRX " way %d\n",
               __func__, new_EPN, EPN, CMP, RPN, way);
-    }
-#endif
     /* Store this TLB */
     ppc6xx_tlb_store(env, (uint32_t)(new_EPN & TARGET_PAGE_MASK),
                      way, is_code, CMP, RPN);
@@ -2773,13 +3819,9 @@ static void do_74xx_tlb (target_ulong new_EPN, int is_code)
     CMP = env->spr[SPR_PTEHI];
     EPN = env->spr[SPR_TLBMISS] & ~0x3;
     way = env->spr[SPR_TLBMISS] & 0x3;
-#if defined (DEBUG_SOFTWARE_TLB)
-    if (loglevel != 0) {
-        fprintf(logfile, "%s: EPN " ADDRX " " ADDRX " PTE0 " ADDRX
+    LOG_SWTLB("%s: EPN " ADDRX " " ADDRX " PTE0 " ADDRX
              " PTE1 " ADDRX " way %d\n",
               __func__, new_EPN, EPN, CMP, RPN, way);
-    }
-#endif
     /* Store this TLB */
     ppc6xx_tlb_store(env, (uint32_t)(new_EPN & TARGET_PAGE_MASK),
                      way, is_code, CMP, RPN);
@@ -2903,22 +3945,14 @@ void helper_4xx_tlbwe_hi (target_ulong entry, target_ulong val)
     ppcemb_tlb_t *tlb;
     target_ulong page, end;
 
-#if defined (DEBUG_SOFTWARE_TLB)
-    if (loglevel != 0) {
-        fprintf(logfile, "%s entry %d val " ADDRX "\n", __func__, (int)entry, val);
-    }
-#endif
+    LOG_SWTLB("%s entry %d val " ADDRX "\n", __func__, (int)entry, val);
     entry &= 0x3F;
     tlb = &env->tlb[entry].tlbe;
     /* Invalidate previous TLB (if it's valid) */
     if (tlb->prot & PAGE_VALID) {
         end = tlb->EPN + tlb->size;
-#if defined (DEBUG_SOFTWARE_TLB)
-        if (loglevel != 0) {
-            fprintf(logfile, "%s: invalidate old TLB %d start " ADDRX
+        LOG_SWTLB("%s: invalidate old TLB %d start " ADDRX
                   " end " ADDRX "\n", __func__, (int)entry, tlb->EPN, end);
-        }
-#endif
         for (page = tlb->EPN; page < end; page += TARGET_PAGE_SIZE)
             tlb_flush_page(env, page);
     }
@@ -2943,26 +3977,18 @@ void helper_4xx_tlbwe_hi (target_ulong entry, target_ulong val)
     }
     tlb->PID = env->spr[SPR_40x_PID]; /* PID */
     tlb->attr = val & 0xFF;
-#if defined (DEBUG_SOFTWARE_TLB)
-    if (loglevel != 0) {
-        fprintf(logfile, "%s: set up TLB %d RPN " PADDRX " EPN " ADDRX
+    LOG_SWTLB("%s: set up TLB %d RPN " PADDRX " EPN " ADDRX
              " size " ADDRX " prot %c%c%c%c PID %d\n", __func__,
              (int)entry, tlb->RPN, tlb->EPN, tlb->size,
             tlb->prot & PAGE_READ ? 'r' : '-',
             tlb->prot & PAGE_WRITE ? 'w' : '-',
             tlb->prot & PAGE_EXEC ? 'x' : '-',
             tlb->prot & PAGE_VALID ? 'v' : '-', (int)tlb->PID);
-    }
-#endif
     /* Invalidate new TLB (if valid) */
     if (tlb->prot & PAGE_VALID) {
         end = tlb->EPN + tlb->size;
-#if defined (DEBUG_SOFTWARE_TLB)
-        if (loglevel != 0) {
-            fprintf(logfile, "%s: invalidate TLB %d start " ADDRX
+        LOG_SWTLB("%s: invalidate TLB %d start " ADDRX
                   " end " ADDRX "\n", __func__, (int)entry, tlb->EPN, end);
-        }
-#endif
         for (page = tlb->EPN; page < end; page += TARGET_PAGE_SIZE)
             tlb_flush_page(env, page);
     }
@@ -2972,11 +3998,7 @@ void helper_4xx_tlbwe_lo (target_ulong entry, target_ulong val)
 {
     ppcemb_tlb_t *tlb;
 
-#if defined (DEBUG_SOFTWARE_TLB)
-    if (loglevel != 0) {
-        fprintf(logfile, "%s entry %i val " ADDRX "\n", __func__, (int)entry, val);
-    }
-#endif
+    LOG_SWTLB("%s entry %i val " ADDRX "\n", __func__, (int)entry, val);
     entry &= 0x3F;
     tlb = &env->tlb[entry].tlbe;
     tlb->RPN = val & 0xFFFFFC00;
@@ -2985,17 +4007,13 @@ void helper_4xx_tlbwe_lo (target_ulong entry, target_ulong val)
         tlb->prot |= PAGE_EXEC;
     if (val & 0x100)
         tlb->prot |= PAGE_WRITE;
-#if defined (DEBUG_SOFTWARE_TLB)
-    if (loglevel != 0) {
-        fprintf(logfile, "%s: set up TLB %d RPN " PADDRX " EPN " ADDRX
+    LOG_SWTLB("%s: set up TLB %d RPN " PADDRX " EPN " ADDRX
             " size " ADDRX " prot %c%c%c%c PID %d\n", __func__,
             (int)entry, tlb->RPN, tlb->EPN, tlb->size,
            tlb->prot & PAGE_READ ? 'r' : '-',
            tlb->prot & PAGE_WRITE ? 'w' : '-',
            tlb->prot & PAGE_EXEC ? 'x' : '-',
            tlb->prot & PAGE_VALID ? 'v' : '-', (int)tlb->PID);
-    }
-#endif
 }
 
 target_ulong helper_4xx_tlbsx (target_ulong address)
@@ -3010,12 +4028,8 @@ void helper_440_tlbwe (uint32_t word, target_ulong entry, target_ulong value)
     target_ulong EPN, RPN, size;
     int do_flush_tlbs;
 
-#if defined (DEBUG_SOFTWARE_TLB)
-    if (loglevel != 0) {
-        fprintf(logfile, "%s word %d entry %d value " ADDRX "\n",
+    LOG_SWTLB("%s word %d entry %d value " ADDRX "\n",
               __func__, word, (int)entry, value);
-    }
-#endif
     do_flush_tlbs = 0;
     entry &= 0x3F;
     tlb = &env->tlb[entry].tlbe;