Move also DEBUG_PCALL (see r5085)
[qemu] / target-sparc / op_helper.c
index 48f5fc6..64b56e3 100644 (file)
 #include "exec.h"
 #include "host-utils.h"
+#include "helper.h"
+#if !defined(CONFIG_USER_ONLY)
+#include "softmmu_exec.h"
+#endif /* !defined(CONFIG_USER_ONLY) */
 
-//#define DEBUG_PCALL
 //#define DEBUG_MMU
 //#define DEBUG_MXCC
 //#define DEBUG_UNALIGNED
 //#define DEBUG_UNASSIGNED
+//#define DEBUG_ASI
+//#define DEBUG_PCALL
 
 #ifdef DEBUG_MMU
 #define DPRINTF_MMU(fmt, args...) \
 do { printf("MMU: " fmt , ##args); } while (0)
 #else
-#define DPRINTF_MMU(fmt, args...)
+#define DPRINTF_MMU(fmt, args...) do {} while (0)
 #endif
 
 #ifdef DEBUG_MXCC
 #define DPRINTF_MXCC(fmt, args...) \
 do { printf("MXCC: " fmt , ##args); } while (0)
 #else
-#define DPRINTF_MXCC(fmt, args...)
+#define DPRINTF_MXCC(fmt, args...) do {} while (0)
+#endif
+
+#ifdef DEBUG_ASI
+#define DPRINTF_ASI(fmt, args...) \
+do { printf("ASI: " fmt , ##args); } while (0)
+#else
+#define DPRINTF_ASI(fmt, args...) do {} while (0)
 #endif
 
+#ifdef TARGET_SPARC64
+#ifndef TARGET_ABI32
+#define AM_CHECK(env1) ((env1)->pstate & PS_AM)
+#else
+#define AM_CHECK(env1) (1)
+#endif
+#endif
+
+/*
+ * Truncate *addr to its low 32 bits when address masking applies.
+ *
+ * Only has an effect on TARGET_SPARC64 builds: there the mask is applied
+ * when AM_CHECK(env1) is true, i.e. when PS_AM is set in env1->pstate, or
+ * unconditionally when TARGET_ABI32 is defined.  On other targets the
+ * function body is empty and *addr is left unchanged.
+ */
+static inline void address_mask(CPUState *env1, target_ulong *addr)
+{
+#ifdef TARGET_SPARC64
+    if (AM_CHECK(env1))
+        *addr &= 0xffffffffULL;
+#endif
+}
+
+/*
+ * Record trap type tt in env->exception_index and leave the current
+ * execution via cpu_loop_exit() (presumably does not return -- confirm
+ * against cpu_loop_exit()).
+ */
 void raise_exception(int tt)
 {
     env->exception_index = tt;
     cpu_loop_exit();
 }
 
-void check_ieee_exceptions()
+/*
+ * Unconditional software trap: the exception index is TT_TRAP plus the
+ * low 7 bits of nb_trap, then the CPU loop is exited.
+ */
+void helper_trap(target_ulong nb_trap)
+{
+    env->exception_index = TT_TRAP + (nb_trap & 0x7f);
+    cpu_loop_exit();
+}
+
+/*
+ * Conditional software trap: behaves like helper_trap() when do_trap is
+ * non-zero, and does nothing otherwise.
+ */
+void helper_trapcc(target_ulong nb_trap, target_ulong do_trap)
+{
+    if (do_trap) {
+        env->exception_index = TT_TRAP + (nb_trap & 0x7f);
+        cpu_loop_exit();
+    }
+}
+
+/* Convenience wrapper: cpu_set_cwp() applied to the global env. */
+static inline void set_cwp(int new_cwp)
+{
+    cpu_set_cwp(env, new_cwp);
+}
+
+/*
+ * Raise TT_UNALIGNED if any bit of addr selected by the mask align is set.
+ *
+ * align is a bit mask, not a byte count; helper_ld_asi() in this file
+ * passes (size - 1).  With DEBUG_UNALIGNED defined, the faulting address
+ * and env->pc are printed before the exception is raised.
+ */
+void helper_check_align(target_ulong addr, uint32_t align)
+{
+    if (addr & align) {
+#ifdef DEBUG_UNALIGNED
+    printf("Unaligned access to 0x" TARGET_FMT_lx " from 0x" TARGET_FMT_lx
+           "\n", addr, env->pc);
+#endif
+        raise_exception(TT_UNALIGNED);
+    }
+}
+
+/* Expands to the signature "void helper_f<name><p>(void)". */
+#define F_HELPER(name, p) void helper_f##name##p(void)
+
+/*
+ * Define the three precisions of the binary softfloat operation <name>:
+ *   helper_f<name>s - takes two float32 arguments and returns the result;
+ *   helper_f<name>d - DT0 = float64_<name>(DT0, DT1);
+ *   helper_f<name>q - QT0 = float128_<name>(QT0, QT1).
+ * All of them use env->fp_status.
+ */
+#define F_BINOP(name)                                           \
+    float32 helper_f ## name ## s (float32 src1, float32 src2)  \
+    {                                                           \
+        return float32_ ## name (src1, src2, &env->fp_status);  \
+    }                                                           \
+    F_HELPER(name, d)                                           \
+    {                                                           \
+        DT0 = float64_ ## name (DT0, DT1, &env->fp_status);     \
+    }                                                           \
+    F_HELPER(name, q)                                           \
+    {                                                           \
+        QT0 = float128_ ## name (QT0, QT1, &env->fp_status);    \
+    }
+
+/* Instantiate add, sub, mul and div in all three precisions. */
+F_BINOP(add);
+F_BINOP(sub);
+F_BINOP(mul);
+F_BINOP(div);
+#undef F_BINOP
+
+/*
+ * DT0 = src1 * src2 computed in float64: both float32 operands are
+ * widened to float64 before the multiplication.
+ */
+void helper_fsmuld(float32 src1, float32 src2)
+{
+    DT0 = float64_mul(float32_to_float64(src1, &env->fp_status),
+                      float32_to_float64(src2, &env->fp_status),
+                      &env->fp_status);
+}
+
+/*
+ * QT0 = DT0 * DT1 computed in float128: both float64 operands are
+ * widened to float128 before the multiplication.
+ */
+void helper_fdmulq(void)
+{
+    QT0 = float128_mul(float64_to_float128(DT0, &env->fp_status),
+                       float64_to_float128(DT1, &env->fp_status),
+                       &env->fp_status);
+}
+
+/* Return float32_chs(src), i.e. src with its sign changed. */
+float32 helper_fnegs(float32 src)
+{
+    return float32_chs(src);
+}
+
+#ifdef TARGET_SPARC64
+/* helper_fnegd: DT0 = float64_chs(DT1). */
+F_HELPER(neg, d)
+{
+    DT0 = float64_chs(DT1);
+}
+
+/* helper_fnegq: QT0 = float128_chs(QT1). */
+F_HELPER(neg, q)
+{
+    QT0 = float128_chs(QT1);
+}
+#endif
+
+/* Integer to float conversion.  */
+/* Convert the 32-bit integer src to float32 and return it. */
+float32 helper_fitos(int32_t src)
+{
+    return int32_to_float32(src, &env->fp_status);
+}
+
+/* Convert the 32-bit integer src to float64; the result goes to DT0. */
+void helper_fitod(int32_t src)
+{
+    DT0 = int32_to_float64(src, &env->fp_status);
+}
+
+/* Convert the 32-bit integer src to float128; the result goes to QT0. */
+void helper_fitoq(int32_t src)
+{
+    QT0 = int32_to_float128(src, &env->fp_status);
+}
+
+#ifdef TARGET_SPARC64
+/*
+ * Read the storage of DT1 as an int64_t (pointer cast, not a value
+ * conversion), convert that integer to float32 and return it.
+ */
+float32 helper_fxtos(void)
+{
+    return int64_to_float32(*((int64_t *)&DT1), &env->fp_status);
+}
+
+/* helper_fxtod: DT0 = float64 value of the int64_t stored in DT1. */
+F_HELPER(xto, d)
+{
+    DT0 = int64_to_float64(*((int64_t *)&DT1), &env->fp_status);
+}
+
+/* helper_fxtoq: QT0 = float128 value of the int64_t stored in DT1. */
+F_HELPER(xto, q)
+{
+    QT0 = int64_to_float128(*((int64_t *)&DT1), &env->fp_status);
+}
+#endif
+#undef F_HELPER
+
+/* floating point conversion */
+/* Narrow the float64 in DT1 to float32 and return it. */
+float32 helper_fdtos(void)
+{
+    return float64_to_float32(DT1, &env->fp_status);
+}
+
+/* Widen the float32 src to float64; the result goes to DT0. */
+void helper_fstod(float32 src)
+{
+    DT0 = float32_to_float64(src, &env->fp_status);
+}
+
+/* Narrow the float128 in QT1 to float32 and return it. */
+float32 helper_fqtos(void)
+{
+    return float128_to_float32(QT1, &env->fp_status);
+}
+
+/* Widen the float32 src to float128; the result goes to QT0. */
+void helper_fstoq(float32 src)
+{
+    QT0 = float32_to_float128(src, &env->fp_status);
+}
+
+/* Narrow the float128 in QT1 to float64; the result goes to DT0. */
+void helper_fqtod(void)
+{
+    DT0 = float128_to_float64(QT1, &env->fp_status);
+}
+
+/* Widen the float64 in DT1 to float128; the result goes to QT0. */
+void helper_fdtoq(void)
+{
+    QT0 = float64_to_float128(DT1, &env->fp_status);
+}
+
+/* Float to integer conversion.  */
+/* Convert the float32 src to a 32-bit integer, rounding toward zero. */
+int32_t helper_fstoi(float32 src)
+{
+    return float32_to_int32_round_to_zero(src, &env->fp_status);
+}
+
+/* Convert the float64 in DT1 to a 32-bit integer, rounding toward zero. */
+int32_t helper_fdtoi(void)
+{
+    return float64_to_int32_round_to_zero(DT1, &env->fp_status);
+}
+
+/* Convert the float128 in QT1 to a 32-bit integer, rounding toward zero. */
+int32_t helper_fqtoi(void)
+{
+    return float128_to_int32_round_to_zero(QT1, &env->fp_status);
+}
+
+#ifdef TARGET_SPARC64
+/*
+ * Convert the float32 src to a 64-bit integer (round toward zero) and
+ * store the integer's bit pattern in the storage of DT0.
+ */
+void helper_fstox(float32 src)
+{
+    *((int64_t *)&DT0) = float32_to_int64_round_to_zero(src, &env->fp_status);
+}
+
+/*
+ * Convert the float64 in DT1 to a 64-bit integer (round toward zero) and
+ * store the integer's bit pattern in the storage of DT0.
+ */
+void helper_fdtox(void)
+{
+    *((int64_t *)&DT0) = float64_to_int64_round_to_zero(DT1, &env->fp_status);
+}
+
+/*
+ * Convert the float128 in QT1 to a 64-bit integer (round toward zero) and
+ * store the integer's bit pattern in the storage of DT0.
+ */
+void helper_fqtox(void)
+{
+    *((int64_t *)&DT0) = float128_to_int64_round_to_zero(QT1, &env->fp_status);
+}
+
+/*
+ * Byte-align helper working on the raw 64-bit patterns of DT0 and DT1.
+ * With n = env->gsr & 7:
+ *     DT0 = (DT0 << (n * 8)) | (DT1 >> (64 - n * 8))
+ * The DT1 term is skipped when n == 0, because it would need a shift by
+ * the full 64 bits.
+ */
+void helper_faligndata(void)
 {
-     T0 = get_float_exception_flags(&env->fp_status);
-     if (T0)
-     {
+    uint64_t tmp;
+
+    tmp = (*((uint64_t *)&DT0)) << ((env->gsr & 7) * 8);
+    /* on many architectures a shift of 64 does nothing */
+    if ((env->gsr & 7) != 0) {
+        tmp |= (*((uint64_t *)&DT1)) >> (64 - (env->gsr & 7) * 8);
+    }
+    *((uint64_t *)&DT0) = tmp;
+}
+
+/*
+ * Element accessors for the vis64/vis32 unions.  On big-endian hosts
+ * (WORDS_BIGENDIAN) the array index is mirrored, so that element 0 always
+ * names the least significant byte/halfword/word of the packed value,
+ * whatever the host byte order.
+ *   VIS_B64/VIS_B32   - unsigned bytes
+ *   VIS_W64/VIS_W32   - unsigned 16-bit elements
+ *   VIS_SW64          - signed 16-bit elements
+ *   VIS_L64           - unsigned 32-bit elements
+ */
+#ifdef WORDS_BIGENDIAN
+#define VIS_B64(n) b[7 - (n)]
+#define VIS_W64(n) w[3 - (n)]
+#define VIS_SW64(n) sw[3 - (n)]
+#define VIS_L64(n) l[1 - (n)]
+#define VIS_B32(n) b[3 - (n)]
+#define VIS_W32(n) w[1 - (n)]
+#else
+#define VIS_B64(n) b[n]
+#define VIS_W64(n) w[n]
+#define VIS_SW64(n) sw[n]
+#define VIS_L64(n) l[n]
+#define VIS_B32(n) b[n]
+#define VIS_W32(n) w[n]
+#endif
+
+/*
+ * 64-bit packed value viewed as 8 bytes, 4 unsigned or signed 16-bit
+ * elements, 2 32-bit elements, or one float64.  Elements are meant to be
+ * reached through the VIS_*64 accessor macros.
+ */
+typedef union {
+    uint8_t b[8];
+    uint16_t w[4];
+    int16_t sw[4];
+    uint32_t l[2];
+    float64 d;
+} vis64;
+
+/*
+ * 32-bit packed value viewed as 4 bytes, 2 unsigned 16-bit elements, one
+ * 32-bit integer, or one float32.  Array elements are meant to be reached
+ * through the VIS_*32 accessor macros.
+ */
+typedef union {
+    uint8_t b[4];
+    uint16_t w[2];
+    uint32_t l;
+    float32 f;
+} vis32;
+
+/*
+ * Interleave the four low bytes of DT0 (s) and DT1 (d); the result goes
+ * to DT0.  For k = 0..3: result byte 2k+1 = s byte k, result byte 2k =
+ * d byte k.  The result is built in place in d from the highest byte
+ * down, so each byte of d is read before it is overwritten.
+ */
+void helper_fpmerge(void)
+{
+    vis64 s, d;
+
+    s.d = DT0;
+    d.d = DT1;
+
+    // Reverse calculation order to handle overlap
+    d.VIS_B64(7) = s.VIS_B64(3);
+    d.VIS_B64(6) = d.VIS_B64(3);
+    d.VIS_B64(5) = s.VIS_B64(2);
+    d.VIS_B64(4) = d.VIS_B64(2);
+    d.VIS_B64(3) = s.VIS_B64(1);
+    d.VIS_B64(2) = d.VIS_B64(1);
+    d.VIS_B64(1) = s.VIS_B64(0);
+    //d.VIS_B64(0) = d.VIS_B64(0);
+
+    DT0 = d.d;
+}
+
+/*
+ * Per-lane 8x16 multiply; the result goes to DT0.
+ *
+ * For r = 0..3, the signed 16-bit lane r of DT1 is multiplied by the
+ * unsigned byte r of DT0.  The product is rounded (0x100 is added when
+ * its low 8 bits exceed 0x7f), and the product shifted right by 8 is
+ * stored, truncated to 16 bits, in lane r.
+ */
+void helper_fmul8x16(void)
+{
+    vis64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                 \
+    tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
+    if ((tmp & 0xff) > 0x7f)                                    \
+        tmp += 0x100;                                           \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+/*
+ * 8x16 multiply by a single shared multiplier; the result goes to DT0.
+ *
+ * The multiplier is the signed 16-bit lane 1 of DT1.  For r = 0..3 it is
+ * multiplied by the unsigned byte r of DT0; the product is rounded (0x100
+ * is added when its low 8 bits exceed 0x7f), and the product shifted
+ * right by 8 is stored, truncated to 16 bits, in lane r.
+ */
+void helper_fmul8x16al(void)
+{
+    vis64 s, d;
+    uint32_t tmp;
+    int32_t scale;
+
+    s.d = DT0;
+    d.d = DT1;
+
+    /*
+     * Latch the multiplier first: the results are written back into d, so
+     * reading lane 1 inside PMUL would pick up the already-computed result
+     * of PMUL(1) for lanes 2 and 3.
+     */
+    scale = (int32_t)d.VIS_SW64(1);
+
+#define PMUL(r)                                                 \
+    tmp = scale * (int32_t)s.VIS_B64(r);                        \
+    if ((tmp & 0xff) > 0x7f)                                    \
+        tmp += 0x100;                                           \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+/*
+ * 8x16 multiply by a single shared multiplier; the result goes to DT0.
+ *
+ * The multiplier is the signed 16-bit lane 0 of DT1.  For r = 0..3 it is
+ * multiplied by the unsigned byte r of DT0; the product is rounded (0x100
+ * is added when its low 8 bits exceed 0x7f), and the product shifted
+ * right by 8 is stored, truncated to 16 bits, in lane r.
+ */
+void helper_fmul8x16au(void)
+{
+    vis64 s, d;
+    uint32_t tmp;
+    int32_t scale;
+
+    s.d = DT0;
+    d.d = DT1;
+
+    /*
+     * Latch the multiplier first: the results are written back into d, so
+     * reading lane 0 inside PMUL would pick up the already-computed result
+     * of PMUL(0) for lanes 1, 2 and 3.
+     */
+    scale = (int32_t)d.VIS_SW64(0);
+
+#define PMUL(r)                                                 \
+    tmp = scale * (int32_t)s.VIS_B64(r);                        \
+    if ((tmp & 0xff) > 0x7f)                                    \
+        tmp += 0x100;                                           \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+/*
+ * Per-lane multiply; the result goes to DT0.
+ *
+ * For r = 0..3, the signed 16-bit lane r of DT1 is multiplied by the
+ * signed 16-bit lane r of DT0 shifted right by 8 (i.e. its upper byte).
+ * The product is rounded (0x100 is added when its low 8 bits exceed
+ * 0x7f), and the product shifted right by 8 is stored, truncated to
+ * 16 bits, in lane r.
+ */
+void helper_fmul8sux16(void)
+{
+    vis64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                         \
+    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
+    if ((tmp & 0xff) > 0x7f)                                            \
+        tmp += 0x100;                                                   \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+/*
+ * Per-lane multiply; the result goes to DT0.
+ *
+ * For r = 0..3, the signed 16-bit lane r of DT1 is multiplied by the
+ * unsigned byte 2*r of DT0 (the low byte of DT0's 16-bit lane r).  The
+ * product is rounded (0x100 is added when its low 8 bits exceed 0x7f),
+ * and the product shifted right by 8 is stored, truncated to 16 bits,
+ * in lane r.
+ */
+void helper_fmul8ulx16(void)
+{
+    vis64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                         \
+    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
+    if ((tmp & 0xff) > 0x7f)                                            \
+        tmp += 0x100;                                                   \
+    d.VIS_W64(r) = tmp >> 8;
+
+    PMUL(0);
+    PMUL(1);
+    PMUL(2);
+    PMUL(3);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+/*
+ * Widening variant of helper_fmul8sux16; the result goes to DT0.
+ *
+ * Only 16-bit lanes 0 and 1 are used as inputs.  For r = 0..1, lane r of
+ * DT1 is multiplied by lane r of DT0 shifted right by 8; 0x100 is added
+ * when the low 8 bits of the product exceed 0x7f, and the full 32-bit
+ * value is stored in 32-bit lane r.  Lane 1 is computed first because
+ * writing 32-bit lane 0 overwrites 16-bit lane 1 of d.
+ */
+void helper_fmuld8sux16(void)
+{
+    vis64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                         \
+    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
+    if ((tmp & 0xff) > 0x7f)                                            \
+        tmp += 0x100;                                                   \
+    d.VIS_L64(r) = tmp;
+
+    // Reverse calculation order to handle overlap
+    PMUL(1);
+    PMUL(0);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+/*
+ * Widening variant of helper_fmul8ulx16; the result goes to DT0.
+ *
+ * Only 16-bit lanes 0 and 1 of DT1 are used.  For r = 0..1, lane r of DT1
+ * is multiplied by the unsigned byte 2*r of DT0; 0x100 is added when the
+ * low 8 bits of the product exceed 0x7f, and the full 32-bit value is
+ * stored in 32-bit lane r.  Lane 1 is computed first because writing
+ * 32-bit lane 0 overwrites 16-bit lane 1 of d.
+ */
+void helper_fmuld8ulx16(void)
+{
+    vis64 s, d;
+    uint32_t tmp;
+
+    s.d = DT0;
+    d.d = DT1;
+
+#define PMUL(r)                                                         \
+    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
+    if ((tmp & 0xff) > 0x7f)                                            \
+        tmp += 0x100;                                                   \
+    d.VIS_L64(r) = tmp;
+
+    // Reverse calculation order to handle overlap
+    PMUL(1);
+    PMUL(0);
+#undef PMUL
+
+    DT0 = d.d;
+}
+
+/*
+ * Expand four bytes into four 16-bit lanes; the result goes to DT0.
+ *
+ * The source is the low 32 bits of DT0's raw pattern.  For n = 0..3,
+ * byte n of the source is shifted left by 4 and stored in 16-bit lane n
+ * of the result.
+ */
+void helper_fexpand(void)
+{
+    vis32 s;
+    vis64 d;
+
+    s.l = (uint32_t)(*(uint64_t *)&DT0 & 0xffffffff);
+    d.d = DT1;
+    /*
+     * Index byte lanes of the 32-bit source and 16-bit lanes of the 64-bit
+     * destination: vis64.l[] and vis32.w[] only have two elements each, so
+     * addressing elements 2 and 3 through VIS_L64/VIS_W32 ran past the end
+     * of the unions.
+     */
+    d.VIS_W64(0) = s.VIS_B32(0) << 4;
+    d.VIS_W64(1) = s.VIS_B32(1) << 4;
+    d.VIS_W64(2) = s.VIS_B32(2) << 4;
+    d.VIS_W64(3) = s.VIS_B32(3) << 4;
+
+    DT0 = d.d;
+}
+
+/*
+ * Generate four lane-wise helpers that apply F(d_lane, s_lane):
+ *   name##16  - four 16-bit lanes; s = DT0, d = DT1, result to DT0
+ *   name##16s - two 16-bit lanes; s = src1, d = src2, result returned
+ *   name##32  - two 32-bit lanes; s = DT0, d = DT1, result to DT0
+ *   name##32s - one 32-bit value; s = src1, d = src2, result returned
+ * Each lane result is truncated to the lane width on assignment.
+ */
+#define VIS_HELPER(name, F)                             \
+    void name##16(void)                                 \
+    {                                                   \
+        vis64 s, d;                                     \
+                                                        \
+        s.d = DT0;                                      \
+        d.d = DT1;                                      \
+                                                        \
+        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
+        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
+        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
+        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
+                                                        \
+        DT0 = d.d;                                      \
+    }                                                   \
+                                                        \
+    uint32_t name##16s(uint32_t src1, uint32_t src2)    \
+    {                                                   \
+        vis32 s, d;                                     \
+                                                        \
+        s.l = src1;                                     \
+        d.l = src2;                                     \
+                                                        \
+        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));   \
+        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));   \
+                                                        \
+        return d.l;                                     \
+    }                                                   \
+                                                        \
+    void name##32(void)                                 \
+    {                                                   \
+        vis64 s, d;                                     \
+                                                        \
+        s.d = DT0;                                      \
+        d.d = DT1;                                      \
+                                                        \
+        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
+        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
+                                                        \
+        DT0 = d.d;                                      \
+    }                                                   \
+                                                        \
+    uint32_t name##32s(uint32_t src1, uint32_t src2)    \
+    {                                                   \
+        vis32 s, d;                                     \
+                                                        \
+        s.l = src1;                                     \
+        d.l = src2;                                     \
+                                                        \
+        d.l = F(d.l, s.l);                              \
+                                                        \
+        return d.l;                                     \
+    }
+
+/* Lane operations: F(a, b) is called with a = d lane, b = s lane. */
+#define FADD(a, b) ((a) + (b))
+#define FSUB(a, b) ((a) - (b))
+VIS_HELPER(helper_fpadd, FADD)
+VIS_HELPER(helper_fpsub, FSUB)
+
+/*
+ * Generate two lane-wise compare helpers (s = DT0, d = DT1, result to DT0):
+ *   name##16 - compares the four 16-bit lanes; bit r (value 1 << r) of the
+ *              mask is set when F(d lane r, s lane r) is true
+ *   name##32 - compares the two 32-bit lanes, producing mask bits 0 and 1
+ * The mask is written into lane 0 of d (16-bit lane for name##16, 32-bit
+ * lane for name##32); the remaining lanes of the value stored in DT0 keep
+ * their DT1 contents.  Lane 0 is compared before it is overwritten.
+ */
+#define VIS_CMPHELPER(name, F)                                        \
+    void name##16(void)                                           \
+    {                                                             \
+        vis64 s, d;                                               \
+                                                                  \
+        s.d = DT0;                                                \
+        d.d = DT1;                                                \
+                                                                  \
+        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0))? 1: 0;       \
+        d.VIS_W64(0) |= F(d.VIS_W64(1), s.VIS_W64(1))? 2: 0;      \
+        d.VIS_W64(0) |= F(d.VIS_W64(2), s.VIS_W64(2))? 4: 0;      \
+        d.VIS_W64(0) |= F(d.VIS_W64(3), s.VIS_W64(3))? 8: 0;      \
+                                                                  \
+        DT0 = d.d;                                                \
+    }                                                             \
+                                                                  \
+    void name##32(void)                                           \
+    {                                                             \
+        vis64 s, d;                                               \
+                                                                  \
+        s.d = DT0;                                                \
+        d.d = DT1;                                                \
+                                                                  \
+        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0))? 1: 0;       \
+        d.VIS_L64(0) |= F(d.VIS_L64(1), s.VIS_L64(1))? 2: 0;      \
+                                                                  \
+        DT0 = d.d;                                                \
+    }
+
+/* Lane predicates: F(a, b) is called with a = d lane, b = s lane. */
+#define FCMPGT(a, b) ((a) > (b))
+#define FCMPEQ(a, b) ((a) == (b))
+#define FCMPLE(a, b) ((a) <= (b))
+#define FCMPNE(a, b) ((a) != (b))
+
+VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
+VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
+VIS_CMPHELPER(helper_fcmple, FCMPLE)
+VIS_CMPHELPER(helper_fcmpne, FCMPNE)
+#endif
+
+/*
+ * Fold the pending softfloat exception flags of env->fp_status into
+ * env->fsr.
+ *
+ * Each raised flag sets the matching FSR current-exception bit.  If any
+ * current-exception bit is also enabled in the trap-enable field
+ * (FSR_TEM_MASK shifted right by 23 to line up with FSR_CEXC_MASK),
+ * FSR_FTT_IEEE_EXCP is set and TT_FP_EXCP is raised; otherwise the
+ * current-exception bits are OR-ed into the field 5 bits higher.
+ * Nothing is done when no flag is pending.
+ */
+void helper_check_ieee_exceptions(void)
+{
+    target_ulong status;
+
+    status = get_float_exception_flags(&env->fp_status);
+    if (status) {
         /* Copy IEEE 754 flags into FSR */
-        if (T0 & float_flag_invalid)
+        if (status & float_flag_invalid)
             env->fsr |= FSR_NVC;
-        if (T0 & float_flag_overflow)
+        if (status & float_flag_overflow)
             env->fsr |= FSR_OFC;
-        if (T0 & float_flag_underflow)
+        if (status & float_flag_underflow)
             env->fsr |= FSR_UFC;
-        if (T0 & float_flag_divbyzero)
+        if (status & float_flag_divbyzero)
             env->fsr |= FSR_DZC;
-        if (T0 & float_flag_inexact)
+        if (status & float_flag_inexact)
             env->fsr |= FSR_NXC;
 
-        if ((env->fsr & FSR_CEXC_MASK) & ((env->fsr & FSR_TEM_MASK) >> 23))
-        {
+        if ((env->fsr & FSR_CEXC_MASK) & ((env->fsr & FSR_TEM_MASK) >> 23)) {
             /* Unmasked exception, generate a trap */
             env->fsr |= FSR_FTT_IEEE_EXCP;
             raise_exception(TT_FP_EXCP);
-        }
-        else
-        {
+        } else {
             /* Accumulate exceptions */
             env->fsr |= (env->fsr & FSR_CEXC_MASK) << 5;
         }
-     }
-}
-
-#ifdef USE_INT_TO_FLOAT_HELPERS
-void do_fitos(void)
-{
-    set_float_exception_flags(0, &env->fp_status);
-    FT0 = int32_to_float32(*((int32_t *)&FT1), &env->fp_status);
-    check_ieee_exceptions();
-}
-
-void do_fitod(void)
-{
-    DT0 = int32_to_float64(*((int32_t *)&FT1), &env->fp_status);
-}
-#ifdef TARGET_SPARC64
-void do_fxtos(void)
-{
-    set_float_exception_flags(0, &env->fp_status);
-    FT0 = int64_to_float32(*((int64_t *)&DT1), &env->fp_status);
-    check_ieee_exceptions();
+    }
 }
 
-void do_fxtod(void)
+/* Reset the softfloat exception flags held in env->fp_status to 0. */
+void helper_clear_float_exceptions(void)
 {
     set_float_exception_flags(0, &env->fp_status);
-    DT0 = int64_to_float64(*((int64_t *)&DT1), &env->fp_status);
-    check_ieee_exceptions();
 }
-#endif
-#endif
 
-void do_fabss(void)
+/* Return float32_abs(src). */
+float32 helper_fabss(float32 src)
 {
-    FT0 = float32_abs(FT1);
+    return float32_abs(src);
 }
 
 #ifdef TARGET_SPARC64
-void do_fabsd(void)
+/* DT0 = float64_abs(DT1). */
+void helper_fabsd(void)
 {
     DT0 = float64_abs(DT1);
 }
 
-#if defined(CONFIG_USER_ONLY)
-void do_fabsq(void)
+/* QT0 = float128_abs(QT1). */
+void helper_fabsq(void)
 {
     QT0 = float128_abs(QT1);
 }
 #endif
-#endif
 
-void do_fsqrts(void)
+/*
+ * Return the float32 square root of src.  Unlike the removed do_fsqrts(),
+ * this helper neither clears the exception flags beforehand nor checks
+ * them afterwards.
+ */
+float32 helper_fsqrts(float32 src)
 {
-    set_float_exception_flags(0, &env->fp_status);
-    FT0 = float32_sqrt(FT1, &env->fp_status);
-    check_ieee_exceptions();
+    return float32_sqrt(src, &env->fp_status);
 }
 
-void do_fsqrtd(void)
+/*
+ * DT0 = float64 square root of DT1.  The exception flags are neither
+ * cleared nor checked here.
+ */
+void helper_fsqrtd(void)
 {
-    set_float_exception_flags(0, &env->fp_status);
     DT0 = float64_sqrt(DT1, &env->fp_status);
-    check_ieee_exceptions();
 }
 
-#if defined(CONFIG_USER_ONLY)
-void do_fsqrtq(void)
+/*
+ * QT0 = float128 square root of QT1.  The exception flags are neither
+ * cleared nor checked here.
+ */
+void helper_fsqrtq(void)
 {
-    set_float_exception_flags(0, &env->fp_status);
     QT0 = float128_sqrt(QT1, &env->fp_status);
-    check_ieee_exceptions();
 }
-#endif
 
 #define GEN_FCMP(name, size, reg1, reg2, FS, TRAP)                      \
-    void glue(do_, name) (void)                                         \
+    void glue(helper_, name) (void)                                     \
     {                                                                   \
+        target_ulong new_fsr;                                           \
+                                                                        \
         env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                     \
         switch (glue(size, _compare) (reg1, reg2, &env->fp_status)) {   \
         case float_relation_unordered:                                  \
-            T0 = (FSR_FCC1 | FSR_FCC0) << FS;                           \
+            new_fsr = (FSR_FCC1 | FSR_FCC0) << FS;                      \
             if ((env->fsr & FSR_NVM) || TRAP) {                         \
-                env->fsr |= T0;                                         \
+                env->fsr |= new_fsr;                                    \
                 env->fsr |= FSR_NVC;                                    \
                 env->fsr |= FSR_FTT_IEEE_EXCP;                          \
                 raise_exception(TT_FP_EXCP);                            \
@@ -146,117 +656,177 @@ void do_fsqrtq(void)
             }                                                           \
             break;                                                      \
         case float_relation_less:                                       \
-            T0 = FSR_FCC0 << FS;                                        \
+            new_fsr = FSR_FCC0 << FS;                                   \
             break;                                                      \
         case float_relation_greater:                                    \
-            T0 = FSR_FCC1 << FS;                                        \
+            new_fsr = FSR_FCC1 << FS;                                   \
             break;                                                      \
         default:                                                        \
-            T0 = 0;                                                     \
+            new_fsr = 0;                                                \
             break;                                                      \
         }                                                               \
-        env->fsr |= T0;                                                 \
+        env->fsr |= new_fsr;                                            \
+    }
+/*
+ * Define helper_<name>(float32 src1, float32 src2), a compare helper that
+ * takes its operands as arguments.
+ *
+ * The two condition-code bits (FSR_FCC1 | FSR_FCC0) shifted left by FS
+ * are cleared in env->fsr and then set from <size>_compare(src1, src2):
+ *   unordered -> both bits, less -> FCC0, greater -> FCC1, otherwise 0.
+ * On an unordered result, if FSR_NVM is set in env->fsr or TRAP is
+ * non-zero, FSR_NVC and FSR_FTT_IEEE_EXCP are set and TT_FP_EXCP is
+ * raised; otherwise FSR_NVA is set.
+ */
+#define GEN_FCMPS(name, size, FS, TRAP)                                 \
+    void glue(helper_, name)(float32 src1, float32 src2)                \
+    {                                                                   \
+        target_ulong new_fsr;                                           \
+                                                                        \
+        env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                     \
+        switch (glue(size, _compare) (src1, src2, &env->fp_status)) {   \
+        case float_relation_unordered:                                  \
+            new_fsr = (FSR_FCC1 | FSR_FCC0) << FS;                      \
+            if ((env->fsr & FSR_NVM) || TRAP) {                         \
+                env->fsr |= new_fsr;                                    \
+                env->fsr |= FSR_NVC;                                    \
+                env->fsr |= FSR_FTT_IEEE_EXCP;                          \
+                raise_exception(TT_FP_EXCP);                            \
+            } else {                                                    \
+                env->fsr |= FSR_NVA;                                    \
+            }                                                           \
+            break;                                                      \
+        case float_relation_less:                                       \
+            new_fsr = FSR_FCC0 << FS;                                   \
+            break;                                                      \
+        case float_relation_greater:                                    \
+            new_fsr = FSR_FCC1 << FS;                                   \
+            break;                                                      \
+        default:                                                        \
+            new_fsr = 0;                                                \
+            break;                                                      \
+        }                                                               \
+        env->fsr |= new_fsr;                                            \
     }
 
-GEN_FCMP(fcmps, float32, FT0, FT1, 0, 0);
+GEN_FCMPS(fcmps, float32, 0, 0);
 GEN_FCMP(fcmpd, float64, DT0, DT1, 0, 0);
 
-GEN_FCMP(fcmpes, float32, FT0, FT1, 0, 1);
+GEN_FCMPS(fcmpes, float32, 0, 1);
 GEN_FCMP(fcmped, float64, DT0, DT1, 0, 1);
 
-#ifdef CONFIG_USER_ONLY
 GEN_FCMP(fcmpq, float128, QT0, QT1, 0, 0);
 GEN_FCMP(fcmpeq, float128, QT0, QT1, 0, 1);
-#endif
 
 #ifdef TARGET_SPARC64
-GEN_FCMP(fcmps_fcc1, float32, FT0, FT1, 22, 0);
+GEN_FCMPS(fcmps_fcc1, float32, 22, 0);
 GEN_FCMP(fcmpd_fcc1, float64, DT0, DT1, 22, 0);
+GEN_FCMP(fcmpq_fcc1, float128, QT0, QT1, 22, 0);
 
-GEN_FCMP(fcmps_fcc2, float32, FT0, FT1, 24, 0);
+GEN_FCMPS(fcmps_fcc2, float32, 24, 0);
 GEN_FCMP(fcmpd_fcc2, float64, DT0, DT1, 24, 0);
+GEN_FCMP(fcmpq_fcc2, float128, QT0, QT1, 24, 0);
 
-GEN_FCMP(fcmps_fcc3, float32, FT0, FT1, 26, 0);
+GEN_FCMPS(fcmps_fcc3, float32, 26, 0);
 GEN_FCMP(fcmpd_fcc3, float64, DT0, DT1, 26, 0);
+GEN_FCMP(fcmpq_fcc3, float128, QT0, QT1, 26, 0);
 
-GEN_FCMP(fcmpes_fcc1, float32, FT0, FT1, 22, 1);
+GEN_FCMPS(fcmpes_fcc1, float32, 22, 1);
 GEN_FCMP(fcmped_fcc1, float64, DT0, DT1, 22, 1);
+GEN_FCMP(fcmpeq_fcc1, float128, QT0, QT1, 22, 1);
 
-GEN_FCMP(fcmpes_fcc2, float32, FT0, FT1, 24, 1);
+GEN_FCMPS(fcmpes_fcc2, float32, 24, 1);
 GEN_FCMP(fcmped_fcc2, float64, DT0, DT1, 24, 1);
+GEN_FCMP(fcmpeq_fcc2, float128, QT0, QT1, 24, 1);
 
-GEN_FCMP(fcmpes_fcc3, float32, FT0, FT1, 26, 1);
+GEN_FCMPS(fcmpes_fcc3, float32, 26, 1);
 GEN_FCMP(fcmped_fcc3, float64, DT0, DT1, 26, 1);
-#ifdef CONFIG_USER_ONLY
-GEN_FCMP(fcmpq_fcc1, float128, QT0, QT1, 22, 0);
-GEN_FCMP(fcmpq_fcc2, float128, QT0, QT1, 24, 0);
-GEN_FCMP(fcmpq_fcc3, float128, QT0, QT1, 26, 0);
-GEN_FCMP(fcmpeq_fcc1, float128, QT0, QT1, 22, 1);
-GEN_FCMP(fcmpeq_fcc2, float128, QT0, QT1, 24, 1);
 GEN_FCMP(fcmpeq_fcc3, float128, QT0, QT1, 26, 1);
 #endif
-#endif
-
-#ifndef TARGET_SPARC64
-#ifndef CONFIG_USER_ONLY
+#undef GEN_FCMPS
 
-#ifdef DEBUG_MXCC
+#if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY) && \
+    defined(DEBUG_MXCC)
+/*
+ * Debug aid: print the four env->mxccdata[] entries and the eight
+ * env->mxccregs[] entries as 16-digit hexadecimal values.
+ */
 static void dump_mxcc(CPUState *env)
 {
     printf("mxccdata: %016llx %016llx %016llx %016llx\n",
-        env->mxccdata[0], env->mxccdata[1], env->mxccdata[2], env->mxccdata[3]);
+           env->mxccdata[0], env->mxccdata[1],
+           env->mxccdata[2], env->mxccdata[3]);
     printf("mxccregs: %016llx %016llx %016llx %016llx\n"
            "          %016llx %016llx %016llx %016llx\n",
-        env->mxccregs[0], env->mxccregs[1], env->mxccregs[2], env->mxccregs[3],
-        env->mxccregs[4], env->mxccregs[5], env->mxccregs[6], env->mxccregs[7]);
+           env->mxccregs[0], env->mxccregs[1],
+           env->mxccregs[2], env->mxccregs[3],
+           env->mxccregs[4], env->mxccregs[5],
+           env->mxccregs[6], env->mxccregs[7]);
 }
 #endif
 
-void helper_ld_asi(int asi, int size, int sign)
+#if (defined(TARGET_SPARC64) || !defined(CONFIG_USER_ONLY)) \
+    && defined(DEBUG_ASI)
+/*
+ * Debug aid: log one ASI access through DPRINTF_ASI.
+ *
+ * txt is a caller-supplied label, addr and asi identify the access, and
+ * r1 is the value, masked and zero-padded to the access size (1, 2, 4 or
+ * 8 bytes).  Any other size prints nothing.
+ */
+static void dump_asi(const char *txt, target_ulong addr, int asi, int size,
+                     uint64_t r1)
 {
-    uint32_t ret = 0;
-    uint64_t tmp;
-#ifdef DEBUG_MXCC
-    uint32_t last_T0 = T0;
+    switch (size)
+    {
+    case 1:
+        DPRINTF_ASI("%s "TARGET_FMT_lx " asi 0x%02x = %02" PRIx64 "\n", txt,
+                    addr, asi, r1 & 0xff);
+        break;
+    case 2:
+        DPRINTF_ASI("%s "TARGET_FMT_lx " asi 0x%02x = %04" PRIx64 "\n", txt,
+                    addr, asi, r1 & 0xffff);
+        break;
+    case 4:
+        DPRINTF_ASI("%s "TARGET_FMT_lx " asi 0x%02x = %08" PRIx64 "\n", txt,
+                    addr, asi, r1 & 0xffffffff);
+        break;
+    case 8:
+        DPRINTF_ASI("%s "TARGET_FMT_lx " asi 0x%02x = %016" PRIx64 "\n", txt,
+                    addr, asi, r1);
+        break;
+    }
+}
 #endif
 
+#ifndef TARGET_SPARC64
+#ifndef CONFIG_USER_ONLY
+uint64_t helper_ld_asi(target_ulong addr, int asi, int size, int sign)
+{
+    uint64_t ret = 0;
+#if defined(DEBUG_MXCC) || defined(DEBUG_ASI)
+    uint32_t last_addr = addr;
+#endif
+
+    helper_check_align(addr, size - 1);
     switch (asi) {
     case 2: /* SuperSparc MXCC registers */
-        switch (T0) {
+        switch (addr) {
         case 0x01c00a00: /* MXCC control register */
-            if (size == 8) {
-                ret = env->mxccregs[3] >> 32;
-                T0 = env->mxccregs[3];
-            } else
-                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", T0, size);
+            if (size == 8)
+                ret = env->mxccregs[3];
+            else
+                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", addr,
+                             size);
             break;
         case 0x01c00a04: /* MXCC control register */
             if (size == 4)
                 ret = env->mxccregs[3];
             else
-                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", T0, size);
+                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", addr,
+                             size);
             break;
         case 0x01c00c00: /* Module reset register */
             if (size == 8) {
-                ret = env->mxccregs[5] >> 32;
-                T0 = env->mxccregs[5];
+                ret = env->mxccregs[5];
                 // should we do something here?
             } else
-                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", T0, size);
+                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", addr,
+                             size);
             break;
         case 0x01c00f00: /* MBus port address register */
-            if (size == 8) {
-                ret = env->mxccregs[7] >> 32;
-                T0 = env->mxccregs[7];
-            } else
-                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", T0, size);
+            if (size == 8)
+                ret = env->mxccregs[7];
+            else
+                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", addr,
+                             size);
             break;
         default:
-            DPRINTF_MXCC("%08x: unimplemented address, size: %d\n", T0, size);
+            DPRINTF_MXCC("%08x: unimplemented address, size: %d\n", addr,
+                         size);
             break;
         }
-        DPRINTF_MXCC("asi = %d, size = %d, sign = %d, T0 = %08x -> ret = %08x,"
-                     "T0 = %08x\n", asi, size, sign, last_T0, ret, T0);
+        DPRINTF_MXCC("asi = %d, size = %d, sign = %d, "
+                     "addr = %08x -> ret = %08x,"
+                     "addr = %08x\n", asi, size, sign, last_addr, ret, addr);
 #ifdef DEBUG_MXCC
         dump_mxcc(env);
 #endif
@@ -265,19 +835,18 @@ void helper_ld_asi(int asi, int size, int sign)
         {
             int mmulev;
 
-            mmulev = (T0 >> 8) & 15;
+            mmulev = (addr >> 8) & 15;
             if (mmulev > 4)
                 ret = 0;
-            else {
-                ret = mmu_probe(env, T0, mmulev);
-                //bswap32s(&ret);
-            }
-            DPRINTF_MMU("mmu_probe: 0x%08x (lev %d) -> 0x%08x\n", T0, mmulev, ret);
+            else
+                ret = mmu_probe(env, addr, mmulev);
+            DPRINTF_MMU("mmu_probe: 0x%08x (lev %d) -> 0x%08" PRIx64 "\n",
+                        addr, mmulev, ret);
         }
         break;
     case 4: /* read MMU regs */
         {
-            int reg = (T0 >> 8) & 0x1f;
+            int reg = (addr >> 8) & 0x1f;
 
             ret = env->mmuregs[reg];
             if (reg == 3) /* Fault status cleared on read */
@@ -286,63 +855,61 @@ void helper_ld_asi(int asi, int size, int sign)
                 ret = env->mmuregs[3];
             else if (reg == 0x14) /* Fault address read */
                 ret = env->mmuregs[4];
-            DPRINTF_MMU("mmu_read: reg[%d] = 0x%08x\n", reg, ret);
+            DPRINTF_MMU("mmu_read: reg[%d] = 0x%08" PRIx64 "\n", reg, ret);
         }
         break;
+    case 5: // Turbosparc ITLB Diagnostic
+    case 6: // Turbosparc DTLB Diagnostic
+    case 7: // Turbosparc IOTLB Diagnostic
+        break;
     case 9: /* Supervisor code access */
         switch(size) {
         case 1:
-            ret = ldub_code(T0);
+            ret = ldub_code(addr);
             break;
         case 2:
-            ret = lduw_code(T0 & ~1);
+            ret = lduw_code(addr);
             break;
         default:
         case 4:
-            ret = ldl_code(T0 & ~3);
+            ret = ldl_code(addr);
             break;
         case 8:
-            tmp = ldq_code(T0 & ~7);
-            ret = tmp >> 32;
-            T0 = tmp;
+            ret = ldq_code(addr);
             break;
         }
         break;
     case 0xa: /* User data access */
         switch(size) {
         case 1:
-            ret = ldub_user(T0);
+            ret = ldub_user(addr);
             break;
         case 2:
-            ret = lduw_user(T0 & ~1);
+            ret = lduw_user(addr);
             break;
         default:
         case 4:
-            ret = ldl_user(T0 & ~3);
+            ret = ldl_user(addr);
             break;
         case 8:
-            tmp = ldq_user(T0 & ~7);
-            ret = tmp >> 32;
-            T0 = tmp;
+            ret = ldq_user(addr);
             break;
         }
         break;
     case 0xb: /* Supervisor data access */
         switch(size) {
         case 1:
-            ret = ldub_kernel(T0);
+            ret = ldub_kernel(addr);
             break;
         case 2:
-            ret = lduw_kernel(T0 & ~1);
+            ret = lduw_kernel(addr);
             break;
         default:
         case 4:
-            ret = ldl_kernel(T0 & ~3);
+            ret = ldl_kernel(addr);
             break;
         case 8:
-            tmp = ldq_kernel(T0 & ~7);
-            ret = tmp >> 32;
-            T0 = tmp;
+            ret = ldq_kernel(addr);
             break;
         }
         break;
@@ -354,148 +921,175 @@ void helper_ld_asi(int asi, int size, int sign)
     case 0x20: /* MMU passthrough */
         switch(size) {
         case 1:
-            ret = ldub_phys(T0);
+            ret = ldub_phys(addr);
             break;
         case 2:
-            ret = lduw_phys(T0 & ~1);
+            ret = lduw_phys(addr);
             break;
         default:
         case 4:
-            ret = ldl_phys(T0 & ~3);
+            ret = ldl_phys(addr);
             break;
         case 8:
-            tmp = ldq_phys(T0 & ~7);
-            ret = tmp >> 32;
-            T0 = tmp;
+            ret = ldq_phys(addr);
             break;
         }
         break;
-    case 0x2e: /* MMU passthrough, 0xexxxxxxxx */
-    case 0x2f: /* MMU passthrough, 0xfxxxxxxxx */
+    case 0x21 ... 0x2f: /* MMU passthrough, 0x100000000 to 0xfffffffff */
         switch(size) {
         case 1:
-            ret = ldub_phys((target_phys_addr_t)T0
+            ret = ldub_phys((target_phys_addr_t)addr
                             | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         case 2:
-            ret = lduw_phys((target_phys_addr_t)(T0 & ~1)
+            ret = lduw_phys((target_phys_addr_t)addr
                             | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         default:
         case 4:
-            ret = ldl_phys((target_phys_addr_t)(T0 & ~3)
+            ret = ldl_phys((target_phys_addr_t)addr
                            | ((target_phys_addr_t)(asi & 0xf) << 32));
             break;
         case 8:
-            tmp = ldq_phys((target_phys_addr_t)(T0 & ~7)
+            ret = ldq_phys((target_phys_addr_t)addr
                            | ((target_phys_addr_t)(asi & 0xf) << 32));
-            ret = tmp >> 32;
-            T0 = tmp;
             break;
         }
         break;
-    case 0x21 ... 0x2d: /* MMU passthrough, unassigned */
+    case 0x30: // Turbosparc secondary cache diagnostic
+    case 0x31: // Turbosparc RAM snoop
+    case 0x32: // Turbosparc page table descriptor diagnostic
+    case 0x39: /* data cache diagnostic register */
+        ret = 0;
+        break;
+    case 8: /* User code access, XXX */
     default:
-        do_unassigned_access(T0, 0, 0, 1);
+        do_unassigned_access(addr, 0, 0, asi);
         ret = 0;
         break;
     }
     if (sign) {
         switch(size) {
         case 1:
-            T1 = (int8_t) ret;
+            ret = (int8_t) ret;
             break;
         case 2:
-            T1 = (int16_t) ret;
+            ret = (int16_t) ret;
+            break;
+        case 4:
+            ret = (int32_t) ret;
             break;
         default:
-            T1 = ret;
             break;
         }
     }
-    else
-        T1 = ret;
+#ifdef DEBUG_ASI
+    dump_asi("read ", last_addr, asi, size, ret);
+#endif
+    return ret;
 }
 
-void helper_st_asi(int asi, int size)
+void helper_st_asi(target_ulong addr, uint64_t val, int asi, int size) /* ASI store for the non-SPARC64 build (see the "#else TARGET_SPARC64" after this function): address, 64-bit value, ASI number and access size now arrive as arguments instead of the T0/T1/T2 globals */
 {
+    helper_check_align(addr, size - 1); /* one up-front test of addr & (size - 1); the per-case "& ~1 / & ~3 / & ~7" address masks are dropped below */
     switch(asi) {
     case 2: /* SuperSparc MXCC registers */
-        switch (T0) {
+        switch (addr) {
         case 0x01c00000: /* MXCC stream data register 0 */
             if (size == 8)
-                env->mxccdata[0] = ((uint64_t)T1 << 32) | T2;
+                env->mxccdata[0] = val; /* val already holds the full 64-bit datum that used to be assembled from T1:T2 */
             else
-                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", T0, size);
+                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", addr,
+                             size);
             break;
         case 0x01c00008: /* MXCC stream data register 1 */
             if (size == 8)
-                env->mxccdata[1] = ((uint64_t)T1 << 32) | T2;
+                env->mxccdata[1] = val;
             else
-                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", T0, size);
+                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", addr,
+                             size);
             break;
         case 0x01c00010: /* MXCC stream data register 2 */
             if (size == 8)
-                env->mxccdata[2] = ((uint64_t)T1 << 32) | T2;
+                env->mxccdata[2] = val;
             else
-                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", T0, size);
+                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", addr,
+                             size);
             break;
         case 0x01c00018: /* MXCC stream data register 3 */
             if (size == 8)
-                env->mxccdata[3] = ((uint64_t)T1 << 32) | T2;
+                env->mxccdata[3] = val;
             else
-                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", T0, size);
+                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", addr,
+                             size);
             break;
         case 0x01c00100: /* MXCC stream source */
             if (size == 8)
-                env->mxccregs[0] = ((uint64_t)T1 << 32) | T2;
+                env->mxccregs[0] = val;
             else
-                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", T0, size);
-            env->mxccdata[0] = ldq_phys((env->mxccregs[0] & 0xffffffffULL) +  0);
-            env->mxccdata[1] = ldq_phys((env->mxccregs[0] & 0xffffffffULL) +  8);
-            env->mxccdata[2] = ldq_phys((env->mxccregs[0] & 0xffffffffULL) + 16);
-            env->mxccdata[3] = ldq_phys((env->mxccregs[0] & 0xffffffffULL) + 24);
+                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", addr,
+                             size);
+            env->mxccdata[0] = ldq_phys((env->mxccregs[0] & 0xffffffffULL) + /* the four loads run even when size != 8; they then use the previous contents of mxccregs[0] */
+                                        0);
+            env->mxccdata[1] = ldq_phys((env->mxccregs[0] & 0xffffffffULL) +
+                                        8);
+            env->mxccdata[2] = ldq_phys((env->mxccregs[0] & 0xffffffffULL) +
+                                        16);
+            env->mxccdata[3] = ldq_phys((env->mxccregs[0] & 0xffffffffULL) +
+                                        24);
             break;
         case 0x01c00200: /* MXCC stream destination */
             if (size == 8)
-                env->mxccregs[1] = ((uint64_t)T1 << 32) | T2;
+                env->mxccregs[1] = val;
             else
-                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", T0, size);
-            stq_phys((env->mxccregs[1] & 0xffffffffULL) +  0, env->mxccdata[0]);
-            stq_phys((env->mxccregs[1] & 0xffffffffULL) +  8, env->mxccdata[1]);
-            stq_phys((env->mxccregs[1] & 0xffffffffULL) + 16, env->mxccdata[2]);
-            stq_phys((env->mxccregs[1] & 0xffffffffULL) + 24, env->mxccdata[3]);
+                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", addr,
+                             size);
+            stq_phys((env->mxccregs[1] & 0xffffffffULL) +  0, /* writes the four stream data registers to the low 32 bits of the destination address, 8 bytes apart */
+                     env->mxccdata[0]);
+            stq_phys((env->mxccregs[1] & 0xffffffffULL) +  8,
+                     env->mxccdata[1]);
+            stq_phys((env->mxccregs[1] & 0xffffffffULL) + 16,
+                     env->mxccdata[2]);
+            stq_phys((env->mxccregs[1] & 0xffffffffULL) + 24,
+                     env->mxccdata[3]);
             break;
         case 0x01c00a00: /* MXCC control register */
             if (size == 8)
-                env->mxccregs[3] = ((uint64_t)T1 << 32) | T2;
+                env->mxccregs[3] = val;
             else
-                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", T0, size);
+                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", addr,
+                             size);
             break;
         case 0x01c00a04: /* MXCC control register */
             if (size == 4)
-                env->mxccregs[3] = (env->mxccregs[0xa] & 0xffffffff00000000ULL) | T1;
+                env->mxccregs[3] = (env->mxccregs[3] & 0xffffffff00000000ULL) /* keeps the upper 32 bits of register 3 itself; the removed line read mxccregs[0xa] here */
+                    | val; /* NOTE(review): val is 64 bits wide and is OR-ed in unmasked — presumably a 4-byte store never sets the upper half; confirm against the caller */
             else
-                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", T0, size);
+                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", addr,
+                             size);
             break;
         case 0x01c00e00: /* MXCC error register  */
             // writing a 1 bit clears the error
             if (size == 8)
-                env->mxccregs[6] &= ~(((uint64_t)T1 << 32) | T2);
+                env->mxccregs[6] &= ~val;
             else
-                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", T0, size);
+                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", addr,
+                             size);
             break;
         case 0x01c00f00: /* MBus port address register */
             if (size == 8)
-                env->mxccregs[7] = ((uint64_t)T1 << 32) | T2;
+                env->mxccregs[7] = val;
             else
-                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", T0, size);
+                DPRINTF_MXCC("%08x: unimplemented access size: %d\n", addr,
+                             size);
             break;
         default:
-            DPRINTF_MXCC("%08x: unimplemented address, size: %d\n", T0, size);
+            DPRINTF_MXCC("%08x: unimplemented address, size: %d\n", addr,
+                         size);
             break;
         }
-        DPRINTF_MXCC("asi = %d, size = %d, T0 = %08x, T1 = %08x\n", asi, size, T0, T1);
+        DPRINTF_MXCC("asi = %d, size = %d, addr = %08x, val = %08x\n", asi,
+                     size, addr, val); /* NOTE(review): val is uint64_t but is printed with %08x — format/argument mismatch once DEBUG_MXCC turns this into printf(); confirm and switch to PRIx64 */
 #ifdef DEBUG_MXCC
         dump_mxcc(env);
 #endif
@@ -504,11 +1098,11 @@ void helper_st_asi(int asi, int size)
         {
             int mmulev;
 
-            mmulev = (T0 >> 8) & 15;
+            mmulev = (addr >> 8) & 15; /* flush level is taken from address bits 11:8 */
             DPRINTF_MMU("mmu flush level %d\n", mmulev);
             switch (mmulev) {
             case 0: // flush page
-                tlb_flush_page(env, T0 & 0xfffff000);
+                tlb_flush_page(env, addr & 0xfffff000); /* low 12 bits cleared before the page flush */
                 break;
             case 1: // flush segment (256k)
             case 2: // flush region (16M)
@@ -522,84 +1116,95 @@ void helper_st_asi(int asi, int size)
 #ifdef DEBUG_MMU
             dump_mmu(env);
 #endif
-            return;
         }
+        break; /* was "return" inside the braces; breaking lets the DEBUG_ASI dump at the end of the function run for this case too */
     case 4: /* write MMU regs */
         {
-            int reg = (T0 >> 8) & 0x1f;
+            int reg = (addr >> 8) & 0x1f; /* register index is taken from address bits 12:8 */
             uint32_t oldreg;
 
             oldreg = env->mmuregs[reg];
             switch(reg) {
-            case 0:
+            case 0: // Control Register
                 env->mmuregs[reg] = (env->mmuregs[reg] & 0xff000000) |
-                                    (T1 & 0x00ffffff);
+                                    (val & 0x00ffffff); /* only the low 24 bits are writable; the top byte keeps its old value */
                 // Mappings generated during no-fault mode or MMU
                 // disabled mode are invalid in normal mode
-                if ((oldreg & (MMU_E | MMU_NF | env->mmu_bm)) !=
-                    (env->mmuregs[reg] & (MMU_E | MMU_NF | env->mmu_bm)))
+                if ((oldreg & (MMU_E | MMU_NF | env->def->mmu_bm)) != /* mmu_bm is now read through env->def instead of directly from env */
+                    (env->mmuregs[reg] & (MMU_E | MMU_NF | env->def->mmu_bm)))
                     tlb_flush(env, 1);
                 break;
-            case 2:
-                env->mmuregs[reg] = T1;
+            case 1: // Context Table Pointer Register
+                env->mmuregs[reg] = val & env->def->mmu_ctpr_mask; /* new case: register 1 used to hit the default branch and store the raw value */
+                break;
+            case 2: // Context Register
+                env->mmuregs[reg] = val & env->def->mmu_cxr_mask; /* the value is now masked before the changed-context comparison below */
                 if (oldreg != env->mmuregs[reg]) {
                     /* we flush when the MMU context changes because
                        QEMU has no MMU context support */
                     tlb_flush(env, 1);
                 }
                 break;
-            case 3:
-            case 4:
+            case 3: // Synchronous Fault Status Register with Clear
+            case 4: // Synchronous Fault Address Register
+                break; /* writes to registers 3 and 4 are ignored */
+            case 0x10: // TLB Replacement Control Register
+                env->mmuregs[reg] = val & env->def->mmu_trcr_mask; /* new case: previously stored unmasked by the default branch */
                 break;
-            case 0x13:
-                env->mmuregs[3] = T1;
+            case 0x13: // Synchronous Fault Status Register with Read and Clear
+                env->mmuregs[3] = val & env->def->mmu_sfsr_mask; /* index 0x13 writes register 3 */
                 break;
-            case 0x14:
-                env->mmuregs[4] = T1;
+            case 0x14: // Synchronous Fault Address Register
+                env->mmuregs[4] = val; /* index 0x14 writes register 4 */
                 break;
             default:
-                env->mmuregs[reg] = T1;
+                env->mmuregs[reg] = val;
                 break;
             }
             if (oldreg != env->mmuregs[reg]) {
-                DPRINTF_MMU("mmu change reg[%d]: 0x%08x -> 0x%08x\n", reg, oldreg, env->mmuregs[reg]);
+                DPRINTF_MMU("mmu change reg[%d]: 0x%08x -> 0x%08x\n",
+                            reg, oldreg, env->mmuregs[reg]);
             }
 #ifdef DEBUG_MMU
             dump_mmu(env);
 #endif
-            return;
         }
+        break; /* was "return"; see the DEBUG_ASI dump at the end of the function */
+    case 5: // Turbosparc ITLB Diagnostic
+    case 6: // Turbosparc DTLB Diagnostic
+    case 7: // Turbosparc IOTLB Diagnostic
+        break; /* accepted and ignored: no state is written for ASIs 5-7 */
     case 0xa: /* User data access */
         switch(size) {
         case 1:
-            stb_user(T0, T1);
+            stb_user(addr, val);
             break;
         case 2:
-            stw_user(T0 & ~1, T1);
+            stw_user(addr, val);
             break;
         default:
         case 4:
-            stl_user(T0 & ~3, T1);
+            stl_user(addr, val);
             break;
         case 8:
-            stq_user(T0 & ~7, ((uint64_t)T1 << 32) | T2);
+            stq_user(addr, val);
             break;
         }
         break;
     case 0xb: /* Supervisor data access */
         switch(size) {
         case 1:
-            stb_kernel(T0, T1);
+            stb_kernel(addr, val);
             break;
         case 2:
-            stw_kernel(T0 & ~1, T1);
+            stw_kernel(addr, val);
             break;
         default:
         case 4:
-            stl_kernel(T0 & ~3, T1);
+            stl_kernel(addr, val);
             break;
         case 8:
-            stq_kernel(T0 & ~7, ((uint64_t)T1 << 32) | T2);
+            stq_kernel(addr, val);
             break;
         }
         break;
@@ -615,127 +1220,150 @@ void helper_st_asi(int asi, int size)
         break;
     case 0x17: /* Block copy, sta access */
         {
-            // value (T1) = src
-            // address (T0) = dst
+            // val = src
+            // addr = dst
             // copy 32 bytes
             unsigned int i;
-            uint32_t src = T1 & ~3, dst = T0 & ~3, temp;
+            uint32_t src = val & ~3, dst = addr & ~3, temp; /* both addresses are rounded down to a 4-byte boundary before the eight word copies */
 
             for (i = 0; i < 32; i += 4, src += 4, dst += 4) {
                 temp = ldl_kernel(src);
                 stl_kernel(dst, temp);
             }
         }
-        return;
+        break;
     case 0x1f: /* Block fill, stda access */
         {
-            // value (T1, T2)
-            // address (T0) = dst
-            // fill 32 bytes
+            // addr = dst
+            // fill 32 bytes with val
             unsigned int i;
-            uint32_t dst = T0 & 7;
-            uint64_t val;
-
-            val = (((uint64_t)T1) << 32) | T2;
+            uint32_t dst = addr & 7; /* NOTE(review): "& 7" keeps only the low three address bits, so the fill starts at 0..7 instead of at addr; the block copy above uses "& ~3", so "& ~7" was presumably intended (behaviour carried over from the removed T0 line) — confirm */
 
             for (i = 0; i < 32; i += 8, dst += 8)
                 stq_kernel(dst, val);
         }
-        return;
+        break;
     case 0x20: /* MMU passthrough */
         {
             switch(size) {
             case 1:
-                stb_phys(T0, T1);
+                stb_phys(addr, val);
                 break;
             case 2:
-                stw_phys(T0 & ~1, T1);
+                stw_phys(addr, val);
                 break;
             case 4:
             default:
-                stl_phys(T0 & ~3, T1);
+                stl_phys(addr, val);
                 break;
             case 8:
-                stq_phys(T0 & ~7, ((uint64_t)T1 << 32) | T2);
+                stq_phys(addr, val);
                 break;
             }
         }
-        return;
-    case 0x2e: /* MMU passthrough, 0xexxxxxxxx */
-    case 0x2f: /* MMU passthrough, 0xfxxxxxxxx */
+        break;
+    case 0x21 ... 0x2f: /* MMU passthrough, 0x100000000 to 0xfffffffff */ /* case range (GNU C extension); asi & 0xf supplies physical address bits 35:32 below */
         {
             switch(size) {
             case 1:
-                stb_phys((target_phys_addr_t)T0
-                         | ((target_phys_addr_t)(asi & 0xf) << 32), T1);
+                stb_phys((target_phys_addr_t)addr
+                         | ((target_phys_addr_t)(asi & 0xf) << 32), val);
                 break;
             case 2:
-                stw_phys((target_phys_addr_t)(T0 & ~1)
-                            | ((target_phys_addr_t)(asi & 0xf) << 32), T1);
+                stw_phys((target_phys_addr_t)addr
+                         | ((target_phys_addr_t)(asi & 0xf) << 32), val);
                 break;
             case 4:
             default:
-                stl_phys((target_phys_addr_t)(T0 & ~3)
-                           | ((target_phys_addr_t)(asi & 0xf) << 32), T1);
+                stl_phys((target_phys_addr_t)addr
+                         | ((target_phys_addr_t)(asi & 0xf) << 32), val);
                 break;
             case 8:
-                stq_phys((target_phys_addr_t)(T0 & ~7)
-                           | ((target_phys_addr_t)(asi & 0xf) << 32),
-                         ((uint64_t)T1 << 32) | T2);
+                stq_phys((target_phys_addr_t)addr
+                         | ((target_phys_addr_t)(asi & 0xf) << 32), val);
                 break;
             }
         }
-        return;
-    case 0x31: /* Ross RT620 I-cache flush */
+        break;
+    case 0x30: // store buffer tags or Turbosparc secondary cache diagnostic
+    case 0x31: // store buffer data, Ross RT620 I-cache flush or
+               // Turbosparc snoop RAM
+    case 0x32: // store buffer control or Turbosparc page table
+               // descriptor diagnostic
     case 0x36: /* I-cache flash clear */
     case 0x37: /* D-cache flash clear */
+    case 0x38: /* breakpoint diagnostics */
+    case 0x4c: /* breakpoint action */ /* every ASI in this group is accepted as a no-op store */
         break;
+    case 8: /* User code access, XXX */
     case 9: /* Supervisor code access, XXX */
-    case 0x21 ... 0x2d: /* MMU passthrough, unassigned */
     default:
-        do_unassigned_access(T0, 1, 0, 1);
-        return;
+        do_unassigned_access(addr, 1, 0, asi); /* NOTE(review): the last argument changes from the constant 1 to the ASI number here, while the SPARC64 variants later in this file still pass 1 — confirm the parameter's meaning */
+        break;
     }
+#ifdef DEBUG_ASI
+    dump_asi("write", addr, asi, size, val); /* reached by every case now that the early returns above have become breaks */
+#endif
 }
 
 #endif /* CONFIG_USER_ONLY */
 #else /* TARGET_SPARC64 */
 
 #ifdef CONFIG_USER_ONLY
-void helper_ld_asi(int asi, int size, int sign)
+uint64_t helper_ld_asi(target_ulong addr, int asi, int size, int sign) /* SPARC64 user-only (CONFIG_USER_ONLY) ASI load: the address is now a parameter and the loaded value is returned instead of being left in T1 */
 {
     uint64_t ret = 0;
+#if defined(DEBUG_ASI)
+    target_ulong last_addr = addr; /* keeps the caller's address for dump_asi(); addr itself may be changed by address_mask() below */
+#endif
 
     if (asi < 0x80)
         raise_exception(TT_PRIV_ACT);
 
+    helper_check_align(addr, size - 1); /* tests addr & (size - 1) once, replacing the per-case "& ~1 / & ~3 / & ~7" masks */
+    address_mask(env, &addr); /* clears the upper 32 address bits when AM_CHECK(env) is true */
+
     switch (asi) {
-    case 0x80: // Primary
     case 0x82: // Primary no-fault
-    case 0x88: // Primary LE
     case 0x8a: // Primary no-fault LE
+        if (page_check_range(addr, size, PAGE_READ) == -1) { /* no-fault ASIs: when the PAGE_READ check reports -1, return 0 instead of performing the access */
+#ifdef DEBUG_ASI
+            dump_asi("read ", last_addr, asi, size, ret);
+#endif
+            return 0;
+        }
+        // Fall through
+    case 0x80: // Primary
+    case 0x88: // Primary LE
         {
             switch(size) {
             case 1:
-                ret = ldub_raw(T0);
+                ret = ldub_raw(addr);
                 break;
             case 2:
-                ret = lduw_raw(T0 & ~1);
+                ret = lduw_raw(addr);
                 break;
             case 4:
-                ret = ldl_raw(T0 & ~3);
+                ret = ldl_raw(addr);
                 break;
             default:
             case 8:
-                ret = ldq_raw(T0 & ~7);
+                ret = ldq_raw(addr);
                 break;
             }
         }
         break;
-    case 0x81: // Secondary
     case 0x83: // Secondary no-fault
-    case 0x89: // Secondary LE
     case 0x8b: // Secondary no-fault LE
+        if (page_check_range(addr, size, PAGE_READ) == -1) { /* same no-fault probe as the primary ASIs above */
+#ifdef DEBUG_ASI
+            dump_asi("read ", last_addr, asi, size, ret);
+#endif
+            return 0;
+        }
+        // Fall through
+    case 0x81: // Secondary
+    case 0x89: // Secondary LE
         // XXX
         break;
     default:
@@ -781,27 +1409,36 @@ void helper_ld_asi(int asi, int size, int sign)
             break;
         }
     }
-    T1 = ret;
+#ifdef DEBUG_ASI
+    dump_asi("read ", last_addr, asi, size, ret);
+#endif
+    return ret; /* replaces the removed "T1 = ret" */
 }
 
-void helper_st_asi(int asi, int size)
+void helper_st_asi(target_ulong addr, target_ulong val, int asi, int size) /* SPARC64 user-only (CONFIG_USER_ONLY) ASI store: address and value are parameters now instead of the T0/T1 globals */
 {
+#ifdef DEBUG_ASI
+    dump_asi("write", addr, asi, size, val); /* logged before the privilege and alignment checks below */
+#endif
     if (asi < 0x80)
         raise_exception(TT_PRIV_ACT);
 
+    helper_check_align(addr, size - 1); /* tests addr & (size - 1) once, replacing the per-case address masks */
+    address_mask(env, &addr); /* clears the upper 32 address bits when AM_CHECK(env) is true */
+
     /* Convert to little endian */
     switch (asi) {
     case 0x88: // Primary LE
     case 0x89: // Secondary LE
         switch(size) {
         case 2:
-            T0 = bswap16(T0);
+            addr = bswap16(addr); /* NOTE(review): this byte-swaps the address, not the data; under a "Convert to little endian" step one would expect val to be swapped (the removed line did the same to T0) — confirm */
             break;
         case 4:
-            T0 = bswap32(T0);
+            addr = bswap32(addr); /* NOTE(review): same concern as the size-2 case — swaps addr rather than val */
             break;
         case 8:
-            T0 = bswap64(T0);
+            addr = bswap64(addr); /* NOTE(review): same concern as the size-2 case — swaps addr rather than val */
             break;
         default:
             break;
@@ -816,17 +1453,17 @@ void helper_st_asi(int asi, int size)
         {
             switch(size) {
             case 1:
-                stb_raw(T0, T1);
+                stb_raw(addr, val);
                 break;
             case 2:
-                stw_raw(T0 & ~1, T1);
+                stw_raw(addr, val);
                 break;
             case 4:
-                stl_raw(T0 & ~3, T1);
+                stl_raw(addr, val);
                 break;
             case 8:
             default:
-                stq_raw(T0 & ~7, T1);
+                stq_raw(addr, val);
                 break;
             }
         }
@@ -841,76 +1478,90 @@ void helper_st_asi(int asi, int size)
     case 0x8a: // Primary no-fault LE, RO
     case 0x8b: // Secondary no-fault LE, RO
     default:
-        do_unassigned_access(T0, 1, 0, 1);
+        do_unassigned_access(addr, 1, 0, 1); /* read-only and unknown ASIs are reported as unassigned; the arguments other than the address are unchanged */
         return;
     }
 }
 
 #else /* CONFIG_USER_ONLY */
 
-void helper_ld_asi(int asi, int size, int sign)
+uint64_t helper_ld_asi(target_ulong addr, int asi, int size, int sign) /* SPARC64 system-mode ASI load: the address is now a parameter and the loaded value is returned instead of being left in T1 */
 {
     uint64_t ret = 0;
+#if defined(DEBUG_ASI)
+    target_ulong last_addr = addr; /* copy of the incoming address, used only by the dump_asi() calls */
+#endif
 
     if ((asi < 0x80 && (env->pstate & PS_PRIV) == 0)
-        || (asi >= 0x30 && asi < 0x80 && !(env->hpstate & HS_PRIV)))
+        || ((env->def->features & CPU_FEATURE_HYPV) /* the HS_PRIV requirement for ASIs 0x30-0x7f now applies only when the CPU definition has CPU_FEATURE_HYPV */
+            && asi >= 0x30 && asi < 0x80
+            && !(env->hpstate & HS_PRIV)))
         raise_exception(TT_PRIV_ACT);
 
+    helper_check_align(addr, size - 1); /* tests addr & (size - 1) once, replacing the per-case "& ~1 / & ~3 / & ~7" masks */
     switch (asi) {
+    case 0x82: // Primary no-fault
+    case 0x8a: // Primary no-fault LE
+        if (cpu_get_phys_page_debug(env, addr) == -1ULL) { /* no-fault ASIs: when the lookup returns -1, give back 0 instead of performing the access */
+#ifdef DEBUG_ASI
+            dump_asi("read ", last_addr, asi, size, ret);
+#endif
+            return 0;
+        }
+        // Fall through
     case 0x10: // As if user primary
     case 0x18: // As if user primary LE
     case 0x80: // Primary
-    case 0x82: // Primary no-fault
     case 0x88: // Primary LE
-    case 0x8a: // Primary no-fault LE
         if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
-            if (env->hpstate & HS_PRIV) {
+            if ((env->def->features & CPU_FEATURE_HYPV) /* the *_hypv accessors are used only when the CPU has CPU_FEATURE_HYPV and HS_PRIV is set */
+                && env->hpstate & HS_PRIV) {
                 switch(size) {
                 case 1:
-                    ret = ldub_hypv(T0);
+                    ret = ldub_hypv(addr);
                     break;
                 case 2:
-                    ret = lduw_hypv(T0 & ~1);
+                    ret = lduw_hypv(addr);
                     break;
                 case 4:
-                    ret = ldl_hypv(T0 & ~3);
+                    ret = ldl_hypv(addr);
                     break;
                 default:
                 case 8:
-                    ret = ldq_hypv(T0 & ~7);
+                    ret = ldq_hypv(addr);
                     break;
                 }
             } else {
                 switch(size) {
                 case 1:
-                    ret = ldub_kernel(T0);
+                    ret = ldub_kernel(addr);
                     break;
                 case 2:
-                    ret = lduw_kernel(T0 & ~1);
+                    ret = lduw_kernel(addr);
                     break;
                 case 4:
-                    ret = ldl_kernel(T0 & ~3);
+                    ret = ldl_kernel(addr);
                     break;
                 default:
                 case 8:
-                    ret = ldq_kernel(T0 & ~7);
+                    ret = ldq_kernel(addr);
                     break;
                 }
             }
         } else {
             switch(size) {
             case 1:
-                ret = ldub_user(T0);
+                ret = ldub_user(addr);
                 break;
             case 2:
-                ret = lduw_user(T0 & ~1);
+                ret = lduw_user(addr);
                 break;
             case 4:
-                ret = ldl_user(T0 & ~3);
+                ret = ldl_user(addr);
                 break;
             default:
             case 8:
-                ret = ldq_user(T0 & ~7);
+                ret = ldq_user(addr);
                 break;
             }
         }
@@ -922,32 +1573,42 @@ void helper_ld_asi(int asi, int size, int sign)
         {
             switch(size) {
             case 1:
-                ret = ldub_phys(T0);
+                ret = ldub_phys(addr);
                 break;
             case 2:
-                ret = lduw_phys(T0 & ~1);
+                ret = lduw_phys(addr);
                 break;
             case 4:
-                ret = ldl_phys(T0 & ~3);
+                ret = ldl_phys(addr);
                 break;
             default:
             case 8:
-                ret = ldq_phys(T0 & ~7);
+                ret = ldq_phys(addr);
                 break;
             }
             break;
         }
+    case 0x24: // Nucleus quad LDD 128 bit atomic
+    case 0x2c: // Nucleus quad LDD 128 bit atomic LE
+        //  Only ldda allowed
+        raise_exception(TT_ILL_INSN); /* these two ASIs used to sit in the "XXX" group below and read as 0; they now raise TT_ILL_INSN */
+        return 0; /* raise_exception() ends in cpu_loop_exit(); presumably never reached — TODO confirm */
+    case 0x83: // Secondary no-fault
+    case 0x8b: // Secondary no-fault LE
+        if (cpu_get_phys_page_debug(env, addr) == -1ULL) { /* same no-fault probe as the primary ASIs above */
+#ifdef DEBUG_ASI
+            dump_asi("read ", last_addr, asi, size, ret);
+#endif
+            return 0;
+        }
+        // Fall through
     case 0x04: // Nucleus
     case 0x0c: // Nucleus Little Endian (LE)
     case 0x11: // As if user secondary
     case 0x19: // As if user secondary LE
-    case 0x24: // Nucleus quad LDD 128 bit atomic
-    case 0x2c: // Nucleus quad LDD 128 bit atomic
     case 0x4a: // UPA config
     case 0x81: // Secondary
-    case 0x83: // Secondary no-fault
     case 0x89: // Secondary LE
-    case 0x8b: // Secondary no-fault LE
         // XXX
         break;
     case 0x45: // LSU
@@ -955,55 +1616,66 @@ void helper_ld_asi(int asi, int size, int sign)
         break;
     case 0x50: // I-MMU regs
         {
-            int reg = (T0 >> 3) & 0xf;
+            int reg = (addr >> 3) & 0xf; /* register index is taken from address bits 6:3 */
 
             ret = env->immuregs[reg];
             break;
         }
     case 0x51: // I-MMU 8k TSB pointer
     case 0x52: // I-MMU 64k TSB pointer
-    case 0x55: // I-MMU data access
         // XXX
         break;
+    case 0x55: // I-MMU data access
+        {
+            int reg = (addr >> 3) & 0x3f; /* entry index 0..63 from address bits 8:3 */
+
+            ret = env->itlb_tte[reg]; /* new: returns the selected ITLB data entry; this ASI previously read as 0 via the XXX group */
+            break;
+        }
     case 0x56: // I-MMU tag read
         {
-            unsigned int i;
+            int reg = (addr >> 3) & 0x3f; /* entry index 0..63 from address bits 8:3 */
 
-            for (i = 0; i < 64; i++) {
-                // Valid, ctx match, vaddr match
-                if ((env->itlb_tte[i] & 0x8000000000000000ULL) != 0 &&
-                    env->itlb_tag[i] == T0) {
-                    ret = env->itlb_tag[i];
-                    break;
-                }
-            }
+            ret = env->itlb_tag[reg]; /* direct indexed read replaces the removed search for a valid entry whose tag equals the address */
             break;
         }
     case 0x58: // D-MMU regs
         {
-            int reg = (T0 >> 3) & 0xf;
+            int reg = (addr >> 3) & 0xf; /* register index is taken from address bits 6:3 */
+
+            ret = env->dmmuregs[reg];
+            break;
+        }
+    case 0x5d: // D-MMU data access
+        {
+            int reg = (addr >> 3) & 0x3f; /* entry index 0..63 from address bits 8:3 */
 
-            ret = env->dmmuregs[reg];
+            ret = env->dtlb_tte[reg]; /* new: returns the selected DTLB data entry; 0x5d was previously in the unassigned group */
             break;
         }
     case 0x5e: // D-MMU tag read
         {
-            unsigned int i;
+            int reg = (addr >> 3) & 0x3f; /* entry index 0..63 from address bits 8:3 */
 
-            for (i = 0; i < 64; i++) {
-                // Valid, ctx match, vaddr match
-                if ((env->dtlb_tte[i] & 0x8000000000000000ULL) != 0 &&
-                    env->dtlb_tag[i] == T0) {
-                    ret = env->dtlb_tag[i];
-                    break;
-                }
-            }
+            ret = env->dtlb_tag[reg]; /* direct indexed read replaces the removed search loop, mirroring the I-MMU tag read */
             break;
         }
+    case 0x46: // D-cache data
+    case 0x47: // D-cache tag access
+    case 0x4b: // E-cache error enable
+    case 0x4c: // E-cache asynchronous fault status
+    case 0x4d: // E-cache asynchronous fault address
+    case 0x4e: // E-cache tag data
+    case 0x66: // I-cache instruction access
+    case 0x67: // I-cache tag access
+    case 0x6e: // I-cache predecode
+    case 0x6f: // I-cache LRU etc.
+    case 0x76: // E-cache tag
+    case 0x7e: // E-cache tag
+        break; /* these ASIs are accepted without calling do_unassigned_access(); ret keeps its initial 0 */
     case 0x59: // D-MMU 8k TSB pointer
     case 0x5a: // D-MMU 64k TSB pointer
     case 0x5b: // D-MMU data pointer
-    case 0x5d: // D-MMU data access
     case 0x48: // Interrupt dispatch, RO
     case 0x49: // Interrupt data receive
     case 0x7f: // Incoming interrupt vector, RO
@@ -1015,7 +1687,7 @@ void helper_ld_asi(int asi, int size, int sign)
     case 0x5f: // D-MMU demap, WO
     case 0x77: // Interrupt vector, WO
     default:
-        do_unassigned_access(T0, 0, 0, 1);
+        do_unassigned_access(addr, 0, 0, 1); /* only the address source changes; the remaining arguments are as before */
         ret = 0;
         break;
     }
@@ -1064,15 +1736,24 @@ void helper_ld_asi(int asi, int size, int sign)
             break;
         }
     }
-    T1 = ret;
+#ifdef DEBUG_ASI
+    dump_asi("read ", last_addr, asi, size, ret);
+#endif
+    return ret; /* replaces the removed "T1 = ret" */
 }
 
-void helper_st_asi(int asi, int size)
+void helper_st_asi(target_ulong addr, target_ulong val, int asi, int size)
 {
+#ifdef DEBUG_ASI
+    dump_asi("write", addr, asi, size, val);
+#endif
     if ((asi < 0x80 && (env->pstate & PS_PRIV) == 0)
-        || (asi >= 0x30 && asi < 0x80 && !(env->hpstate & HS_PRIV)))
+        || ((env->def->features & CPU_FEATURE_HYPV)
+            && asi >= 0x30 && asi < 0x80
+            && !(env->hpstate & HS_PRIV)))
         raise_exception(TT_PRIV_ACT);
 
+    helper_check_align(addr, size - 1);
     /* Convert to little endian */
     switch (asi) {
     case 0x0c: // Nucleus Little Endian (LE)
@@ -1084,13 +1765,13 @@ void helper_st_asi(int asi, int size)
     case 0x89: // Secondary LE
         switch(size) {
         case 2:
-            T0 = bswap16(T0);
+            addr = bswap16(addr);
             break;
         case 4:
-            T0 = bswap32(T0);
+            addr = bswap32(addr);
             break;
         case 8:
-            T0 = bswap64(T0);
+            addr = bswap64(addr);
             break;
         default:
             break;
@@ -1105,53 +1786,54 @@ void helper_st_asi(int asi, int size)
     case 0x80: // Primary
     case 0x88: // Primary LE
         if ((asi & 0x80) && (env->pstate & PS_PRIV)) {
-            if (env->hpstate & HS_PRIV) {
+            if ((env->def->features & CPU_FEATURE_HYPV)
+                && env->hpstate & HS_PRIV) {
                 switch(size) {
                 case 1:
-                    stb_hypv(T0, T1);
+                    stb_hypv(addr, val);
                     break;
                 case 2:
-                    stw_hypv(T0 & ~1, T1);
+                    stw_hypv(addr, val);
                     break;
                 case 4:
-                    stl_hypv(T0 & ~3, T1);
+                    stl_hypv(addr, val);
                     break;
                 case 8:
                 default:
-                    stq_hypv(T0 & ~7, T1);
+                    stq_hypv(addr, val);
                     break;
                 }
             } else {
                 switch(size) {
                 case 1:
-                    stb_kernel(T0, T1);
+                    stb_kernel(addr, val);
                     break;
                 case 2:
-                    stw_kernel(T0 & ~1, T1);
+                    stw_kernel(addr, val);
                     break;
                 case 4:
-                    stl_kernel(T0 & ~3, T1);
+                    stl_kernel(addr, val);
                     break;
                 case 8:
                 default:
-                    stq_kernel(T0 & ~7, T1);
+                    stq_kernel(addr, val);
                     break;
                 }
             }
         } else {
             switch(size) {
             case 1:
-                stb_user(T0, T1);
+                stb_user(addr, val);
                 break;
             case 2:
-                stw_user(T0 & ~1, T1);
+                stw_user(addr, val);
                 break;
             case 4:
-                stl_user(T0 & ~3, T1);
+                stl_user(addr, val);
                 break;
             case 8:
             default:
-                stq_user(T0 & ~7, T1);
+                stq_user(addr, val);
                 break;
             }
         }
@@ -1163,27 +1845,30 @@ void helper_st_asi(int asi, int size)
         {
             switch(size) {
             case 1:
-                stb_phys(T0, T1);
+                stb_phys(addr, val);
                 break;
             case 2:
-                stw_phys(T0 & ~1, T1);
+                stw_phys(addr, val);
                 break;
             case 4:
-                stl_phys(T0 & ~3, T1);
+                stl_phys(addr, val);
                 break;
             case 8:
             default:
-                stq_phys(T0 & ~7, T1);
+                stq_phys(addr, val);
                 break;
             }
         }
         return;
+    case 0x24: // Nucleus quad LDD 128 bit atomic
+    case 0x2c: // Nucleus quad LDD 128 bit atomic LE
+        //  Only ldda allowed
+        raise_exception(TT_ILL_INSN);
+        return;
     case 0x04: // Nucleus
     case 0x0c: // Nucleus Little Endian (LE)
     case 0x11: // As if user secondary
     case 0x19: // As if user secondary LE
-    case 0x24: // Nucleus quad LDD 128 bit atomic
-    case 0x2c: // Nucleus quad LDD 128 bit atomic
     case 0x4a: // UPA config
     case 0x81: // Secondary
     case 0x89: // Secondary LE
@@ -1194,11 +1879,12 @@ void helper_st_asi(int asi, int size)
             uint64_t oldreg;
 
             oldreg = env->lsu;
-            env->lsu = T1 & (DMMU_E | IMMU_E);
+            env->lsu = val & (DMMU_E | IMMU_E);
             // Mappings generated during D/I MMU disabled mode are
             // invalid in normal mode
             if (oldreg != env->lsu) {
-                DPRINTF_MMU("LSU change: 0x%" PRIx64 " -> 0x%" PRIx64 "\n", oldreg, env->lsu);
+                DPRINTF_MMU("LSU change: 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
+                            oldreg, env->lsu);
 #ifdef DEBUG_MMU
                 dump_mmu(env);
 #endif
@@ -1208,7 +1894,7 @@ void helper_st_asi(int asi, int size)
         }
     case 0x50: // I-MMU regs
         {
-            int reg = (T0 >> 3) & 0xf;
+            int reg = (addr >> 3) & 0xf;
             uint64_t oldreg;
 
             oldreg = env->immuregs[reg];
@@ -1222,17 +1908,18 @@ void helper_st_asi(int asi, int size)
             case 8:
                 return;
             case 3: // SFSR
-                if ((T1 & 1) == 0)
-                    T1 = 0; // Clear SFSR
+                if ((val & 1) == 0)
+                    val = 0; // Clear SFSR
                 break;
             case 5: // TSB access
             case 6: // Tag access
             default:
                 break;
             }
-            env->immuregs[reg] = T1;
+            env->immuregs[reg] = val;
             if (oldreg != env->immuregs[reg]) {
-                DPRINTF_MMU("mmu change reg[%d]: 0x%08" PRIx64 " -> 0x%08" PRIx64 "\n", reg, oldreg, env->immuregs[reg]);
+                DPRINTF_MMU("mmu change reg[%d]: 0x%08" PRIx64 " -> 0x%08"
+                            PRIx64 "\n", reg, oldreg, env->immuregs[reg]);
             }
 #ifdef DEBUG_MMU
             dump_mmu(env);
@@ -1247,7 +1934,7 @@ void helper_st_asi(int asi, int size)
             for (i = 0; i < 64; i++) {
                 if ((env->itlb_tte[i] & 0x8000000000000000ULL) == 0) {
                     env->itlb_tag[i] = env->immuregs[6];
-                    env->itlb_tte[i] = T1;
+                    env->itlb_tte[i] = val;
                     return;
                 }
             }
@@ -1255,7 +1942,7 @@ void helper_st_asi(int asi, int size)
             for (i = 0; i < 64; i++) {
                 if ((env->itlb_tte[i] & 0x40) == 0) {
                     env->itlb_tag[i] = env->immuregs[6];
-                    env->itlb_tte[i] = T1;
+                    env->itlb_tte[i] = val;
                     return;
                 }
             }
@@ -1264,10 +1951,10 @@ void helper_st_asi(int asi, int size)
         }
     case 0x55: // I-MMU data access
         {
-            unsigned int i = (T0 >> 3) & 0x3f;
+            unsigned int i = (addr >> 3) & 0x3f;
 
             env->itlb_tag[i] = env->immuregs[6];
-            env->itlb_tte[i] = T1;
+            env->itlb_tte[i] = val;
             return;
         }
     case 0x57: // I-MMU demap
@@ -1275,7 +1962,7 @@ void helper_st_asi(int asi, int size)
         return;
     case 0x58: // D-MMU regs
         {
-            int reg = (T0 >> 3) & 0xf;
+            int reg = (addr >> 3) & 0xf;
             uint64_t oldreg;
 
             oldreg = env->dmmuregs[reg];
@@ -1284,11 +1971,11 @@ void helper_st_asi(int asi, int size)
             case 4:
                 return;
             case 3: // SFSR
-                if ((T1 & 1) == 0) {
-                    T1 = 0; // Clear SFSR, Fault address
+                if ((val & 1) == 0) {
+                    val = 0; // Clear SFSR, Fault address
                     env->dmmuregs[4] = 0;
                 }
-                env->dmmuregs[reg] = T1;
+                env->dmmuregs[reg] = val;
                 break;
             case 1: // Primary context
             case 2: // Secondary context
@@ -1299,9 +1986,10 @@ void helper_st_asi(int asi, int size)
             default:
                 break;
             }
-            env->dmmuregs[reg] = T1;
+            env->dmmuregs[reg] = val;
             if (oldreg != env->dmmuregs[reg]) {
-                DPRINTF_MMU("mmu change reg[%d]: 0x%08" PRIx64 " -> 0x%08" PRIx64 "\n", reg, oldreg, env->dmmuregs[reg]);
+                DPRINTF_MMU("mmu change reg[%d]: 0x%08" PRIx64 " -> 0x%08"
+                            PRIx64 "\n", reg, oldreg, env->dmmuregs[reg]);
             }
 #ifdef DEBUG_MMU
             dump_mmu(env);
@@ -1316,7 +2004,7 @@ void helper_st_asi(int asi, int size)
             for (i = 0; i < 64; i++) {
                 if ((env->dtlb_tte[i] & 0x8000000000000000ULL) == 0) {
                     env->dtlb_tag[i] = env->dmmuregs[6];
-                    env->dtlb_tte[i] = T1;
+                    env->dtlb_tte[i] = val;
                     return;
                 }
             }
@@ -1324,7 +2012,7 @@ void helper_st_asi(int asi, int size)
             for (i = 0; i < 64; i++) {
                 if ((env->dtlb_tte[i] & 0x40) == 0) {
                     env->dtlb_tag[i] = env->dmmuregs[6];
-                    env->dtlb_tte[i] = T1;
+                    env->dtlb_tte[i] = val;
                     return;
                 }
             }
@@ -1333,16 +2021,29 @@ void helper_st_asi(int asi, int size)
         }
     case 0x5d: // D-MMU data access
         {
-            unsigned int i = (T0 >> 3) & 0x3f;
+            unsigned int i = (addr >> 3) & 0x3f;
 
             env->dtlb_tag[i] = env->dmmuregs[6];
-            env->dtlb_tte[i] = T1;
+            env->dtlb_tte[i] = val;
             return;
         }
     case 0x5f: // D-MMU demap
     case 0x49: // Interrupt data receive
         // XXX
         return;
+    case 0x46: // D-cache data
+    case 0x47: // D-cache tag access
+    case 0x4b: // E-cache error enable
+    case 0x4c: // E-cache asynchronous fault status
+    case 0x4d: // E-cache asynchronous fault address
+    case 0x4e: // E-cache tag data
+    case 0x66: // I-cache instruction access
+    case 0x67: // I-cache tag access
+    case 0x6e: // I-cache predecode
+    case 0x6f: // I-cache LRU etc.
+    case 0x76: // E-cache tag
+    case 0x7e: // E-cache tag
+        return;
     case 0x51: // I-MMU 8k TSB pointer, RO
     case 0x52: // I-MMU 64k TSB pointer, RO
     case 0x56: // I-MMU tag read, RO
@@ -1357,17 +2058,65 @@ void helper_st_asi(int asi, int size)
     case 0x8a: // Primary no-fault LE, RO
     case 0x8b: // Secondary no-fault LE, RO
     default:
-        do_unassigned_access(T0, 1, 0, 1);
+        do_unassigned_access(addr, 1, 0, 1);
         return;
     }
 }
 #endif /* CONFIG_USER_ONLY */
 
-void helper_ldf_asi(int asi, int size, int rd)
+/*
+ * LDDA: load a register pair from an alternate address space.
+ *
+ * addr: virtual address of the first element
+ * asi:  address space identifier
+ * rd:   architectural number of the first destination register
+ *
+ * ASIs 0x24/0x2c load two 64-bit words (16-byte aligned, byte-swapped for
+ * 0x2c) via the kernel accessors; every other ASI loads two 32-bit words
+ * through helper_ld_asi().  When rd is 0 only the second register (%g1) is
+ * written.  Registers 0-7 live in env->gregs; registers 8 and up live in
+ * the current window, where env->regwptr[0] is r8 (do_interrupt reads the
+ * stack pointer, r14, as regwptr[6]), hence the "rd - 8" index below.
+ */
+void helper_ldda_asi(target_ulong addr, int asi, int rd)
+{
+    if ((asi < 0x80 && (env->pstate & PS_PRIV) == 0)
+        || ((env->def->features & CPU_FEATURE_HYPV)
+            && asi >= 0x30 && asi < 0x80
+            && !(env->hpstate & HS_PRIV)))
+        raise_exception(TT_PRIV_ACT);
+
+    switch (asi) {
+    case 0x24: // Nucleus quad LDD 128 bit atomic
+    case 0x2c: // Nucleus quad LDD 128 bit atomic LE
+        helper_check_align(addr, 0xf);
+        if (rd == 0) {
+            env->gregs[1] = ldq_kernel(addr + 8);
+            if (asi == 0x2c)
+                bswap64s(&env->gregs[1]);
+        } else if (rd < 8) {
+            env->gregs[rd] = ldq_kernel(addr);
+            env->gregs[rd + 1] = ldq_kernel(addr + 8);
+            if (asi == 0x2c) {
+                bswap64s(&env->gregs[rd]);
+                bswap64s(&env->gregs[rd + 1]);
+            }
+        } else {
+            /* Windowed register: regwptr is based at r8 */
+            env->regwptr[rd - 8] = ldq_kernel(addr);
+            env->regwptr[rd - 8 + 1] = ldq_kernel(addr + 8);
+            if (asi == 0x2c) {
+                bswap64s(&env->regwptr[rd - 8]);
+                bswap64s(&env->regwptr[rd - 8 + 1]);
+            }
+        }
+        break;
+    default:
+        helper_check_align(addr, 0x3);
+        if (rd == 0)
+            env->gregs[1] = helper_ld_asi(addr + 4, asi, 4, 0);
+        else if (rd < 8) {
+            env->gregs[rd] = helper_ld_asi(addr, asi, 4, 0);
+            env->gregs[rd + 1] = helper_ld_asi(addr + 4, asi, 4, 0);
+        } else {
+            /* Windowed register: regwptr is based at r8 */
+            env->regwptr[rd - 8] = helper_ld_asi(addr, asi, 4, 0);
+            env->regwptr[rd - 8 + 1] = helper_ld_asi(addr + 4, asi, 4, 0);
+        }
+        break;
+    }
+}
+
+void helper_ldf_asi(target_ulong addr, int asi, int size, int rd)
 {
-    target_ulong tmp_T0 = T0, tmp_T1 = T1;
     unsigned int i;
+    target_ulong val;
 
+    helper_check_align(addr, 3);
     switch (asi) {
     case 0xf0: // Block load primary
     case 0xf1: // Block load secondary
@@ -1377,46 +2126,39 @@ void helper_ldf_asi(int asi, int size, int rd)
             raise_exception(TT_ILL_INSN);
             return;
         }
-        if (T0 & 0x3f) {
-            raise_exception(TT_UNALIGNED);
-            return;
-        }
+        helper_check_align(addr, 0x3f);
         for (i = 0; i < 16; i++) {
-            helper_ld_asi(asi & 0x8f, 4, 0);
-            *(uint32_t *)&env->fpr[rd++] = T1;
-            T0 += 4;
+            *(uint32_t *)&env->fpr[rd++] = helper_ld_asi(addr, asi & 0x8f, 4,
+                                                         0);
+            addr += 4;
         }
-        T0 = tmp_T0;
-        T1 = tmp_T1;
 
         return;
     default:
         break;
     }
 
-    helper_ld_asi(asi, size, 0);
+    val = helper_ld_asi(addr, asi, size, 0);
     switch(size) {
     default:
     case 4:
-        *((uint32_t *)&FT0) = T1;
+        *((uint32_t *)&env->fpr[rd]) = val;
         break;
     case 8:
-        *((int64_t *)&DT0) = T1;
+        *((int64_t *)&DT0) = val;
         break;
-#if defined(CONFIG_USER_ONLY)
     case 16:
         // XXX
         break;
-#endif
     }
-    T1 = tmp_T1;
 }
 
-void helper_stf_asi(int asi, int size, int rd)
+void helper_stf_asi(target_ulong addr, int asi, int size, int rd)
 {
-    target_ulong tmp_T0 = T0, tmp_T1 = T1;
     unsigned int i;
+    target_ulong val = 0;
 
+    helper_check_align(addr, 3);
     switch (asi) {
     case 0xf0: // Block store primary
     case 0xf1: // Block store secondary
@@ -1426,17 +2168,12 @@ void helper_stf_asi(int asi, int size, int rd)
             raise_exception(TT_ILL_INSN);
             return;
         }
-        if (T0 & 0x3f) {
-            raise_exception(TT_UNALIGNED);
-            return;
-        }
+        helper_check_align(addr, 0x3f);
         for (i = 0; i < 16; i++) {
-            T1 = *(uint32_t *)&env->fpr[rd++];
-            helper_st_asi(asi & 0x8f, 4);
-            T0 += 4;
+            val = *(uint32_t *)&env->fpr[rd++];
+            helper_st_asi(addr, val, asi & 0x8f, 4);
+            addr += 4;
         }
-        T0 = tmp_T0;
-        T1 = tmp_T1;
 
         return;
     default:
@@ -1446,25 +2183,45 @@ void helper_stf_asi(int asi, int size, int rd)
     switch(size) {
     default:
     case 4:
-        T1 = *((uint32_t *)&FT0);
+        val = *((uint32_t *)&env->fpr[rd]);
         break;
     case 8:
-        T1 = *((int64_t *)&DT0);
+        val = *((int64_t *)&DT0);
         break;
-#if defined(CONFIG_USER_ONLY)
     case 16:
         // XXX
         break;
-#endif
     }
-    helper_st_asi(asi, size);
-    T1 = tmp_T1;
+    helper_st_asi(addr, val, asi, size);
 }
 
+target_ulong helper_cas_asi(target_ulong addr, target_ulong val1,
+                            target_ulong val2, uint32_t asi)
+{
+    target_ulong ret;
+
+    val2 &= 0xffffffffUL;
+    ret = helper_ld_asi(addr, asi, 4, 0);
+    ret &= 0xffffffffUL;
+    if (val2 == ret)
+        helper_st_asi(addr, val1 & 0xffffffffUL, asi, 4);
+    return ret;
+}
+
+target_ulong helper_casx_asi(target_ulong addr, target_ulong val1,
+                             target_ulong val2, uint32_t asi)
+{
+    target_ulong ret;
+
+    ret = helper_ld_asi(addr, asi, 8, 0);
+    if (val2 == ret)
+        helper_st_asi(addr, val1, asi, 8);
+    return ret;
+}
 #endif /* TARGET_SPARC64 */
 
 #ifndef TARGET_SPARC64
-void helper_rett()
+void helper_rett(void)
 {
     unsigned int cwp;
 
@@ -1472,7 +2229,7 @@ void helper_rett()
         raise_exception(TT_ILL_INSN);
 
     env->psret = 1;
-    cwp = (env->cwp + 1) & (NWINDOWS - 1);
+    cwp = cpu_cwp_inc(env, env->cwp + 1) ;
     if (env->wim & (1 << cwp)) {
         raise_exception(TT_WIN_UNF);
     }
@@ -1481,9 +2238,176 @@ void helper_rett()
 }
 #endif
 
-void helper_ldfsr(void)
+target_ulong helper_udiv(target_ulong a, target_ulong b)
+{
+    uint64_t x0;
+    uint32_t x1;
+
+    x0 = (a & 0xffffffff) | ((int64_t) (env->y) << 32);
+    x1 = b;
+
+    if (x1 == 0) {
+        raise_exception(TT_DIV_ZERO);
+    }
+
+    x0 = x0 / x1;
+    if (x0 > 0xffffffff) {
+        env->cc_src2 = 1;
+        return 0xffffffff;
+    } else {
+        env->cc_src2 = 0;
+        return x0;
+    }
+}
+
+/*
+ * SDIV: signed divide of the 64-bit value Y:a<31:0> by the low 32 bits of b.
+ *
+ * Raises TT_DIV_ZERO when the divisor is zero.  When the quotient does not
+ * fit in 32 bits the result saturates to 0x80000000 / 0x7fffffff and
+ * env->cc_src2 is set to 1; otherwise cc_src2 is cleared.
+ */
+target_ulong helper_sdiv(target_ulong a, target_ulong b)
+{
+    int64_t x0;
+    int32_t x1;
+
+    x0 = (a & 0xffffffff) | ((int64_t) (env->y) << 32);
+    x1 = b;
+
+    if (x1 == 0) {
+        raise_exception(TT_DIV_ZERO);
+    }
+
+    /* INT64_MIN / -1 overflows in C (undefined behaviour, SIGFPE on some
+       hosts); the true quotient is positive and too large, so saturate. */
+    if (x1 == -1 && x0 == INT64_MIN) {
+        env->cc_src2 = 1;
+        return 0x7fffffff;
+    }
+
+    x0 = x0 / x1;
+    if ((int32_t) x0 != x0) {
+        env->cc_src2 = 1;
+        return x0 < 0? 0x80000000: 0x7fffffff;
+    } else {
+        env->cc_src2 = 0;
+        return x0;
+    }
+}
+
+void helper_stdf(target_ulong addr, int mem_idx)
+{
+    helper_check_align(addr, 7);
+#if !defined(CONFIG_USER_ONLY)
+    switch (mem_idx) {
+    case 0:
+        stfq_user(addr, DT0);
+        break;
+    case 1:
+        stfq_kernel(addr, DT0);
+        break;
+#ifdef TARGET_SPARC64
+    case 2:
+        stfq_hypv(addr, DT0);
+        break;
+#endif
+    default:
+        break;
+    }
+#else
+    address_mask(env, &addr);
+    stfq_raw(addr, DT0);
+#endif
+}
+
+void helper_lddf(target_ulong addr, int mem_idx)
+{
+    helper_check_align(addr, 7);
+#if !defined(CONFIG_USER_ONLY)
+    switch (mem_idx) {
+    case 0:
+        DT0 = ldfq_user(addr);
+        break;
+    case 1:
+        DT0 = ldfq_kernel(addr);
+        break;
+#ifdef TARGET_SPARC64
+    case 2:
+        DT0 = ldfq_hypv(addr);
+        break;
+#endif
+    default:
+        break;
+    }
+#else
+    address_mask(env, &addr);
+    DT0 = ldfq_raw(addr);
+#endif
+}
+
+/*
+ * Load a 128-bit floating point value from addr into QT0 as two 64-bit
+ * halves (upper half first).
+ *
+ * addr:    address of the value, checked for 8-byte alignment
+ * mem_idx: softmmu only; 0 = user, 1 = kernel, 2 = hypervisor (SPARC64);
+ *          any other value loads nothing
+ */
+void helper_ldqf(target_ulong addr, int mem_idx)
+{
+    // XXX add 128 bit load
+    CPU_QuadU u;
+
+    helper_check_align(addr, 7);
+#if !defined(CONFIG_USER_ONLY)
+    switch (mem_idx) {
+    case 0:
+        u.ll.upper = ldq_user(addr);
+        u.ll.lower = ldq_user(addr + 8);
+        QT0 = u.q;
+        break;
+    case 1:
+        u.ll.upper = ldq_kernel(addr);
+        u.ll.lower = ldq_kernel(addr + 8);
+        QT0 = u.q;
+        break;
+#ifdef TARGET_SPARC64
+    case 2:
+        u.ll.upper = ldq_hypv(addr);
+        u.ll.lower = ldq_hypv(addr + 8);
+        QT0 = u.q;
+        break;
+#endif
+    default:
+        break;
+    }
+#else
+    address_mask(env, &addr);
+    u.ll.upper = ldq_raw(addr);
+    /* Re-apply the address mask after stepping to the second half, so the
+       address is truncated to 32 bits only when masking is in effect. */
+    addr += 8;
+    address_mask(env, &addr);
+    u.ll.lower = ldq_raw(addr);
+    QT0 = u.q;
+#endif
+}
+
+/*
+ * Store the 128-bit floating point value in QT0 to addr as two 64-bit
+ * halves (upper half first).
+ *
+ * addr:    destination address, checked for 8-byte alignment
+ * mem_idx: softmmu only; 0 = user, 1 = kernel, 2 = hypervisor (SPARC64);
+ *          any other value stores nothing
+ */
+void helper_stqf(target_ulong addr, int mem_idx)
+{
+    // XXX add 128 bit store
+    CPU_QuadU u;
+
+    helper_check_align(addr, 7);
+#if !defined(CONFIG_USER_ONLY)
+    switch (mem_idx) {
+    case 0:
+        u.q = QT0;
+        stq_user(addr, u.ll.upper);
+        stq_user(addr + 8, u.ll.lower);
+        break;
+    case 1:
+        u.q = QT0;
+        stq_kernel(addr, u.ll.upper);
+        stq_kernel(addr + 8, u.ll.lower);
+        break;
+#ifdef TARGET_SPARC64
+    case 2:
+        u.q = QT0;
+        stq_hypv(addr, u.ll.upper);
+        stq_hypv(addr + 8, u.ll.lower);
+        break;
+#endif
+    default:
+        break;
+    }
+#else
+    u.q = QT0;
+    address_mask(env, &addr);
+    stq_raw(addr, u.ll.upper);
+    /* Re-apply the address mask after stepping to the second half, so the
+       address is truncated to 32 bits only when masking is in effect. */
+    addr += 8;
+    address_mask(env, &addr);
+    stq_raw(addr, u.ll.lower);
+#endif
+}
+
+static inline void set_fsr(void)
 {
     int rnd_mode;
+
     switch (env->fsr & FSR_RD_MASK) {
     case FSR_RD_NEAREST:
         rnd_mode = float_round_nearest_even;
@@ -1502,31 +2426,189 @@ void helper_ldfsr(void)
     set_float_rounding_mode(rnd_mode, &env->fp_status);
 }
 
-void helper_debug()
+void helper_ldfsr(uint32_t new_fsr)
+{
+    env->fsr = (new_fsr & FSR_LDFSR_MASK) | (env->fsr & FSR_LDFSR_OLDMASK);
+    set_fsr();
+}
+
+#ifdef TARGET_SPARC64
+void helper_ldxfsr(uint64_t new_fsr)
+{
+    env->fsr = (new_fsr & FSR_LDXFSR_MASK) | (env->fsr & FSR_LDXFSR_OLDMASK);
+    set_fsr();
+}
+#endif
+
+void helper_debug(void)
 {
     env->exception_index = EXCP_DEBUG;
     cpu_loop_exit();
 }
 
 #ifndef TARGET_SPARC64
-void do_wrpsr()
+/* XXX: use another pointer for %iN registers to avoid slow wrapping
+   handling ? */
+void helper_save(void)
+{
+    uint32_t cwp;
+
+    cwp = cpu_cwp_dec(env, env->cwp - 1);
+    if (env->wim & (1 << cwp)) {
+        raise_exception(TT_WIN_OVF);
+    }
+    set_cwp(cwp);
+}
+
+void helper_restore(void)
+{
+    uint32_t cwp;
+
+    cwp = cpu_cwp_inc(env, env->cwp + 1);
+    if (env->wim & (1 << cwp)) {
+        raise_exception(TT_WIN_UNF);
+    }
+    set_cwp(cwp);
+}
+
+void helper_wrpsr(target_ulong new_psr)
 {
-    if ((T0 & PSR_CWP) >= NWINDOWS)
+    if ((new_psr & PSR_CWP) >= env->nwindows)
         raise_exception(TT_ILL_INSN);
     else
-        PUT_PSR(env, T0);
+        PUT_PSR(env, new_psr);
 }
 
-void do_rdpsr()
+target_ulong helper_rdpsr(void)
 {
-    T0 = GET_PSR(env);
+    return GET_PSR(env);
 }
 
 #else
+/* XXX: use another pointer for %iN registers to avoid slow wrapping
+   handling ? */
+void helper_save(void)
+{
+    uint32_t cwp;
+
+    cwp = cpu_cwp_dec(env, env->cwp - 1);
+    if (env->cansave == 0) {
+        raise_exception(TT_SPILL | (env->otherwin != 0 ?
+                                    (TT_WOTHER | ((env->wstate & 0x38) >> 1)):
+                                    ((env->wstate & 0x7) << 2)));
+    } else {
+        if (env->cleanwin - env->canrestore == 0) {
+            // XXX Clean windows without trap
+            raise_exception(TT_CLRWIN);
+        } else {
+            env->cansave--;
+            env->canrestore++;
+            set_cwp(cwp);
+        }
+    }
+}
+
+void helper_restore(void)
+{
+    uint32_t cwp;
+
+    cwp = cpu_cwp_inc(env, env->cwp + 1);
+    if (env->canrestore == 0) {
+        raise_exception(TT_FILL | (env->otherwin != 0 ?
+                                   (TT_WOTHER | ((env->wstate & 0x38) >> 1)):
+                                   ((env->wstate & 0x7) << 2)));
+    } else {
+        env->cansave++;
+        env->canrestore--;
+        set_cwp(cwp);
+    }
+}
+
+void helper_flushw(void)
+{
+    if (env->cansave != env->nwindows - 2) {
+        raise_exception(TT_SPILL | (env->otherwin != 0 ?
+                                    (TT_WOTHER | ((env->wstate & 0x38) >> 1)):
+                                    ((env->wstate & 0x7) << 2)));
+    }
+}
+
+void helper_saved(void)
+{
+    env->cansave++;
+    if (env->otherwin == 0)
+        env->canrestore--;
+    else
+        env->otherwin--;
+}
+
+void helper_restored(void)
+{
+    env->canrestore++;
+    if (env->cleanwin < env->nwindows - 1)
+        env->cleanwin++;
+    if (env->otherwin == 0)
+        env->cansave--;
+    else
+        env->otherwin--;
+}
+
+target_ulong helper_rdccr(void)
+{
+    return GET_CCR(env);
+}
+
+void helper_wrccr(target_ulong new_ccr)
+{
+    PUT_CCR(env, new_ccr);
+}
+
+// CWP handling is reversed in V9, but we still use the V8 register
+// order.
+target_ulong helper_rdcwp(void)
+{
+    return GET_CWP64(env);
+}
+
+void helper_wrcwp(target_ulong new_cwp)
+{
+    PUT_CWP64(env, new_cwp);
+}
+
+// This function uses non-native bit order
+#define GET_FIELD(X, FROM, TO)                                  \
+    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
+
+// This function uses the order in the manuals, i.e. bit 0 is 2^0
+#define GET_FIELD_SP(X, FROM, TO)               \
+    GET_FIELD(X, 63 - (TO), 63 - (FROM))
+
+/*
+ * ARRAY8: build a blocked-array byte offset by interleaving bit fields of
+ * pixel_addr.  cubesize sets the width of the two variable-width fields
+ * (starting at bits 39 and 17) and the positions of the upper fields in the
+ * result.  GET_FIELD_SP takes (value, lowest bit, highest bit), so each
+ * variable-width field is written as (base, base + cubesize - 1).
+ */
+target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
+{
+    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
+        (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
+        (GET_FIELD_SP(pixel_addr, 17, 17 + cubesize - 1) << 17) |
+        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
+        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
+        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
+        (((pixel_addr >> 55) & 1) << 4) |
+        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
+        GET_FIELD_SP(pixel_addr, 11, 12);
+}
 
-void do_popc()
+target_ulong helper_alignaddr(target_ulong addr, target_ulong offset)
 {
-    T0 = ctpop64(T1);
+    uint64_t tmp;
+
+    tmp = addr + offset;
+    env->gsr &= ~7ULL;
+    env->gsr |= tmp & 7ULL;
+    return tmp & ~7ULL;
+}
+
+target_ulong helper_popc(target_ulong val)
+{
+    return ctpop64(val);
 }
 
 static inline uint64_t *get_gregset(uint64_t pstate)
@@ -1561,74 +2643,121 @@ static inline void change_pstate(uint64_t new_pstate)
     env->pstate = new_pstate;
 }
 
-void do_wrpstate(void)
+void helper_wrpstate(target_ulong new_state)
 {
-    change_pstate(T0 & 0xf3f);
+    if (!(env->def->features & CPU_FEATURE_GL))
+        change_pstate(new_state & 0xf3f);
 }
 
-void do_done(void)
+/*
+ * DONE: return from the current trap level, skipping the trapped
+ * instruction.  Execution resumes at the saved next-PC, then CCR, ASI,
+ * PSTATE and CWP are restored from the saved trap state and the trap
+ * level is lowered by one.
+ */
+void helper_done(void)
 {
+    /* Resume at the saved nPC, not the saved PC (RETRY uses the PC) */
+    env->pc = env->tsptr->tnpc;
+    env->npc = env->tsptr->tnpc + 4;
+    PUT_CCR(env, env->tsptr->tstate >> 32);
+    env->asi = (env->tsptr->tstate >> 24) & 0xff;
+    change_pstate((env->tsptr->tstate >> 8) & 0xf3f);
+    PUT_CWP64(env, env->tsptr->tstate & 0xff);
     env->tl--;
-    env->pc = env->tnpc[env->tl];
-    env->npc = env->tnpc[env->tl] + 4;
-    PUT_CCR(env, env->tstate[env->tl] >> 32);
-    env->asi = (env->tstate[env->tl] >> 24) & 0xff;
-    change_pstate((env->tstate[env->tl] >> 8) & 0xf3f);
-    PUT_CWP64(env, env->tstate[env->tl] & 0xff);
+    env->tsptr = &env->ts[env->tl & MAXTL_MASK];
 }
 
-void do_retry(void)
+void helper_retry(void)
 {
+    env->pc = env->tsptr->tpc;
+    env->npc = env->tsptr->tnpc;
+    PUT_CCR(env, env->tsptr->tstate >> 32);
+    env->asi = (env->tsptr->tstate >> 24) & 0xff;
+    change_pstate((env->tsptr->tstate >> 8) & 0xf3f);
+    PUT_CWP64(env, env->tsptr->tstate & 0xff);
     env->tl--;
-    env->pc = env->tpc[env->tl];
-    env->npc = env->tnpc[env->tl];
-    PUT_CCR(env, env->tstate[env->tl] >> 32);
-    env->asi = (env->tstate[env->tl] >> 24) & 0xff;
-    change_pstate((env->tstate[env->tl] >> 8) & 0xf3f);
-    PUT_CWP64(env, env->tstate[env->tl] & 0xff);
+    env->tsptr = &env->ts[env->tl & MAXTL_MASK];
 }
-#endif
 
-void set_cwp(int new_cwp)
+void helper_set_softint(uint64_t value)
 {
-    /* put the modified wrap registers at their proper location */
-    if (env->cwp == (NWINDOWS - 1))
-        memcpy32(env->regbase, env->regbase + NWINDOWS * 16);
-    env->cwp = new_cwp;
-    /* put the wrap registers at their temporary location */
-    if (new_cwp == (NWINDOWS - 1))
-        memcpy32(env->regbase + NWINDOWS * 16, env->regbase);
-    env->regwptr = env->regbase + (new_cwp * 16);
-    REGWPTR = env->regwptr;
+    env->softint |= (uint32_t)value;
 }
 
-void cpu_set_cwp(CPUState *env1, int new_cwp)
+void helper_clear_softint(uint64_t value)
 {
-    CPUState *saved_env;
-#ifdef reg_REGWPTR
-    target_ulong *saved_regwptr;
-#endif
+    env->softint &= (uint32_t)~value;
+}
 
-    saved_env = env;
-#ifdef reg_REGWPTR
-    saved_regwptr = REGWPTR;
-#endif
-    env = env1;
-    set_cwp(new_cwp);
-    env = saved_env;
-#ifdef reg_REGWPTR
-    REGWPTR = saved_regwptr;
+void helper_write_softint(uint64_t value)
+{
+    env->softint = (uint32_t)value;
+}
 #endif
+
+void helper_flush(target_ulong addr)
+{
+    addr &= ~7;
+    tb_invalidate_page_range(addr, addr + 8);
 }
 
 #ifdef TARGET_SPARC64
-void do_interrupt(int intno)
+#ifdef DEBUG_PCALL
+static const char * const excp_names[0x80] = {
+    [TT_TFAULT] = "Instruction Access Fault",
+    [TT_TMISS] = "Instruction Access MMU Miss",
+    [TT_CODE_ACCESS] = "Instruction Access Error",
+    [TT_ILL_INSN] = "Illegal Instruction",
+    [TT_PRIV_INSN] = "Privileged Instruction",
+    [TT_NFPU_INSN] = "FPU Disabled",
+    [TT_FP_EXCP] = "FPU Exception",
+    [TT_TOVF] = "Tag Overflow",
+    [TT_CLRWIN] = "Clean Windows",
+    [TT_DIV_ZERO] = "Division By Zero",
+    [TT_DFAULT] = "Data Access Fault",
+    [TT_DMISS] = "Data Access MMU Miss",
+    [TT_DATA_ACCESS] = "Data Access Error",
+    [TT_DPROT] = "Data Protection Error",
+    [TT_UNALIGNED] = "Unaligned Memory Access",
+    [TT_PRIV_ACT] = "Privileged Action",
+    [TT_EXTINT | 0x1] = "External Interrupt 1",
+    [TT_EXTINT | 0x2] = "External Interrupt 2",
+    [TT_EXTINT | 0x3] = "External Interrupt 3",
+    [TT_EXTINT | 0x4] = "External Interrupt 4",
+    [TT_EXTINT | 0x5] = "External Interrupt 5",
+    [TT_EXTINT | 0x6] = "External Interrupt 6",
+    [TT_EXTINT | 0x7] = "External Interrupt 7",
+    [TT_EXTINT | 0x8] = "External Interrupt 8",
+    [TT_EXTINT | 0x9] = "External Interrupt 9",
+    [TT_EXTINT | 0xa] = "External Interrupt 10",
+    [TT_EXTINT | 0xb] = "External Interrupt 11",
+    [TT_EXTINT | 0xc] = "External Interrupt 12",
+    [TT_EXTINT | 0xd] = "External Interrupt 13",
+    [TT_EXTINT | 0xe] = "External Interrupt 14",
+    [TT_EXTINT | 0xf] = "External Interrupt 15",
+};
+#endif
+
+void do_interrupt(CPUState *env)
 {
+    int intno = env->exception_index;
+
 #ifdef DEBUG_PCALL
     if (loglevel & CPU_LOG_INT) {
         static int count;
-        fprintf(logfile, "%6d: v=%04x pc=%016" PRIx64 " npc=%016" PRIx64 " SP=%016" PRIx64 "\n",
-                count, intno,
+        const char *name;
+
+        if (intno < 0 || intno >= 0x180)
+            name = "Unknown";
+        else if (intno >= 0x100)
+            name = "Trap Instruction";
+        else if (intno >= 0xc0)
+            name = "Window Fill";
+        else if (intno >= 0x80)
+            name = "Window Spill";
+        else {
+            name = excp_names[intno];
+            if (!name)
+                name = "Unknown";
+        }
+
+        fprintf(logfile, "%6d: %s (v=%04x) pc=%016" PRIx64 " npc=%016" PRIx64
+                " SP=%016" PRIx64 "\n",
+                count, name, intno,
                 env->pc,
                 env->npc, env->regwptr[6]);
         cpu_dump_state(env, logfile, fprintf, 0);
@@ -1649,47 +2778,112 @@ void do_interrupt(int intno)
     }
 #endif
 #if !defined(CONFIG_USER_ONLY)
-    if (env->tl == MAXTL) {
-        cpu_abort(env, "Trap 0x%04x while trap level is MAXTL, Error state", env->exception_index);
+    if (env->tl >= env->maxtl) {
+        cpu_abort(env, "Trap 0x%04x while trap level (%d) >= MAXTL (%d),"
+                  " Error state", env->exception_index, env->tl, env->maxtl);
         return;
     }
 #endif
-    env->tstate[env->tl] = ((uint64_t)GET_CCR(env) << 32) | ((env->asi & 0xff) << 24) |
-        ((env->pstate & 0xf3f) << 8) | GET_CWP64(env);
-    env->tpc[env->tl] = env->pc;
-    env->tnpc[env->tl] = env->npc;
-    env->tt[env->tl] = intno;
-    change_pstate(PS_PEF | PS_PRIV | PS_AG);
-
-    if (intno == TT_CLRWIN)
-        set_cwp((env->cwp - 1) & (NWINDOWS - 1));
-    else if ((intno & 0x1c0) == TT_SPILL)
-        set_cwp((env->cwp - env->cansave - 2) & (NWINDOWS - 1));
-    else if ((intno & 0x1c0) == TT_FILL)
-        set_cwp((env->cwp + 1) & (NWINDOWS - 1));
-    env->tbr &= ~0x7fffULL;
-    env->tbr |= ((env->tl > 1) ? 1 << 14 : 0) | (intno << 5);
-    if (env->tl < MAXTL - 1) {
+    if (env->tl < env->maxtl - 1) {
         env->tl++;
     } else {
         env->pstate |= PS_RED;
-        if (env->tl != MAXTL)
+        if (env->tl < env->maxtl)
             env->tl++;
     }
+    env->tsptr = &env->ts[env->tl & MAXTL_MASK];
+    env->tsptr->tstate = ((uint64_t)GET_CCR(env) << 32) |
+        ((env->asi & 0xff) << 24) | ((env->pstate & 0xf3f) << 8) |
+        GET_CWP64(env);
+    env->tsptr->tpc = env->pc;
+    env->tsptr->tnpc = env->npc;
+    env->tsptr->tt = intno;
+    if (!(env->def->features & CPU_FEATURE_GL)) {
+        switch (intno) {
+        case TT_IVEC:
+            change_pstate(PS_PEF | PS_PRIV | PS_IG);
+            break;
+        case TT_TFAULT:
+        case TT_TMISS:
+        case TT_DFAULT:
+        case TT_DMISS:
+        case TT_DPROT:
+            change_pstate(PS_PEF | PS_PRIV | PS_MG);
+            break;
+        default:
+            change_pstate(PS_PEF | PS_PRIV | PS_AG);
+            break;
+        }
+    }
+    if (intno == TT_CLRWIN)
+        cpu_set_cwp(env, cpu_cwp_dec(env, env->cwp - 1));
+    else if ((intno & 0x1c0) == TT_SPILL)
+        cpu_set_cwp(env, cpu_cwp_dec(env, env->cwp - env->cansave - 2));
+    else if ((intno & 0x1c0) == TT_FILL)
+        cpu_set_cwp(env, cpu_cwp_inc(env, env->cwp + 1));
+    env->tbr &= ~0x7fffULL;
+    env->tbr |= ((env->tl > 1) ? 1 << 14 : 0) | (intno << 5);
     env->pc = env->tbr;
     env->npc = env->pc + 4;
     env->exception_index = 0;
 }
 #else
-void do_interrupt(int intno)
+#ifdef DEBUG_PCALL
+/* Trap names for the DEBUG_PCALL log, indexed by trap type; gaps are NULL. */
+static const char * const excp_names[0x80] = {
+    [TT_TFAULT] = "Instruction Access Fault",
+    [TT_ILL_INSN] = "Illegal Instruction",
+    [TT_PRIV_INSN] = "Privileged Instruction",
+    [TT_NFPU_INSN] = "FPU Disabled",
+    [TT_WIN_OVF] = "Window Overflow",
+    [TT_WIN_UNF] = "Window Underflow",
+    [TT_UNALIGNED] = "Unaligned Memory Access",
+    [TT_FP_EXCP] = "FPU Exception",
+    [TT_DFAULT] = "Data Access Fault",
+    [TT_TOVF] = "Tag Overflow",
+    [TT_EXTINT | 0x1] = "External Interrupt 1",
+    [TT_EXTINT | 0x2] = "External Interrupt 2",
+    [TT_EXTINT | 0x3] = "External Interrupt 3",
+    [TT_EXTINT | 0x4] = "External Interrupt 4",
+    [TT_EXTINT | 0x5] = "External Interrupt 5",
+    [TT_EXTINT | 0x6] = "External Interrupt 6",
+    [TT_EXTINT | 0x7] = "External Interrupt 7",
+    [TT_EXTINT | 0x8] = "External Interrupt 8",
+    [TT_EXTINT | 0x9] = "External Interrupt 9",
+    [TT_EXTINT | 0xa] = "External Interrupt 10",
+    [TT_EXTINT | 0xb] = "External Interrupt 11",
+    [TT_EXTINT | 0xc] = "External Interrupt 12",
+    [TT_EXTINT | 0xd] = "External Interrupt 13",
+    [TT_EXTINT | 0xe] = "External Interrupt 14",
+    [TT_EXTINT | 0xf] = "External Interrupt 15",
+    [TT_CODE_ACCESS] = "Instruction Access Error",
+    [TT_DATA_ACCESS] = "Data Access Error",
+    [TT_DIV_ZERO] = "Division By Zero",
+    [TT_NCP_INSN] = "Coprocessor Disabled",
+};
+#endif
+
+void do_interrupt(CPUState *env)
 {
-    int cwp;
+    int cwp, intno = env->exception_index;
 
 #ifdef DEBUG_PCALL
     if (loglevel & CPU_LOG_INT) {
         static int count;
-        fprintf(logfile, "%6d: v=%02x pc=%08x npc=%08x SP=%08x\n",
-                count, intno,
+        const char *name;
+
+        if (intno < 0 || intno >= 0x100)
+            name = "Unknown";
+        else if (intno >= 0x80)
+            name = "Trap Instruction";
+        else {
+            name = excp_names[intno];
+            if (!name)
+                name = "Unknown";
+        }
+
+        fprintf(logfile, "%6d: %s (v=%02x) pc=%08x npc=%08x SP=%08x\n",
+                count, name, intno,
                 env->pc,
                 env->npc, env->regwptr[6]);
         cpu_dump_state(env, logfile, fprintf, 0);
@@ -1711,13 +2905,14 @@ void do_interrupt(int intno)
 #endif
 #if !defined(CONFIG_USER_ONLY)
     if (env->psret == 0) {
-        cpu_abort(env, "Trap 0x%02x while interrupts disabled, Error state", env->exception_index);
+        cpu_abort(env, "Trap 0x%02x while interrupts disabled, Error state",
+                  env->exception_index);
         return;
     }
 #endif
     env->psret = 0;
-    cwp = (env->cwp - 1) & (NWINDOWS - 1);
-    set_cwp(cwp);
+    cwp = cpu_cwp_dec(env, env->cwp - 1);
+    cpu_set_cwp(env, cwp);
     env->regwptr[9] = env->pc;
     env->regwptr[10] = env->npc;
     env->psrps = env->psrs;
@@ -1736,11 +2931,6 @@ static void do_unaligned_access(target_ulong addr, int is_write, int is_user,
 
 #define MMUSUFFIX _mmu
 #define ALIGNED_ONLY
-#ifdef __s390__
-# define GETPC() ((void*)((unsigned long)__builtin_return_address(0) & 0x7fffffffUL))
-#else
-# define GETPC() (__builtin_return_address(0))
-#endif
 
 #define SHIFT 0
 #include "softmmu_template.h"
@@ -1754,12 +2944,32 @@ static void do_unaligned_access(target_ulong addr, int is_write, int is_user,
 #define SHIFT 3
 #include "softmmu_template.h"
 
+/* Bring the CPU state back in sync for the host return address retaddr.
+   XXX: make it generic ? */
+static void cpu_restore_state2(void *retaddr)
+{
+    unsigned long host_pc = (unsigned long)retaddr;
+    TranslationBlock *tb;
+
+    /* without a return address there is nothing to look up */
+    if (!retaddr)
+        return;
+
+    /* a PC inside the translated code means that we have a virtual CPU
+       fault, so the state is restored from the matching block */
+    tb = tb_find_pc(host_pc);
+    if (tb)
+        cpu_restore_state(tb, env, host_pc, (void *)(long)env->cond);
+}
+
 static void do_unaligned_access(target_ulong addr, int is_write, int is_user,
                                 void *retaddr)
 {
 #ifdef DEBUG_UNALIGNED
-    printf("Unaligned access to 0x%x from 0x%x\n", addr, env->pc);
+    printf("Unaligned access to 0x" TARGET_FMT_lx " from 0x" TARGET_FMT_lx
+           "\n", addr, env->pc);
 #endif
+    cpu_restore_state2(retaddr); /* restore CPU state before raising the trap */
     raise_exception(TT_UNALIGNED);
 }
 
@@ -1769,9 +2979,7 @@ static void do_unaligned_access(target_ulong addr, int is_write, int is_user,
 /* XXX: fix it to restore all registers */
 void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr)
 {
-    TranslationBlock *tb;
     int ret;
-    unsigned long pc;
     CPUState *saved_env;
 
     /* XXX: hack to restore env in all cases, even if not called from
@@ -1781,16 +2989,7 @@ void tlb_fill(target_ulong addr, int is_write, int mmu_idx, void *retaddr)
 
     ret = cpu_sparc_handle_mmu_fault(env, addr, is_write, mmu_idx, 1);
     if (ret) {
-        if (retaddr) {
-            /* now we have a real cpu fault */
-            pc = (unsigned long)retaddr;
-            tb = tb_find_pc(pc);
-            if (tb) {
-                /* the PC is inside the translated code. It means that we have
-                   a virtual CPU fault */
-                cpu_restore_state(tb, env, pc, (void *)T2);
-            }
-        }
+        cpu_restore_state2(retaddr);
         cpu_loop_exit();
     }
     env = saved_env;
@@ -1808,6 +3007,17 @@ void do_unassigned_access(target_phys_addr_t addr, int is_write, int is_exec,
        generated code */
     saved_env = env;
     env = cpu_single_env;
+#ifdef DEBUG_UNASSIGNED
+    if (is_asi)
+        printf("Unassigned mem %s access to " TARGET_FMT_plx
+               " asi 0x%02x from " TARGET_FMT_lx "\n",
+               is_exec ? "exec" : is_write ? "write" : "read", addr, is_asi,
+               env->pc);
+    else
+        printf("Unassigned mem %s access to " TARGET_FMT_plx " from "
+               TARGET_FMT_lx "\n",
+               is_exec ? "exec" : is_write ? "write" : "read", addr, env->pc);
+#endif
     if (env->mmuregs[3]) /* Fault status register */
         env->mmuregs[3] = 1; /* overflow (not read before another fault) */
     if (is_asi)
@@ -1821,10 +3031,6 @@ void do_unassigned_access(target_phys_addr_t addr, int is_write, int is_exec,
     env->mmuregs[3] |= (5 << 2) | 2;
     env->mmuregs[4] = addr; /* Fault address register */
     if ((env->mmuregs[0] & MMU_E) && !(env->mmuregs[0] & MMU_NF)) {
-#ifdef DEBUG_UNASSIGNED
-        printf("Unassigned mem access to " TARGET_FMT_plx " from " TARGET_FMT_lx
-               "\n", addr, env->pc);
-#endif
         if (is_exec)
             raise_exception(TT_CODE_ACCESS);
         else
@@ -1843,8 +3049,8 @@ void do_unassigned_access(target_phys_addr_t addr, int is_write, int is_exec,
        generated code */
     saved_env = env;
     env = cpu_single_env;
-    printf("Unassigned mem access to " TARGET_FMT_plx " from " TARGET_FMT_lx "\n",
-           addr, env->pc);
+    printf("Unassigned mem access to " TARGET_FMT_plx " from " TARGET_FMT_lx
+           "\n", addr, env->pc);
     env = saved_env;
 #endif
     if (is_exec)