Fix typo
[qemu] / target-mips / translate.c
index a3bbd74..6f5bea4 100644 (file)
@@ -38,7 +38,6 @@
 
 //#define MIPS_DEBUG_DISAS
 //#define MIPS_DEBUG_SIGN_EXTENSIONS
-//#define MIPS_SINGLE_STEP
 
 /* MIPS major opcodes */
 #define MASK_OP_MAJOR(op)  (op & (0x3F << 26))
@@ -58,10 +57,12 @@ enum {
     OPC_ADDIU    = (0x09 << 26),
     OPC_SLTI     = (0x0A << 26),
     OPC_SLTIU    = (0x0B << 26),
+    /* logic with immediate */
     OPC_ANDI     = (0x0C << 26),
     OPC_ORI      = (0x0D << 26),
     OPC_XORI     = (0x0E << 26),
     OPC_LUI      = (0x0F << 26),
+    /* arithmetic with immediate */
     OPC_DADDI    = (0x18 << 26),
     OPC_DADDIU   = (0x19 << 26),
     /* Jump and branches */
@@ -429,10 +430,8 @@ enum {
 static TCGv_ptr cpu_env;
 static TCGv cpu_gpr[32], cpu_PC;
 static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC], cpu_ACX[MIPS_DSP_ACC];
-static TCGv cpu_dspctrl, btarget;
-static TCGv_i32 bcond;
-static TCGv_i32 fpu_fpr32[32], fpu_fpr32h[32];
-static TCGv_i64 fpu_fpr64[32];
+static TCGv cpu_dspctrl, btarget, bcond;
+static TCGv_i32 hflags;
 static TCGv_i32 fpu_fcr0, fpu_fcr31;
 
 #include "gen-icount.h"
@@ -501,26 +500,14 @@ static const char *fregnames[] =
       "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
       "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", };
 
-static const char *fregnames_64[] =
-    { "F0",  "F1",  "F2",  "F3",  "F4",  "F5",  "F6",  "F7",
-      "F8",  "F9",  "F10", "F11", "F12", "F13", "F14", "F15",
-      "F16", "F17", "F18", "F19", "F20", "F21", "F22", "F23",
-      "F24", "F25", "F26", "F27", "F28", "F29", "F30", "F31", };
-
-static const char *fregnames_h[] =
-    { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
-      "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
-      "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
-      "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31", };
-
 #ifdef MIPS_DEBUG_DISAS
-#define MIPS_DEBUG(fmt, args...)                         \
+#define MIPS_DEBUG(fmt, ...)                         \
         qemu_log_mask(CPU_LOG_TB_IN_ASM,                \
                        TARGET_FMT_lx ": %08x " fmt "\n", \
-                       ctx->pc, ctx->opcode , ##args)
+                       ctx->pc, ctx->opcode , ## __VA_ARGS__)
 #define LOG_DISAS(...) qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__)
 #else
-#define MIPS_DEBUG(fmt, args...) do { } while(0)
+#define MIPS_DEBUG(fmt, ...) do { } while(0)
 #define LOG_DISAS(...) do { } while (0)
 #endif
 
@@ -559,104 +546,110 @@ static inline void gen_store_ACX (TCGv t, int reg)
 /* Moves to/from shadow registers. */
 static inline void gen_load_srsgpr (int from, int to)
 {
-    TCGv r_tmp1 = tcg_temp_new();
+    TCGv t0 = tcg_temp_new();
 
     if (from == 0)
-        tcg_gen_movi_tl(r_tmp1, 0);
+        tcg_gen_movi_tl(t0, 0);
     else {
-        TCGv_i32 r_tmp2 = tcg_temp_new_i32();
+        TCGv_i32 t2 = tcg_temp_new_i32();
         TCGv_ptr addr = tcg_temp_new_ptr();
 
-        tcg_gen_ld_i32(r_tmp2, cpu_env, offsetof(CPUState, CP0_SRSCtl));
-        tcg_gen_shri_i32(r_tmp2, r_tmp2, CP0SRSCtl_PSS);
-        tcg_gen_andi_i32(r_tmp2, r_tmp2, 0xf);
-        tcg_gen_muli_i32(r_tmp2, r_tmp2, sizeof(target_ulong) * 32);
-        tcg_gen_ext_i32_ptr(addr, r_tmp2);
+        tcg_gen_ld_i32(t2, cpu_env, offsetof(CPUState, CP0_SRSCtl));
+        tcg_gen_shri_i32(t2, t2, CP0SRSCtl_PSS);
+        tcg_gen_andi_i32(t2, t2, 0xf);
+        tcg_gen_muli_i32(t2, t2, sizeof(target_ulong) * 32);
+        tcg_gen_ext_i32_ptr(addr, t2);
         tcg_gen_add_ptr(addr, cpu_env, addr);
 
-        tcg_gen_ld_tl(r_tmp1, addr, sizeof(target_ulong) * from);
+        tcg_gen_ld_tl(t0, addr, sizeof(target_ulong) * from);
         tcg_temp_free_ptr(addr);
-        tcg_temp_free_i32(r_tmp2);
+        tcg_temp_free_i32(t2);
     }
-    gen_store_gpr(r_tmp1, to);
-    tcg_temp_free(r_tmp1);
+    gen_store_gpr(t0, to);
+    tcg_temp_free(t0);
 }
 
 static inline void gen_store_srsgpr (int from, int to)
 {
     if (to != 0) {
-        TCGv r_tmp1 = tcg_temp_new();
-        TCGv_i32 r_tmp2 = tcg_temp_new_i32();
+        TCGv t0 = tcg_temp_new();
+        TCGv_i32 t2 = tcg_temp_new_i32();
         TCGv_ptr addr = tcg_temp_new_ptr();
 
-        gen_load_gpr(r_tmp1, from);
-        tcg_gen_ld_i32(r_tmp2, cpu_env, offsetof(CPUState, CP0_SRSCtl));
-        tcg_gen_shri_i32(r_tmp2, r_tmp2, CP0SRSCtl_PSS);
-        tcg_gen_andi_i32(r_tmp2, r_tmp2, 0xf);
-        tcg_gen_muli_i32(r_tmp2, r_tmp2, sizeof(target_ulong) * 32);
-        tcg_gen_ext_i32_ptr(addr, r_tmp2);
+        gen_load_gpr(t0, from);
+        tcg_gen_ld_i32(t2, cpu_env, offsetof(CPUState, CP0_SRSCtl));
+        tcg_gen_shri_i32(t2, t2, CP0SRSCtl_PSS);
+        tcg_gen_andi_i32(t2, t2, 0xf);
+        tcg_gen_muli_i32(t2, t2, sizeof(target_ulong) * 32);
+        tcg_gen_ext_i32_ptr(addr, t2);
         tcg_gen_add_ptr(addr, cpu_env, addr);
 
-        tcg_gen_st_tl(r_tmp1, addr, sizeof(target_ulong) * to);
+        tcg_gen_st_tl(t0, addr, sizeof(target_ulong) * to);
         tcg_temp_free_ptr(addr);
-        tcg_temp_free_i32(r_tmp2);
-        tcg_temp_free(r_tmp1);
+        tcg_temp_free_i32(t2);
+        tcg_temp_free(t0);
     }
 }
 
 /* Floating point register moves. */
 static inline void gen_load_fpr32 (TCGv_i32 t, int reg)
 {
-    tcg_gen_mov_i32(t, fpu_fpr32[reg]);
+    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
 }
 
 static inline void gen_store_fpr32 (TCGv_i32 t, int reg)
 {
-    tcg_gen_mov_i32(fpu_fpr32[reg], t);
+    tcg_gen_st_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
 }
 
-static inline void gen_load_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
+static inline void gen_load_fpr32h (TCGv_i32 t, int reg)
 {
-    if (ctx->hflags & MIPS_HFLAG_F64)
-        tcg_gen_mov_i64(t, fpu_fpr64[reg]);
-    else {
-        tcg_gen_concat_i32_i64(t, fpu_fpr32[reg & ~1], fpu_fpr32[reg | 1]);
-    }
+    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
 }
 
-static inline void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
+static inline void gen_store_fpr32h (TCGv_i32 t, int reg)
 {
-    if (ctx->hflags & MIPS_HFLAG_F64)
-        tcg_gen_mov_i64(fpu_fpr64[reg], t);
-    else {
-        tcg_gen_trunc_i64_i32(fpu_fpr32[reg & ~1], t);
-        tcg_gen_shri_i64(t, t, 32);
-        tcg_gen_trunc_i64_i32(fpu_fpr32[reg | 1], t);
-    }
+    tcg_gen_st_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
 }
 
-static inline void gen_load_fpr32h (TCGv_i32 t, int reg)
+static inline void gen_load_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
 {
-    tcg_gen_mov_i32(t, fpu_fpr32h[reg]);
+    if (ctx->hflags & MIPS_HFLAG_F64) {
+        tcg_gen_ld_i64(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].d));
+    } else {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        TCGv_i32 t1 = tcg_temp_new_i32();
+        gen_load_fpr32(t0, reg & ~1);
+        gen_load_fpr32(t1, reg | 1);
+        tcg_gen_concat_i32_i64(t, t0, t1);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+    }
 }
 
-static inline void gen_store_fpr32h (TCGv_i32 t, int reg)
+static inline void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
 {
-    tcg_gen_mov_i32(fpu_fpr32h[reg], t);
+    if (ctx->hflags & MIPS_HFLAG_F64) {
+        tcg_gen_st_i64(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].d));
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i32 t1 = tcg_temp_new_i32();
+        tcg_gen_trunc_i64_i32(t1, t);
+        gen_store_fpr32(t1, reg & ~1);
+        tcg_gen_shri_i64(t0, t, 32);
+        tcg_gen_trunc_i64_i32(t1, t0);
+        gen_store_fpr32(t1, reg | 1);
+        tcg_temp_free_i32(t1);
+        tcg_temp_free_i64(t0);
+    }
 }
 
-static inline void get_fp_cond (TCGv_i32 t)
+static inline int get_fp_bit (int cc)
 {
-    TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-    TCGv_i32 r_tmp2 = tcg_temp_new_i32();
-
-    tcg_gen_shri_i32(r_tmp2, fpu_fcr31, 24);
-    tcg_gen_andi_i32(r_tmp2, r_tmp2, 0xfe);
-    tcg_gen_shri_i32(r_tmp1, fpu_fcr31, 23);
-    tcg_gen_andi_i32(r_tmp1, r_tmp1, 0x1);
-    tcg_gen_or_i32(t, r_tmp1, r_tmp2);
-    tcg_temp_free_i32(r_tmp1);
-    tcg_temp_free_i32(r_tmp2);
+    if (cc)
+        return 24 + cc;
+    else
+        return 23;
 }
 
 #define FOP_CONDS(type, fmt, bits)                                            \
@@ -693,18 +686,18 @@ FOP_CONDS(abs, ps, 64)
 #undef FOP_CONDS
 
 /* Tests */
-#define OP_COND(name, cond)                                   \
-static inline void glue(gen_op_, name) (TCGv t0, TCGv t1)     \
-{                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcond_tl(cond, t0, t1, l1);                      \
-    tcg_gen_movi_tl(t0, 0);                                   \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t0, 1);                                   \
-    gen_set_label(l2);                                        \
+#define OP_COND(name, cond)                                         \
+static inline void glue(gen_op_, name) (TCGv ret, TCGv t0, TCGv t1) \
+{                                                                   \
+    int l1 = gen_new_label();                                       \
+    int l2 = gen_new_label();                                       \
+                                                                    \
+    tcg_gen_brcond_tl(cond, t0, t1, l1);                            \
+    tcg_gen_movi_tl(ret, 0);                                        \
+    tcg_gen_br(l2);                                                 \
+    gen_set_label(l1);                                              \
+    tcg_gen_movi_tl(ret, 1);                                        \
+    gen_set_label(l2);                                              \
 }
 OP_COND(eq, TCG_COND_EQ);
 OP_COND(ne, TCG_COND_NE);
@@ -714,34 +707,34 @@ OP_COND(lt, TCG_COND_LT);
 OP_COND(ltu, TCG_COND_LTU);
 #undef OP_COND
 
-#define OP_CONDI(name, cond)                                  \
-static inline void glue(gen_op_, name) (TCGv t, target_ulong val) \
-{                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, val, l1);                     \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+#define OP_CONDI(name, cond)                                                 \
+static inline void glue(gen_op_, name) (TCGv ret, TCGv t0, target_ulong val) \
+{                                                                            \
+    int l1 = gen_new_label();                                                \
+    int l2 = gen_new_label();                                                \
+                                                                             \
+    tcg_gen_brcondi_tl(cond, t0, val, l1);                                   \
+    tcg_gen_movi_tl(ret, 0);                                                 \
+    tcg_gen_br(l2);                                                          \
+    gen_set_label(l1);                                                       \
+    tcg_gen_movi_tl(ret, 1);                                                 \
+    gen_set_label(l2);                                                       \
 }
 OP_CONDI(lti, TCG_COND_LT);
 OP_CONDI(ltiu, TCG_COND_LTU);
 #undef OP_CONDI
 
 #define OP_CONDZ(name, cond)                                  \
-static inline void glue(gen_op_, name) (TCGv t)               \
+static inline void glue(gen_op_, name) (TCGv ret, TCGv t0)    \
 {                                                             \
     int l1 = gen_new_label();                                 \
     int l2 = gen_new_label();                                 \
                                                               \
-    tcg_gen_brcondi_tl(cond, t, 0, l1);                       \
-    tcg_gen_movi_tl(t, 0);                                    \
+    tcg_gen_brcondi_tl(cond, t0, 0, l1);                      \
+    tcg_gen_movi_tl(ret, 0);                                  \
     tcg_gen_br(l2);                                           \
     gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
+    tcg_gen_movi_tl(ret, 1);                                  \
     gen_set_label(l2);                                        \
 }
 OP_CONDZ(gez, TCG_COND_GE);
@@ -763,11 +756,7 @@ static inline void save_cpu_state (DisasContext *ctx, int do_save_pc)
         ctx->saved_pc = ctx->pc;
     }
     if (ctx->hflags != ctx->saved_hflags) {
-        TCGv_i32 r_tmp = tcg_temp_new_i32();
-
-        tcg_gen_movi_i32(r_tmp, ctx->hflags);
-        tcg_gen_st_i32(r_tmp, cpu_env, offsetof(CPUState, hflags));
-        tcg_temp_free_i32(r_tmp);
+        tcg_gen_movi_i32(hflags, ctx->hflags);
         ctx->saved_hflags = ctx->hflags;
         switch (ctx->hflags & MIPS_HFLAG_BMASK) {
         case MIPS_HFLAG_BR:
@@ -804,8 +793,6 @@ generate_exception_err (DisasContext *ctx, int excp, int err)
     gen_helper_raise_exception_err(texcp, terr);
     tcg_temp_free_i32(terr);
     tcg_temp_free_i32(texcp);
-    gen_helper_interrupt_restart();
-    tcg_gen_exit_tb(0);
 }
 
 static inline void
@@ -813,8 +800,6 @@ generate_exception (DisasContext *ctx, int excp)
 {
     save_cpu_state(ctx, 1);
     gen_helper_0i(raise_exception, excp);
-    gen_helper_interrupt_restart();
-    tcg_gen_exit_tb(0);
 }
 
 /* Addresses computation */
@@ -898,10 +883,10 @@ static inline void check_mips_64(DisasContext *ctx)
 }
 
 /* load/store instructions. */
-#define OP_LD(insn,fname)                                        \
-static inline void op_ldst_##insn(TCGv t0, DisasContext *ctx)    \
-{                                                                \
-    tcg_gen_qemu_##fname(t0, t0, ctx->mem_idx);                  \
+#define OP_LD(insn,fname)                                                 \
+static inline void op_ldst_##insn(TCGv ret, TCGv arg1, DisasContext *ctx) \
+{                                                                         \
+    tcg_gen_qemu_##fname(ret, arg1, ctx->mem_idx);                        \
 }
 OP_LD(lb,ld8s);
 OP_LD(lbu,ld8u);
@@ -914,10 +899,10 @@ OP_LD(ld,ld64);
 #endif
 #undef OP_LD
 
-#define OP_ST(insn,fname)                                        \
-static inline void op_ldst_##insn(TCGv t0, TCGv t1, DisasContext *ctx) \
-{                                                                \
-    tcg_gen_qemu_##fname(t1, t0, ctx->mem_idx);                  \
+#define OP_ST(insn,fname)                                                  \
+static inline void op_ldst_##insn(TCGv arg1, TCGv arg2, DisasContext *ctx) \
+{                                                                          \
+    tcg_gen_qemu_##fname(arg1, arg2, ctx->mem_idx);                        \
 }
 OP_ST(sb,st8);
 OP_ST(sh,st16);
@@ -927,12 +912,14 @@ OP_ST(sd,st64);
 #endif
 #undef OP_ST
 
-#define OP_LD_ATOMIC(insn,fname)                                        \
-static inline void op_ldst_##insn(TCGv t0, TCGv t1, DisasContext *ctx)  \
-{                                                                       \
-    tcg_gen_mov_tl(t1, t0);                                             \
-    tcg_gen_qemu_##fname(t0, t0, ctx->mem_idx);                         \
-    tcg_gen_st_tl(t1, cpu_env, offsetof(CPUState, CP0_LLAddr));         \
+#define OP_LD_ATOMIC(insn,fname)                                           \
+static inline void op_ldst_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)  \
+{                                                                          \
+    TCGv t0 = tcg_temp_new();                                              \
+    tcg_gen_mov_tl(t0, arg1);                                              \
+    tcg_gen_qemu_##fname(ret, arg1, ctx->mem_idx);                         \
+    tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, CP0_LLAddr));            \
+    tcg_temp_free(t0);                                                     \
 }
 OP_LD_ATOMIC(ll,ld32s);
 #if defined(TARGET_MIPS64)
@@ -940,28 +927,28 @@ OP_LD_ATOMIC(lld,ld64);
 #endif
 #undef OP_LD_ATOMIC
 
-#define OP_ST_ATOMIC(insn,fname,almask)                                 \
-static inline void op_ldst_##insn(TCGv t0, TCGv t1, DisasContext *ctx)  \
-{                                                                       \
-    TCGv r_tmp = tcg_temp_local_new();                       \
-    int l1 = gen_new_label();                                           \
-    int l2 = gen_new_label();                                           \
-    int l3 = gen_new_label();                                           \
-                                                                        \
-    tcg_gen_andi_tl(r_tmp, t0, almask);                                 \
-    tcg_gen_brcondi_tl(TCG_COND_EQ, r_tmp, 0, l1);                      \
-    tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, CP0_BadVAddr));       \
-    generate_exception(ctx, EXCP_AdES);                                 \
-    gen_set_label(l1);                                                  \
-    tcg_gen_ld_tl(r_tmp, cpu_env, offsetof(CPUState, CP0_LLAddr));      \
-    tcg_gen_brcond_tl(TCG_COND_NE, t0, r_tmp, l2);                      \
-    tcg_gen_qemu_##fname(t1, t0, ctx->mem_idx);                         \
-    tcg_gen_movi_tl(t0, 1);                                             \
-    tcg_gen_br(l3);                                                     \
-    gen_set_label(l2);                                                  \
-    tcg_gen_movi_tl(t0, 0);                                             \
-    gen_set_label(l3);                                                  \
-    tcg_temp_free(r_tmp);                                               \
+#define OP_ST_ATOMIC(insn,fname,almask)                                              \
+static inline void op_ldst_##insn(TCGv ret, TCGv arg1, TCGv arg2, DisasContext *ctx) \
+{                                                                                    \
+    TCGv t0 = tcg_temp_new();                                                        \
+    int l1 = gen_new_label();                                                        \
+    int l2 = gen_new_label();                                                        \
+    int l3 = gen_new_label();                                                        \
+                                                                                     \
+    tcg_gen_andi_tl(t0, arg2, almask);                                               \
+    tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);                                      \
+    tcg_gen_st_tl(arg2, cpu_env, offsetof(CPUState, CP0_BadVAddr));                  \
+    generate_exception(ctx, EXCP_AdES);                                              \
+    gen_set_label(l1);                                                               \
+    tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_LLAddr));                      \
+    tcg_gen_brcond_tl(TCG_COND_NE, arg2, t0, l2);                                    \
+    tcg_temp_free(t0);                                                               \
+    tcg_gen_qemu_##fname(arg1, arg2, ctx->mem_idx);                                  \
+    tcg_gen_movi_tl(ret, 1);                                                         \
+    tcg_gen_br(l3);                                                                  \
+    gen_set_label(l2);                                                               \
+    tcg_gen_movi_tl(ret, 0);                                                         \
+    gen_set_label(l3);                                                               \
 }
 OP_ST_ATOMIC(sc,st32,0x3);
 #if defined(TARGET_MIPS64)
@@ -974,8 +961,8 @@ static void gen_ldst (DisasContext *ctx, uint32_t opc, int rt,
                       int base, int16_t offset)
 {
     const char *opn = "ldst";
-    TCGv t0 = tcg_temp_local_new();
-    TCGv t1 = tcg_temp_local_new();
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
 
     if (base == 0) {
         tcg_gen_movi_tl(t0, offset);
@@ -990,146 +977,182 @@ static void gen_ldst (DisasContext *ctx, uint32_t opc, int rt,
     switch (opc) {
 #if defined(TARGET_MIPS64)
     case OPC_LWU:
-        op_ldst_lwu(t0, ctx);
+        save_cpu_state(ctx, 0);
+        op_ldst_lwu(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lwu";
         break;
     case OPC_LD:
-        op_ldst_ld(t0, ctx);
+        save_cpu_state(ctx, 0);
+        op_ldst_ld(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "ld";
         break;
     case OPC_LLD:
-        op_ldst_lld(t0, t1, ctx);
+        save_cpu_state(ctx, 0);
+        op_ldst_lld(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lld";
         break;
     case OPC_SD:
+        save_cpu_state(ctx, 0);
         gen_load_gpr(t1, rt);
-        op_ldst_sd(t0, t1, ctx);
+        op_ldst_sd(t1, t0, ctx);
         opn = "sd";
         break;
-    case OPC_SCD:
-        save_cpu_state(ctx, 1);
-        gen_load_gpr(t1, rt);
-        op_ldst_scd(t0, t1, ctx);
-        gen_store_gpr(t0, rt);
-        opn = "scd";
-        break;
     case OPC_LDL:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(ldl, t1, t0, t1, ctx->mem_idx);
+        gen_helper_3i(ldl, t1, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "ldl";
         break;
     case OPC_SDL:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_2i(sdl, t0, t1, ctx->mem_idx);
+        gen_helper_2i(sdl, t1, t0, ctx->mem_idx);
         opn = "sdl";
         break;
     case OPC_LDR:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(ldr, t1, t0, t1, ctx->mem_idx);
+        gen_helper_3i(ldr, t1, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "ldr";
         break;
     case OPC_SDR:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_2i(sdr, t0, t1, ctx->mem_idx);
+        gen_helper_2i(sdr, t1, t0, ctx->mem_idx);
         opn = "sdr";
         break;
 #endif
     case OPC_LW:
-        op_ldst_lw(t0, ctx);
+        save_cpu_state(ctx, 0);
+        op_ldst_lw(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lw";
         break;
     case OPC_SW:
+        save_cpu_state(ctx, 0);
         gen_load_gpr(t1, rt);
-        op_ldst_sw(t0, t1, ctx);
+        op_ldst_sw(t1, t0, ctx);
         opn = "sw";
         break;
     case OPC_LH:
-        op_ldst_lh(t0, ctx);
+        save_cpu_state(ctx, 0);
+        op_ldst_lh(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lh";
         break;
     case OPC_SH:
+        save_cpu_state(ctx, 0);
         gen_load_gpr(t1, rt);
-        op_ldst_sh(t0, t1, ctx);
+        op_ldst_sh(t1, t0, ctx);
         opn = "sh";
         break;
     case OPC_LHU:
-        op_ldst_lhu(t0, ctx);
+        save_cpu_state(ctx, 0);
+        op_ldst_lhu(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lhu";
         break;
     case OPC_LB:
-        op_ldst_lb(t0, ctx);
+        save_cpu_state(ctx, 0);
+        op_ldst_lb(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lb";
         break;
     case OPC_SB:
+        save_cpu_state(ctx, 0);
         gen_load_gpr(t1, rt);
-        op_ldst_sb(t0, t1, ctx);
+        op_ldst_sb(t1, t0, ctx);
         opn = "sb";
         break;
     case OPC_LBU:
-        op_ldst_lbu(t0, ctx);
+        save_cpu_state(ctx, 0);
+        op_ldst_lbu(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "lbu";
         break;
     case OPC_LWL:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(lwl, t1, t0, t1, ctx->mem_idx);
+        gen_helper_3i(lwl, t1, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "lwl";
         break;
     case OPC_SWL:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_2i(swl, t0, t1, ctx->mem_idx);
+        gen_helper_2i(swl, t1, t0, ctx->mem_idx);
         opn = "swr";
         break;
     case OPC_LWR:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(lwr, t1, t0, t1, ctx->mem_idx);
+        gen_helper_3i(lwr, t1, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "lwr";
         break;
     case OPC_SWR:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_2i(swr, t0, t1, ctx->mem_idx);
+        gen_helper_2i(swr, t1, t0, ctx->mem_idx);
         opn = "swr";
         break;
     case OPC_LL:
-        op_ldst_ll(t0, t1, ctx);
+        save_cpu_state(ctx, 0);
+        op_ldst_ll(t0, t0, ctx);
         gen_store_gpr(t0, rt);
         opn = "ll";
         break;
+    }
+    MIPS_DEBUG("%s %s, %d(%s)", opn, regnames[rt], offset, regnames[base]);
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
+
+/* Store conditional */
+static void gen_st_cond (DisasContext *ctx, uint32_t opc, int rt,
+                         int base, int16_t offset)
+{
+    const char *opn = "st_cond";
+    TCGv t0, t1;
+
+    t0 = tcg_temp_local_new();
+
+    if (base == 0) {
+        tcg_gen_movi_tl(t0, offset);
+    } else if (offset == 0) {
+        gen_load_gpr(t0, base);
+    } else {
+        tcg_gen_movi_tl(t0, offset);
+        gen_op_addr_add(ctx, t0, cpu_gpr[base]);
+    }
+    /* Don't do NOP if destination is zero: we must perform the actual
+       memory access. */
+
+    t1 = tcg_temp_local_new();
+    gen_load_gpr(t1, rt);
+    switch (opc) {
+#if defined(TARGET_MIPS64)
+    case OPC_SCD:
+        save_cpu_state(ctx, 0);
+        op_ldst_scd(t0, t1, t0, ctx);
+        opn = "scd";
+        break;
+#endif
     case OPC_SC:
-        save_cpu_state(ctx, 1);
-        gen_load_gpr(t1, rt);
-        op_ldst_sc(t0, t1, ctx);
-        gen_store_gpr(t0, rt);
+        save_cpu_state(ctx, 0);
+        op_ldst_sc(t0, t1, t0, ctx);
         opn = "sc";
         break;
-    default:
-        MIPS_INVAL(opn);
-        generate_exception(ctx, EXCP_RI);
-        goto out;
     }
     MIPS_DEBUG("%s %s, %d(%s)", opn, regnames[rt], offset, regnames[base]);
- out:
-    tcg_temp_free(t0);
     tcg_temp_free(t1);
+    gen_store_gpr(t0, rt);
+    tcg_temp_free(t0);
 }
 
 /* Load and store */
@@ -1137,7 +1160,7 @@ static void gen_flt_ldst (DisasContext *ctx, uint32_t opc, int ft,
                           int base, int16_t offset)
 {
     const char *opn = "flt_ldst";
-    TCGv t0 = tcg_temp_local_new();
+    TCGv t0 = tcg_temp_new();
 
     if (base == 0) {
         tcg_gen_movi_tl(t0, offset);
@@ -1153,12 +1176,10 @@ static void gen_flt_ldst (DisasContext *ctx, uint32_t opc, int ft,
     case OPC_LWC1:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
-            TCGv t1 = tcg_temp_new();
 
-            tcg_gen_qemu_ld32s(t1, t0, ctx->mem_idx);
-            tcg_gen_trunc_tl_i32(fp0, t1);
+            tcg_gen_qemu_ld32s(t0, t0, ctx->mem_idx);
+            tcg_gen_trunc_tl_i32(fp0, t0);
             gen_store_fpr32(fp0, ft);
-            tcg_temp_free(t1);
             tcg_temp_free_i32(fp0);
         }
         opn = "lwc1";
@@ -1210,140 +1231,184 @@ static void gen_flt_ldst (DisasContext *ctx, uint32_t opc, int ft,
 static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
                            int rt, int rs, int16_t imm)
 {
-    target_ulong uimm;
+    target_ulong uimm = (target_long)imm; /* Sign extend to 32/64 bits */
     const char *opn = "imm arith";
-    TCGv t0 = tcg_temp_local_new();
 
     if (rt == 0 && opc != OPC_ADDI && opc != OPC_DADDI) {
         /* If no destination, treat it as a NOP.
            For addi, we must generate the overflow exception when needed. */
         MIPS_DEBUG("NOP");
-        goto out;
-    }
-    uimm = (uint16_t)imm;
-    switch (opc) {
-    case OPC_ADDI:
-    case OPC_ADDIU:
-#if defined(TARGET_MIPS64)
-    case OPC_DADDI:
-    case OPC_DADDIU:
-#endif
-    case OPC_SLTI:
-    case OPC_SLTIU:
-        uimm = (target_long)imm; /* Sign extend to 32/64 bits */
-        /* Fall through. */
-    case OPC_ANDI:
-    case OPC_ORI:
-    case OPC_XORI:
-        gen_load_gpr(t0, rs);
-        break;
-    case OPC_LUI:
-        tcg_gen_movi_tl(t0, imm << 16);
-        break;
-    case OPC_SLL:
-    case OPC_SRA:
-    case OPC_SRL:
-#if defined(TARGET_MIPS64)
-    case OPC_DSLL:
-    case OPC_DSRA:
-    case OPC_DSRL:
-    case OPC_DSLL32:
-    case OPC_DSRA32:
-    case OPC_DSRL32:
-#endif
-        uimm &= 0x1f;
-        gen_load_gpr(t0, rs);
-        break;
+        return;
     }
     switch (opc) {
     case OPC_ADDI:
         {
-            TCGv r_tmp1 = tcg_temp_new();
-            TCGv r_tmp2 = tcg_temp_new();
+            TCGv t0 = tcg_temp_local_new();
+            TCGv t1 = tcg_temp_new();
+            TCGv t2 = tcg_temp_new();
             int l1 = gen_new_label();
 
-            save_cpu_state(ctx, 1);
-            tcg_gen_ext32s_tl(r_tmp1, t0);
-            tcg_gen_addi_tl(t0, r_tmp1, uimm);
-
-            tcg_gen_xori_tl(r_tmp1, r_tmp1, ~uimm);
-            tcg_gen_xori_tl(r_tmp2, t0, uimm);
-            tcg_gen_and_tl(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free(r_tmp2);
-            tcg_gen_brcondi_tl(TCG_COND_GE, r_tmp1, 0, l1);
+            gen_load_gpr(t1, rs);
+            tcg_gen_addi_tl(t0, t1, uimm);
+            tcg_gen_ext32s_tl(t0, t0);
+
+            tcg_gen_xori_tl(t1, t1, ~uimm);
+            tcg_gen_xori_tl(t2, t0, uimm);
+            tcg_gen_and_tl(t1, t1, t2);
+            tcg_temp_free(t2);
+            tcg_gen_brcondi_tl(TCG_COND_GE, t1, 0, l1);
+            tcg_temp_free(t1);
             /* operands of same sign, result different sign */
             generate_exception(ctx, EXCP_OVERFLOW);
             gen_set_label(l1);
-            tcg_temp_free(r_tmp1);
-
             tcg_gen_ext32s_tl(t0, t0);
+            gen_store_gpr(t0, rt);
+            tcg_temp_free(t0);
         }
         opn = "addi";
         break;
     case OPC_ADDIU:
-        tcg_gen_addi_tl(t0, t0, uimm);
-        tcg_gen_ext32s_tl(t0, t0);
+        if (rs != 0) {
+            tcg_gen_addi_tl(cpu_gpr[rt], cpu_gpr[rs], uimm);
+            tcg_gen_ext32s_tl(cpu_gpr[rt], cpu_gpr[rt]);
+        } else {
+            tcg_gen_movi_tl(cpu_gpr[rt], uimm);
+        }
         opn = "addiu";
         break;
 #if defined(TARGET_MIPS64)
     case OPC_DADDI:
         {
-            TCGv r_tmp1 = tcg_temp_new();
-            TCGv r_tmp2 = tcg_temp_new();
+            TCGv t0 = tcg_temp_local_new();
+            TCGv t1 = tcg_temp_new();
+            TCGv t2 = tcg_temp_new();
             int l1 = gen_new_label();
 
-            save_cpu_state(ctx, 1);
-            tcg_gen_mov_tl(r_tmp1, t0);
-            tcg_gen_addi_tl(t0, t0, uimm);
-
-            tcg_gen_xori_tl(r_tmp1, r_tmp1, ~uimm);
-            tcg_gen_xori_tl(r_tmp2, t0, uimm);
-            tcg_gen_and_tl(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free(r_tmp2);
-            tcg_gen_brcondi_tl(TCG_COND_GE, r_tmp1, 0, l1);
+            gen_load_gpr(t1, rs);
+            tcg_gen_addi_tl(t0, t1, uimm);
+
+            tcg_gen_xori_tl(t1, t1, ~uimm);
+            tcg_gen_xori_tl(t2, t0, uimm);
+            tcg_gen_and_tl(t1, t1, t2);
+            tcg_temp_free(t2);
+            tcg_gen_brcondi_tl(TCG_COND_GE, t1, 0, l1);
+            tcg_temp_free(t1);
             /* operands of same sign, result different sign */
             generate_exception(ctx, EXCP_OVERFLOW);
             gen_set_label(l1);
-            tcg_temp_free(r_tmp1);
+            gen_store_gpr(t0, rt);
+            tcg_temp_free(t0);
         }
         opn = "daddi";
         break;
     case OPC_DADDIU:
-        tcg_gen_addi_tl(t0, t0, uimm);
+        if (rs != 0) {
+            tcg_gen_addi_tl(cpu_gpr[rt], cpu_gpr[rs], uimm);
+        } else {
+            tcg_gen_movi_tl(cpu_gpr[rt], uimm);
+        }
         opn = "daddiu";
         break;
 #endif
-    case OPC_SLTI:
-        gen_op_lti(t0, uimm);
-        opn = "slti";
-        break;
-    case OPC_SLTIU:
-        gen_op_ltiu(t0, uimm);
-        opn = "sltiu";
-        break;
+    }
+    MIPS_DEBUG("%s %s, %s, " TARGET_FMT_lx, opn, regnames[rt], regnames[rs], uimm);
+}
+
+/* Logic with immediate operand */
+static void gen_logic_imm (CPUState *env, uint32_t opc, int rt, int rs, int16_t imm)
+{
+    target_ulong uimm;
+    const char *opn = "imm logic";
+
+    if (rt == 0) {
+        /* If no destination, treat it as a NOP. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+    uimm = (uint16_t)imm;
+    switch (opc) {
     case OPC_ANDI:
-        tcg_gen_andi_tl(t0, t0, uimm);
+        if (likely(rs != 0))
+            tcg_gen_andi_tl(cpu_gpr[rt], cpu_gpr[rs], uimm);
+        else
+            tcg_gen_movi_tl(cpu_gpr[rt], 0);
         opn = "andi";
         break;
     case OPC_ORI:
-        tcg_gen_ori_tl(t0, t0, uimm);
+        if (rs != 0)
+            tcg_gen_ori_tl(cpu_gpr[rt], cpu_gpr[rs], uimm);
+        else
+            tcg_gen_movi_tl(cpu_gpr[rt], uimm);
         opn = "ori";
         break;
     case OPC_XORI:
-        tcg_gen_xori_tl(t0, t0, uimm);
+        if (likely(rs != 0))
+            tcg_gen_xori_tl(cpu_gpr[rt], cpu_gpr[rs], uimm);
+        else
+            tcg_gen_movi_tl(cpu_gpr[rt], uimm);
         opn = "xori";
         break;
     case OPC_LUI:
+        tcg_gen_movi_tl(cpu_gpr[rt], imm << 16);
         opn = "lui";
         break;
+    }
+    MIPS_DEBUG("%s %s, %s, " TARGET_FMT_lx, opn, regnames[rt], regnames[rs], uimm);
+}
+
+/* Set on less than with immediate operand */
+static void gen_slt_imm (CPUState *env, uint32_t opc, int rt, int rs, int16_t imm)
+{
+    target_ulong uimm = (target_long)imm; /* Sign extend to 32/64 bits */
+    const char *opn = "imm arith";
+    TCGv t0;
+
+    if (rt == 0) {
+        /* If no destination, treat it as a NOP. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+    t0 = tcg_temp_new();
+    gen_load_gpr(t0, rs);
+    switch (opc) {
+    case OPC_SLTI:
+        gen_op_lti(cpu_gpr[rt], t0, uimm);
+        opn = "slti";
+        break;
+    case OPC_SLTIU:
+        gen_op_ltiu(cpu_gpr[rt], t0, uimm);
+        opn = "sltiu";
+        break;
+    }
+    MIPS_DEBUG("%s %s, %s, " TARGET_FMT_lx, opn, regnames[rt], regnames[rs], uimm);
+    tcg_temp_free(t0);
+}
+
+/* Shifts with immediate operand */
+static void gen_shift_imm(CPUState *env, DisasContext *ctx, uint32_t opc,
+                          int rt, int rs, int16_t imm)
+{
+    target_ulong uimm = ((uint16_t)imm) & 0x1f;
+    const char *opn = "imm shift";
+    TCGv t0;
+
+    if (rt == 0) {
+        /* If no destination, treat it as a NOP. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    gen_load_gpr(t0, rs);
+    switch (opc) {
     case OPC_SLL:
         tcg_gen_shli_tl(t0, t0, uimm);
-        tcg_gen_ext32s_tl(t0, t0);
+        tcg_gen_ext32s_tl(cpu_gpr[rt], t0);
         opn = "sll";
         break;
     case OPC_SRA:
         tcg_gen_ext32s_tl(t0, t0);
-        tcg_gen_sari_tl(t0, t0, uimm);
+        tcg_gen_sari_tl(cpu_gpr[rt], t0, uimm);
         opn = "sra";
         break;
     case OPC_SRL:
@@ -1351,9 +1416,9 @@ static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
         case 0:
             if (uimm != 0) {
                 tcg_gen_ext32u_tl(t0, t0);
-                tcg_gen_shri_tl(t0, t0, uimm);
+                tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm);
             } else {
-                tcg_gen_ext32s_tl(t0, t0);
+                tcg_gen_ext32s_tl(cpu_gpr[rt], t0);
             }
             opn = "srl";
             break;
@@ -1361,20 +1426,20 @@ static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
             /* rotr is decoded as srl on non-R2 CPUs */
             if (env->insn_flags & ISA_MIPS32R2) {
                 if (uimm != 0) {
-                    TCGv_i32 r_tmp1 = tcg_temp_new_i32();
+                    TCGv_i32 t1 = tcg_temp_new_i32();
 
-                    tcg_gen_trunc_tl_i32(r_tmp1, t0);
-                    tcg_gen_rotri_i32(r_tmp1, r_tmp1, uimm);
-                    tcg_gen_ext_i32_tl(t0, r_tmp1);
-                    tcg_temp_free_i32(r_tmp1);
+                    tcg_gen_trunc_tl_i32(t1, t0);
+                    tcg_gen_rotri_i32(t1, t1, uimm);
+                    tcg_gen_ext_i32_tl(cpu_gpr[rt], t1);
+                    tcg_temp_free_i32(t1);
                 }
                 opn = "rotr";
             } else {
                 if (uimm != 0) {
                     tcg_gen_ext32u_tl(t0, t0);
-                    tcg_gen_shri_tl(t0, t0, uimm);
+                    tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm);
                 } else {
-                    tcg_gen_ext32s_tl(t0, t0);
+                    tcg_gen_ext32s_tl(cpu_gpr[rt], t0);
                 }
                 opn = "srl";
             }
@@ -1387,28 +1452,28 @@ static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
         break;
 #if defined(TARGET_MIPS64)
     case OPC_DSLL:
-        tcg_gen_shli_tl(t0, t0, uimm);
+        tcg_gen_shli_tl(cpu_gpr[rt], t0, uimm);
         opn = "dsll";
         break;
     case OPC_DSRA:
-        tcg_gen_sari_tl(t0, t0, uimm);
+        tcg_gen_sari_tl(cpu_gpr[rt], t0, uimm);
         opn = "dsra";
         break;
     case OPC_DSRL:
         switch ((ctx->opcode >> 21) & 0x1f) {
         case 0:
-            tcg_gen_shri_tl(t0, t0, uimm);
+            tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm);
             opn = "dsrl";
             break;
         case 1:
             /* drotr is decoded as dsrl on non-R2 CPUs */
             if (env->insn_flags & ISA_MIPS32R2) {
                 if (uimm != 0) {
-                    tcg_gen_rotri_tl(t0, t0, uimm);
+                    tcg_gen_rotri_tl(cpu_gpr[rt], t0, uimm);
                 }
                 opn = "drotr";
             } else {
-                tcg_gen_shri_tl(t0, t0, uimm);
+                tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm);
                 opn = "dsrl";
             }
             break;
@@ -1419,26 +1484,26 @@ static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
         }
         break;
     case OPC_DSLL32:
-        tcg_gen_shli_tl(t0, t0, uimm + 32);
+        tcg_gen_shli_tl(cpu_gpr[rt], t0, uimm + 32);
         opn = "dsll32";
         break;
     case OPC_DSRA32:
-        tcg_gen_sari_tl(t0, t0, uimm + 32);
+        tcg_gen_sari_tl(cpu_gpr[rt], t0, uimm + 32);
         opn = "dsra32";
         break;
     case OPC_DSRL32:
         switch ((ctx->opcode >> 21) & 0x1f) {
         case 0:
-            tcg_gen_shri_tl(t0, t0, uimm + 32);
+            tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm + 32);
             opn = "dsrl32";
             break;
         case 1:
             /* drotr32 is decoded as dsrl32 on non-R2 CPUs */
             if (env->insn_flags & ISA_MIPS32R2) {
-                tcg_gen_rotri_tl(t0, t0, uimm + 32);
+                tcg_gen_rotri_tl(cpu_gpr[rt], t0, uimm + 32);
                 opn = "drotr32";
             } else {
-                tcg_gen_shri_tl(t0, t0, uimm + 32);
+                tcg_gen_shri_tl(cpu_gpr[rt], t0, uimm + 32);
                 opn = "dsrl32";
             }
             break;
@@ -1449,14 +1514,8 @@ static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
         }
         break;
 #endif
-    default:
-        MIPS_INVAL(opn);
-        generate_exception(ctx, EXCP_RI);
-        goto out;
     }
-    gen_store_gpr(t0, rt);
     MIPS_DEBUG("%s %s, %s, " TARGET_FMT_lx, opn, regnames[rt], regnames[rs], uimm);
- out:
     tcg_temp_free(t0);
 }
 
@@ -1465,201 +1524,339 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
                        int rd, int rs, int rt)
 {
     const char *opn = "arith";
-    TCGv t0 = tcg_temp_local_new();
-    TCGv t1 = tcg_temp_local_new();
 
     if (rd == 0 && opc != OPC_ADD && opc != OPC_SUB
        && opc != OPC_DADD && opc != OPC_DSUB) {
         /* If no destination, treat it as a NOP.
            For add & sub, we must generate the overflow exception when needed. */
         MIPS_DEBUG("NOP");
-        goto out;
-    }
-    gen_load_gpr(t0, rs);
-    /* Specialcase the conventional move operation. */
-    if (rt == 0 && (opc == OPC_ADDU || opc == OPC_DADDU
-                    || opc == OPC_SUBU || opc == OPC_DSUBU)) {
-        gen_store_gpr(t0, rd);
-        goto out;
+        return;
     }
-    gen_load_gpr(t1, rt);
+
     switch (opc) {
     case OPC_ADD:
         {
-            TCGv r_tmp1 = tcg_temp_new();
-            TCGv r_tmp2 = tcg_temp_new();
+            TCGv t0 = tcg_temp_local_new();
+            TCGv t1 = tcg_temp_new();
+            TCGv t2 = tcg_temp_new();
             int l1 = gen_new_label();
 
-            save_cpu_state(ctx, 1);
-            tcg_gen_ext32s_tl(r_tmp1, t0);
-            tcg_gen_ext32s_tl(r_tmp2, t1);
-            tcg_gen_add_tl(t0, r_tmp1, r_tmp2);
-
-            tcg_gen_xor_tl(r_tmp1, r_tmp1, t1);
-            tcg_gen_xori_tl(r_tmp1, r_tmp1, -1);
-            tcg_gen_xor_tl(r_tmp2, t0, t1);
-            tcg_gen_and_tl(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free(r_tmp2);
-            tcg_gen_brcondi_tl(TCG_COND_GE, r_tmp1, 0, l1);
+            gen_load_gpr(t1, rs);
+            gen_load_gpr(t2, rt);
+            tcg_gen_add_tl(t0, t1, t2);
+            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_xor_tl(t1, t1, t2);
+            tcg_gen_not_tl(t1, t1);
+            tcg_gen_xor_tl(t2, t0, t2);
+            tcg_gen_and_tl(t1, t1, t2);
+            tcg_temp_free(t2);
+            tcg_gen_brcondi_tl(TCG_COND_GE, t1, 0, l1);
+            tcg_temp_free(t1);
             /* operands of same sign, result different sign */
             generate_exception(ctx, EXCP_OVERFLOW);
             gen_set_label(l1);
-            tcg_temp_free(r_tmp1);
-
-            tcg_gen_ext32s_tl(t0, t0);
+            gen_store_gpr(t0, rd);
+            tcg_temp_free(t0);
         }
         opn = "add";
         break;
     case OPC_ADDU:
-        tcg_gen_add_tl(t0, t0, t1);
-        tcg_gen_ext32s_tl(t0, t0);
+        if (rs != 0 && rt != 0) {
+            tcg_gen_add_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+        } else if (rs == 0 && rt != 0) {
+            tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rt]);
+        } else if (rs != 0 && rt == 0) {
+            tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
+        } else {
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+        }
         opn = "addu";
         break;
     case OPC_SUB:
         {
-            TCGv r_tmp1 = tcg_temp_new();
-            TCGv r_tmp2 = tcg_temp_new();
+            TCGv t0 = tcg_temp_local_new();
+            TCGv t1 = tcg_temp_new();
+            TCGv t2 = tcg_temp_new();
             int l1 = gen_new_label();
 
-            save_cpu_state(ctx, 1);
-            tcg_gen_ext32s_tl(r_tmp1, t0);
-            tcg_gen_ext32s_tl(r_tmp2, t1);
-            tcg_gen_sub_tl(t0, r_tmp1, r_tmp2);
-
-            tcg_gen_xor_tl(r_tmp2, r_tmp1, t1);
-            tcg_gen_xor_tl(r_tmp1, r_tmp1, t0);
-            tcg_gen_and_tl(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free(r_tmp2);
-            tcg_gen_brcondi_tl(TCG_COND_GE, r_tmp1, 0, l1);
-            /* operands of different sign, first operand and result different sign */
+            gen_load_gpr(t1, rs);
+            gen_load_gpr(t2, rt);
+            tcg_gen_sub_tl(t0, t1, t2);
+            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_xor_tl(t2, t1, t2);
+            tcg_gen_xor_tl(t1, t0, t1);
+            tcg_gen_and_tl(t1, t1, t2);
+            tcg_temp_free(t2);
+            tcg_gen_brcondi_tl(TCG_COND_GE, t1, 0, l1);
+            tcg_temp_free(t1);
+           /* operands of different sign, first operand and result different sign */
             generate_exception(ctx, EXCP_OVERFLOW);
             gen_set_label(l1);
-            tcg_temp_free(r_tmp1);
-
-            tcg_gen_ext32s_tl(t0, t0);
+            gen_store_gpr(t0, rd);
+            tcg_temp_free(t0);
         }
         opn = "sub";
         break;
     case OPC_SUBU:
-        tcg_gen_sub_tl(t0, t0, t1);
-        tcg_gen_ext32s_tl(t0, t0);
+        if (rs != 0 && rt != 0) {
+            tcg_gen_sub_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+        } else if (rs == 0 && rt != 0) {
+            tcg_gen_neg_tl(cpu_gpr[rd], cpu_gpr[rt]);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+        } else if (rs != 0 && rt == 0) {
+            tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
+        } else {
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+        }
         opn = "subu";
         break;
 #if defined(TARGET_MIPS64)
     case OPC_DADD:
         {
-            TCGv r_tmp1 = tcg_temp_new();
-            TCGv r_tmp2 = tcg_temp_new();
+            TCGv t0 = tcg_temp_local_new();
+            TCGv t1 = tcg_temp_new();
+            TCGv t2 = tcg_temp_new();
             int l1 = gen_new_label();
 
-            save_cpu_state(ctx, 1);
-            tcg_gen_mov_tl(r_tmp1, t0);
-            tcg_gen_add_tl(t0, t0, t1);
-
-            tcg_gen_xor_tl(r_tmp1, r_tmp1, t1);
-            tcg_gen_xori_tl(r_tmp1, r_tmp1, -1);
-            tcg_gen_xor_tl(r_tmp2, t0, t1);
-            tcg_gen_and_tl(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free(r_tmp2);
-            tcg_gen_brcondi_tl(TCG_COND_GE, r_tmp1, 0, l1);
+            gen_load_gpr(t1, rs);
+            gen_load_gpr(t2, rt);
+            tcg_gen_add_tl(t0, t1, t2);
+            tcg_gen_xor_tl(t1, t1, t2);
+            tcg_gen_not_tl(t1, t1);
+            tcg_gen_xor_tl(t2, t0, t2);
+            tcg_gen_and_tl(t1, t1, t2);
+            tcg_temp_free(t2);
+            tcg_gen_brcondi_tl(TCG_COND_GE, t1, 0, l1);
+            tcg_temp_free(t1);
             /* operands of same sign, result different sign */
             generate_exception(ctx, EXCP_OVERFLOW);
             gen_set_label(l1);
-            tcg_temp_free(r_tmp1);
+            gen_store_gpr(t0, rd);
+            tcg_temp_free(t0);
         }
         opn = "dadd";
         break;
     case OPC_DADDU:
-        tcg_gen_add_tl(t0, t0, t1);
+        if (rs != 0 && rt != 0) {
+            tcg_gen_add_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
+        } else if (rs == 0 && rt != 0) {
+            tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rt]);
+        } else if (rs != 0 && rt == 0) {
+            tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
+        } else {
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+        }
         opn = "daddu";
         break;
     case OPC_DSUB:
         {
-            TCGv r_tmp1 = tcg_temp_new();
-            TCGv r_tmp2 = tcg_temp_new();
+            TCGv t0 = tcg_temp_local_new();
+            TCGv t1 = tcg_temp_new();
+            TCGv t2 = tcg_temp_new();
             int l1 = gen_new_label();
 
-            save_cpu_state(ctx, 1);
-            tcg_gen_mov_tl(r_tmp1, t0);
-            tcg_gen_sub_tl(t0, t0, t1);
-
-            tcg_gen_xor_tl(r_tmp2, r_tmp1, t1);
-            tcg_gen_xor_tl(r_tmp1, r_tmp1, t0);
-            tcg_gen_and_tl(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free(r_tmp2);
-            tcg_gen_brcondi_tl(TCG_COND_GE, r_tmp1, 0, l1);
-            /* operands of different sign, first operand and result different sign */
+            gen_load_gpr(t1, rs);
+            gen_load_gpr(t2, rt);
+            tcg_gen_sub_tl(t0, t1, t2);
+            tcg_gen_xor_tl(t2, t1, t2);
+            tcg_gen_xor_tl(t1, t0, t1);
+            tcg_gen_and_tl(t1, t1, t2);
+            tcg_temp_free(t2);
+            tcg_gen_brcondi_tl(TCG_COND_GE, t1, 0, l1);
+            tcg_temp_free(t1);
+           /* operands of different sign, first operand and result different sign */
             generate_exception(ctx, EXCP_OVERFLOW);
             gen_set_label(l1);
-            tcg_temp_free(r_tmp1);
+            gen_store_gpr(t0, rd);
+            tcg_temp_free(t0);
         }
         opn = "dsub";
         break;
     case OPC_DSUBU:
-        tcg_gen_sub_tl(t0, t0, t1);
+        if (rs != 0 && rt != 0) {
+            tcg_gen_sub_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
+        } else if (rs == 0 && rt != 0) {
+            tcg_gen_neg_tl(cpu_gpr[rd], cpu_gpr[rt]);
+        } else if (rs != 0 && rt == 0) {
+            tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
+        } else {
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+        }
         opn = "dsubu";
         break;
 #endif
-    case OPC_SLT:
-        gen_op_lt(t0, t1);
-        opn = "slt";
+    case OPC_MUL:
+        if (likely(rs != 0 && rt != 0)) {
+            tcg_gen_mul_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
+        } else {
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+        }
+        opn = "mul";
         break;
-    case OPC_SLTU:
-        gen_op_ltu(t0, t1);
-        opn = "sltu";
+    }
+    MIPS_DEBUG("%s %s, %s, %s", opn, regnames[rd], regnames[rs], regnames[rt]);
+}
+
+/* Conditional move */
+static void gen_cond_move (CPUState *env, uint32_t opc, int rd, int rs, int rt)
+{
+    const char *opn = "cond move";
+    int l1;
+
+    if (rd == 0) {
+        /* If no destination, treat it as a NOP.
+           For add & sub, we must generate the overflow exception when needed. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    l1 = gen_new_label();
+    switch (opc) {
+    case OPC_MOVN:
+        if (likely(rt != 0))
+            tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rt], 0, l1);
+        else
+            tcg_gen_br(l1);
+        opn = "movn";
+        break;
+    case OPC_MOVZ:
+        if (likely(rt != 0))
+            tcg_gen_brcondi_tl(TCG_COND_NE, cpu_gpr[rt], 0, l1);
+        opn = "movz";
         break;
+    }
+    if (rs != 0)
+        tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
+    else
+        tcg_gen_movi_tl(cpu_gpr[rd], 0);
+    gen_set_label(l1);
+
+    MIPS_DEBUG("%s %s, %s, %s", opn, regnames[rd], regnames[rs], regnames[rt]);
+}
+
+/* Logic */
+static void gen_logic (CPUState *env, uint32_t opc, int rd, int rs, int rt)
+{
+    const char *opn = "logic";
+
+    if (rd == 0) {
+        /* If no destination, treat it as a NOP. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    switch (opc) {
     case OPC_AND:
-        tcg_gen_and_tl(t0, t0, t1);
+        if (likely(rs != 0 && rt != 0)) {
+            tcg_gen_and_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
+        } else {
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+        }
         opn = "and";
         break;
     case OPC_NOR:
-        tcg_gen_or_tl(t0, t0, t1);
-        tcg_gen_not_tl(t0, t0);
+        if (rs != 0 && rt != 0) {
+            tcg_gen_nor_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
+        } else if (rs == 0 && rt != 0) {
+            tcg_gen_not_tl(cpu_gpr[rd], cpu_gpr[rt]);
+        } else if (rs != 0 && rt == 0) {
+            tcg_gen_not_tl(cpu_gpr[rd], cpu_gpr[rs]);
+        } else {
+            tcg_gen_movi_tl(cpu_gpr[rd], ~((target_ulong)0));
+        }
         opn = "nor";
         break;
     case OPC_OR:
-        tcg_gen_or_tl(t0, t0, t1);
+        if (likely(rs != 0 && rt != 0)) {
+            tcg_gen_or_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
+        } else if (rs == 0 && rt != 0) {
+            tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rt]);
+        } else if (rs != 0 && rt == 0) {
+            tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
+        } else {
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+        }
         opn = "or";
         break;
     case OPC_XOR:
-        tcg_gen_xor_tl(t0, t0, t1);
+        if (likely(rs != 0 && rt != 0)) {
+            tcg_gen_xor_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
+        } else if (rs == 0 && rt != 0) {
+            tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rt]);
+        } else if (rs != 0 && rt == 0) {
+            tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
+        } else {
+            tcg_gen_movi_tl(cpu_gpr[rd], 0);
+        }
         opn = "xor";
         break;
-    case OPC_MUL:
-        tcg_gen_mul_tl(t0, t0, t1);
-        tcg_gen_ext32s_tl(t0, t0);
-        opn = "mul";
+    }
+    MIPS_DEBUG("%s %s, %s, %s", opn, regnames[rd], regnames[rs], regnames[rt]);
+}
+
+/* Set on lower than */
+static void gen_slt (CPUState *env, uint32_t opc, int rd, int rs, int rt)
+{
+    const char *opn = "slt";
+    TCGv t0, t1;
+
+    if (rd == 0) {
+        /* If no destination, treat it as a NOP. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    gen_load_gpr(t0, rs);
+    gen_load_gpr(t1, rt);
+    switch (opc) {
+    case OPC_SLT:
+        gen_op_lt(cpu_gpr[rd], t0, t1);
+        opn = "slt";
         break;
-    case OPC_MOVN:
-        {
-            int l1 = gen_new_label();
+    case OPC_SLTU:
+        gen_op_ltu(cpu_gpr[rd], t0, t1);
+        opn = "sltu";
+        break;
+    }
+    MIPS_DEBUG("%s %s, %s, %s", opn, regnames[rd], regnames[rs], regnames[rt]);
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+}
 
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            gen_store_gpr(t0, rd);
-            gen_set_label(l1);
-        }
-        opn = "movn";
-        goto print;
-    case OPC_MOVZ:
-        {
-            int l1 = gen_new_label();
+/* Shifts */
+static void gen_shift (CPUState *env, DisasContext *ctx, uint32_t opc,
+                       int rd, int rs, int rt)
+{
+    const char *opn = "shifts";
+    TCGv t0, t1;
 
-            tcg_gen_brcondi_tl(TCG_COND_NE, t1, 0, l1);
-            gen_store_gpr(t0, rd);
-            gen_set_label(l1);
-        }
-        opn = "movz";
-        goto print;
+    if (rd == 0) {
+        /* If no destination, treat it as a NOP.
+           For add & sub, we must generate the overflow exception when needed. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    t1 = tcg_temp_new();
+    gen_load_gpr(t0, rs);
+    gen_load_gpr(t1, rt);
+    switch (opc) {
     case OPC_SLLV:
         tcg_gen_andi_tl(t0, t0, 0x1f);
         tcg_gen_shl_tl(t0, t1, t0);
-        tcg_gen_ext32s_tl(t0, t0);
+        tcg_gen_ext32s_tl(cpu_gpr[rd], t0);
         opn = "sllv";
         break;
     case OPC_SRAV:
         tcg_gen_ext32s_tl(t1, t1);
         tcg_gen_andi_tl(t0, t0, 0x1f);
-        tcg_gen_sar_tl(t0, t1, t0);
+        tcg_gen_sar_tl(cpu_gpr[rd], t1, t0);
         opn = "srav";
         break;
     case OPC_SRLV:
@@ -1668,37 +1865,28 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
             tcg_gen_ext32u_tl(t1, t1);
             tcg_gen_andi_tl(t0, t0, 0x1f);
             tcg_gen_shr_tl(t0, t1, t0);
-            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], t0);
             opn = "srlv";
             break;
         case 1:
             /* rotrv is decoded as srlv on non-R2 CPUs */
             if (env->insn_flags & ISA_MIPS32R2) {
-                int l1 = gen_new_label();
-                int l2 = gen_new_label();
-
-                tcg_gen_andi_tl(t0, t0, 0x1f);
-                tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
-                {
-                    TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-                    TCGv_i32 r_tmp2 = tcg_temp_new_i32();
-
-                    tcg_gen_trunc_tl_i32(r_tmp1, t0);
-                    tcg_gen_trunc_tl_i32(r_tmp2, t1);
-                    tcg_gen_rotr_i32(r_tmp1, r_tmp1, r_tmp2);
-                    tcg_temp_free_i32(r_tmp1);
-                    tcg_temp_free_i32(r_tmp2);
-                    tcg_gen_br(l2);
-                }
-                gen_set_label(l1);
-                tcg_gen_mov_tl(t0, t1);
-                gen_set_label(l2);
+                TCGv_i32 t2 = tcg_temp_new_i32();
+                TCGv_i32 t3 = tcg_temp_new_i32();
+
+                tcg_gen_trunc_tl_i32(t2, t0);
+                tcg_gen_trunc_tl_i32(t3, t1);
+                tcg_gen_andi_i32(t2, t2, 0x1f);
+                tcg_gen_rotr_i32(t2, t3, t2);
+                tcg_gen_ext_i32_tl(cpu_gpr[rd], t2);
+                tcg_temp_free_i32(t2);
+                tcg_temp_free_i32(t3);
                 opn = "rotrv";
             } else {
                 tcg_gen_ext32u_tl(t1, t1);
                 tcg_gen_andi_tl(t0, t0, 0x1f);
                 tcg_gen_shr_tl(t0, t1, t0);
-                tcg_gen_ext32s_tl(t0, t0);
+                tcg_gen_ext32s_tl(cpu_gpr[rd], t0);
                 opn = "srlv";
             }
             break;
@@ -1711,36 +1899,26 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
 #if defined(TARGET_MIPS64)
     case OPC_DSLLV:
         tcg_gen_andi_tl(t0, t0, 0x3f);
-        tcg_gen_shl_tl(t0, t1, t0);
+        tcg_gen_shl_tl(cpu_gpr[rd], t1, t0);
         opn = "dsllv";
         break;
     case OPC_DSRAV:
         tcg_gen_andi_tl(t0, t0, 0x3f);
-        tcg_gen_sar_tl(t0, t1, t0);
+        tcg_gen_sar_tl(cpu_gpr[rd], t1, t0);
         opn = "dsrav";
         break;
     case OPC_DSRLV:
         switch ((ctx->opcode >> 6) & 0x1f) {
         case 0:
             tcg_gen_andi_tl(t0, t0, 0x3f);
-            tcg_gen_shr_tl(t0, t1, t0);
+            tcg_gen_shr_tl(cpu_gpr[rd], t1, t0);
             opn = "dsrlv";
             break;
         case 1:
             /* drotrv is decoded as dsrlv on non-R2 CPUs */
             if (env->insn_flags & ISA_MIPS32R2) {
-                int l1 = gen_new_label();
-                int l2 = gen_new_label();
-
                 tcg_gen_andi_tl(t0, t0, 0x3f);
-                tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
-                {
-                    tcg_gen_rotr_tl(t0, t1, t0);
-                    tcg_gen_br(l2);
-                }
-                gen_set_label(l1);
-                tcg_gen_mov_tl(t0, t1);
-                gen_set_label(l2);
+                tcg_gen_rotr_tl(cpu_gpr[rd], t1, t0);
                 opn = "drotrv";
             } else {
                 tcg_gen_andi_tl(t0, t0, 0x3f);
@@ -1755,15 +1933,8 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
         }
         break;
 #endif
-    default:
-        MIPS_INVAL(opn);
-        generate_exception(ctx, EXCP_RI);
-        goto out;
     }
-    gen_store_gpr(t0, rd);
- print:
     MIPS_DEBUG("%s %s, %s, %s", opn, regnames[rd], regnames[rs], regnames[rt]);
- out:
     tcg_temp_free(t0);
     tcg_temp_free(t1);
 }
@@ -1772,50 +1943,60 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
 static void gen_HILO (DisasContext *ctx, uint32_t opc, int reg)
 {
     const char *opn = "hilo";
-    TCGv t0 = tcg_temp_local_new();
 
     if (reg == 0 && (opc == OPC_MFHI || opc == OPC_MFLO)) {
         /* Treat as NOP. */
         MIPS_DEBUG("NOP");
-        goto out;
+        return;
     }
     switch (opc) {
     case OPC_MFHI:
-        tcg_gen_mov_tl(t0, cpu_HI[0]);
-        gen_store_gpr(t0, reg);
+        tcg_gen_mov_tl(cpu_gpr[reg], cpu_HI[0]);
         opn = "mfhi";
         break;
     case OPC_MFLO:
-        tcg_gen_mov_tl(t0, cpu_LO[0]);
-        gen_store_gpr(t0, reg);
+        tcg_gen_mov_tl(cpu_gpr[reg], cpu_LO[0]);
         opn = "mflo";
         break;
     case OPC_MTHI:
-        gen_load_gpr(t0, reg);
-        tcg_gen_mov_tl(cpu_HI[0], t0);
+        if (reg != 0)
+            tcg_gen_mov_tl(cpu_HI[0], cpu_gpr[reg]);
+        else
+            tcg_gen_movi_tl(cpu_HI[0], 0);
         opn = "mthi";
         break;
     case OPC_MTLO:
-        gen_load_gpr(t0, reg);
-        tcg_gen_mov_tl(cpu_LO[0], t0);
+        if (reg != 0)
+            tcg_gen_mov_tl(cpu_LO[0], cpu_gpr[reg]);
+        else
+            tcg_gen_movi_tl(cpu_LO[0], 0);
         opn = "mtlo";
         break;
-    default:
-        MIPS_INVAL(opn);
-        generate_exception(ctx, EXCP_RI);
-        goto out;
     }
     MIPS_DEBUG("%s %s", opn, regnames[reg]);
- out:
-    tcg_temp_free(t0);
 }
 
 static void gen_muldiv (DisasContext *ctx, uint32_t opc,
                         int rs, int rt)
 {
     const char *opn = "mul/div";
-    TCGv t0 = tcg_temp_local_new();
-    TCGv t1 = tcg_temp_local_new();
+    TCGv t0, t1;
+
+    switch (opc) {
+    case OPC_DIV:
+    case OPC_DIVU:
+#if defined(TARGET_MIPS64)
+    case OPC_DDIV:
+    case OPC_DDIVU:
+#endif
+        t0 = tcg_temp_local_new();
+        t1 = tcg_temp_local_new();
+        break;
+    default:
+        t0 = tcg_temp_new();
+        t1 = tcg_temp_new();
+        break;
+    }
 
     gen_load_gpr(t0, rs);
     gen_load_gpr(t1, rt);
@@ -1823,30 +2004,22 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
     case OPC_DIV:
         {
             int l1 = gen_new_label();
+            int l2 = gen_new_label();
 
+            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ext32s_tl(t1, t1);
             tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            {
-                int l2 = gen_new_label();
-                TCGv_i32 r_tmp1 = tcg_temp_local_new_i32();
-                TCGv_i32 r_tmp2 = tcg_temp_local_new_i32();
-                TCGv_i32 r_tmp3 = tcg_temp_local_new_i32();
-
-                tcg_gen_trunc_tl_i32(r_tmp1, t0);
-                tcg_gen_trunc_tl_i32(r_tmp2, t1);
-                tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp1, -1 << 31, l2);
-                tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp2, -1, l2);
-                tcg_gen_ext32s_tl(cpu_LO[0], t0);
-                tcg_gen_movi_tl(cpu_HI[0], 0);
-                tcg_gen_br(l1);
-                gen_set_label(l2);
-                tcg_gen_div_i32(r_tmp3, r_tmp1, r_tmp2);
-                tcg_gen_rem_i32(r_tmp2, r_tmp1, r_tmp2);
-                tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
-                tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp2);
-                tcg_temp_free_i32(r_tmp1);
-                tcg_temp_free_i32(r_tmp2);
-                tcg_temp_free_i32(r_tmp3);
-            }
+            tcg_gen_brcondi_tl(TCG_COND_NE, t0, INT_MIN, l2);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1, l2);
+
+            tcg_gen_mov_tl(cpu_LO[0], t0);
+            tcg_gen_movi_tl(cpu_HI[0], 0);
+            tcg_gen_br(l1);
+            gen_set_label(l2);
+            tcg_gen_div_tl(cpu_LO[0], t0, t1);
+            tcg_gen_rem_tl(cpu_HI[0], t0, t1);
+            tcg_gen_ext32s_tl(cpu_LO[0], cpu_LO[0]);
+            tcg_gen_ext32s_tl(cpu_HI[0], cpu_HI[0]);
             gen_set_label(l1);
         }
         opn = "div";
@@ -1855,40 +2028,30 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         {
             int l1 = gen_new_label();
 
-            tcg_gen_ext32s_tl(t1, t1);
+            tcg_gen_ext32u_tl(t0, t0);
+            tcg_gen_ext32u_tl(t1, t1);
             tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            {
-                TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-                TCGv_i32 r_tmp2 = tcg_temp_new_i32();
-                TCGv_i32 r_tmp3 = tcg_temp_new_i32();
-
-                tcg_gen_trunc_tl_i32(r_tmp1, t0);
-                tcg_gen_trunc_tl_i32(r_tmp2, t1);
-                tcg_gen_divu_i32(r_tmp3, r_tmp1, r_tmp2);
-                tcg_gen_remu_i32(r_tmp1, r_tmp1, r_tmp2);
-                tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
-                tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp1);
-                tcg_temp_free_i32(r_tmp1);
-                tcg_temp_free_i32(r_tmp2);
-                tcg_temp_free_i32(r_tmp3);
-            }
+            tcg_gen_divu_tl(cpu_LO[0], t0, t1);
+            tcg_gen_remu_tl(cpu_HI[0], t0, t1);
+            tcg_gen_ext32s_tl(cpu_LO[0], cpu_LO[0]);
+            tcg_gen_ext32s_tl(cpu_HI[0], cpu_HI[0]);
             gen_set_label(l1);
         }
         opn = "divu";
         break;
     case OPC_MULT:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
-
-            tcg_gen_ext_tl_i64(r_tmp1, t0);
-            tcg_gen_ext_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
+
+            tcg_gen_ext_tl_i64(t2, t0);
+            tcg_gen_ext_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -1896,19 +2059,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         break;
     case OPC_MULTU:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
 
             tcg_gen_ext32u_tl(t0, t0);
             tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_extu_tl_i64(r_tmp1, t0);
-            tcg_gen_extu_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            tcg_gen_extu_tl_i64(t2, t0);
+            tcg_gen_extu_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -1918,20 +2081,17 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
     case OPC_DDIV:
         {
             int l1 = gen_new_label();
+            int l2 = gen_new_label();
 
             tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            {
-                int l2 = gen_new_label();
-
-                tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
-                tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
-                tcg_gen_mov_tl(cpu_LO[0], t0);
-                tcg_gen_movi_tl(cpu_HI[0], 0);
-                tcg_gen_br(l1);
-                gen_set_label(l2);
-                tcg_gen_div_i64(cpu_LO[0], t0, t1);
-                tcg_gen_rem_i64(cpu_HI[0], t0, t1);
-            }
+            tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
+            tcg_gen_mov_tl(cpu_LO[0], t0);
+            tcg_gen_movi_tl(cpu_HI[0], 0);
+            tcg_gen_br(l1);
+            gen_set_label(l2);
+            tcg_gen_div_i64(cpu_LO[0], t0, t1);
+            tcg_gen_rem_i64(cpu_HI[0], t0, t1);
             gen_set_label(l1);
         }
         opn = "ddiv";
@@ -1958,19 +2118,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
 #endif
     case OPC_MADD:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
-
-            tcg_gen_ext_tl_i64(r_tmp1, t0);
-            tcg_gen_ext_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
-            tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
+
+            tcg_gen_ext_tl_i64(t2, t0);
+            tcg_gen_ext_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_add_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_LO[1], t1);
         }
@@ -1978,21 +2138,21 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         break;
     case OPC_MADDU:
        {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
 
             tcg_gen_ext32u_tl(t0, t0);
             tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_extu_tl_i64(r_tmp1, t0);
-            tcg_gen_extu_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
-            tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            tcg_gen_extu_tl_i64(t2, t0);
+            tcg_gen_extu_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_add_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -2000,19 +2160,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         break;
     case OPC_MSUB:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
-
-            tcg_gen_ext_tl_i64(r_tmp1, t0);
-            tcg_gen_ext_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
-            tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
+
+            tcg_gen_ext_tl_i64(t2, t0);
+            tcg_gen_ext_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_sub_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -2020,21 +2180,21 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         break;
     case OPC_MSUBU:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
 
             tcg_gen_ext32u_tl(t0, t0);
             tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_extu_tl_i64(r_tmp1, t0);
-            tcg_gen_extu_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
-            tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            tcg_gen_extu_tl_i64(t2, t0);
+            tcg_gen_extu_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_sub_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -2055,8 +2215,8 @@ static void gen_mul_vr54xx (DisasContext *ctx, uint32_t opc,
                             int rd, int rs, int rt)
 {
     const char *opn = "mul vr54xx";
-    TCGv t0 = tcg_temp_local_new();
-    TCGv t1 = tcg_temp_local_new();
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
 
     gen_load_gpr(t0, rs);
     gen_load_gpr(t1, rt);
@@ -2135,42 +2295,36 @@ static void gen_cl (DisasContext *ctx, uint32_t opc,
                     int rd, int rs)
 {
     const char *opn = "CLx";
-    TCGv t0 = tcg_temp_local_new();
+    TCGv t0;
 
     if (rd == 0) {
         /* Treat as NOP. */
         MIPS_DEBUG("NOP");
-        goto out;
+        return;
     }
+    t0 = tcg_temp_new();
     gen_load_gpr(t0, rs);
     switch (opc) {
     case OPC_CLO:
-        gen_helper_clo(t0, t0);
+        gen_helper_clo(cpu_gpr[rd], t0);
         opn = "clo";
         break;
     case OPC_CLZ:
-        gen_helper_clz(t0, t0);
+        gen_helper_clz(cpu_gpr[rd], t0);
         opn = "clz";
         break;
 #if defined(TARGET_MIPS64)
     case OPC_DCLO:
-        gen_helper_dclo(t0, t0);
+        gen_helper_dclo(cpu_gpr[rd], t0);
         opn = "dclo";
         break;
     case OPC_DCLZ:
-        gen_helper_dclz(t0, t0);
+        gen_helper_dclz(cpu_gpr[rd], t0);
         opn = "dclz";
         break;
 #endif
-    default:
-        MIPS_INVAL(opn);
-        generate_exception(ctx, EXCP_RI);
-        goto out;
     }
-    gen_store_gpr(t0, rd);
     MIPS_DEBUG("%s %s, %s", opn, regnames[rd], regnames[rs]);
-
- out:
     tcg_temp_free(t0);
 }
 
@@ -2179,8 +2333,8 @@ static void gen_trap (DisasContext *ctx, uint32_t opc,
                       int rs, int rt, int16_t imm)
 {
     int cond;
-    TCGv t0 = tcg_temp_local_new();
-    TCGv t1 = tcg_temp_local_new();
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
 
     cond = 0;
     /* Load needed operands */
@@ -2221,7 +2375,7 @@ static void gen_trap (DisasContext *ctx, uint32_t opc,
         case OPC_TGEU:  /* rs >= rs unsigned */
         case OPC_TGEIU: /* r0 >= 0  unsigned */
             /* Always trap */
-            tcg_gen_movi_tl(t0, 1);
+            generate_exception(ctx, EXCP_TRAP);
             break;
         case OPC_TLT:   /* rs < rs           */
         case OPC_TLTI:  /* r0 < 0            */
@@ -2230,54 +2384,40 @@ static void gen_trap (DisasContext *ctx, uint32_t opc,
         case OPC_TNE:   /* rs != rs          */
         case OPC_TNEI:  /* r0 != 0           */
             /* Never trap: treat as NOP. */
-            goto out;
-        default:
-            MIPS_INVAL("trap");
-            generate_exception(ctx, EXCP_RI);
-            goto out;
+            break;
         }
     } else {
+        int l1 = gen_new_label();
+
         switch (opc) {
         case OPC_TEQ:
         case OPC_TEQI:
-            gen_op_eq(t0, t1);
+            tcg_gen_brcond_tl(TCG_COND_NE, t0, t1, l1);
             break;
         case OPC_TGE:
         case OPC_TGEI:
-            gen_op_ge(t0, t1);
+            tcg_gen_brcond_tl(TCG_COND_LT, t0, t1, l1);
             break;
         case OPC_TGEU:
         case OPC_TGEIU:
-            gen_op_geu(t0, t1);
+            tcg_gen_brcond_tl(TCG_COND_LTU, t0, t1, l1);
             break;
         case OPC_TLT:
         case OPC_TLTI:
-            gen_op_lt(t0, t1);
+            tcg_gen_brcond_tl(TCG_COND_GE, t0, t1, l1);
             break;
         case OPC_TLTU:
         case OPC_TLTIU:
-            gen_op_ltu(t0, t1);
+            tcg_gen_brcond_tl(TCG_COND_GEU, t0, t1, l1);
             break;
         case OPC_TNE:
         case OPC_TNEI:
-            gen_op_ne(t0, t1);
+            tcg_gen_brcond_tl(TCG_COND_EQ, t0, t1, l1);
             break;
-        default:
-            MIPS_INVAL("trap");
-            generate_exception(ctx, EXCP_RI);
-            goto out;
         }
-    }
-    save_cpu_state(ctx, 1);
-    {
-        int l1 = gen_new_label();
-
-        tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
-        gen_helper_0i(raise_exception, EXCP_TRAP);
+        generate_exception(ctx, EXCP_TRAP);
         gen_set_label(l1);
     }
-    ctx->bstate = BS_STOP;
- out:
     tcg_temp_free(t0);
     tcg_temp_free(t1);
 }
@@ -2303,8 +2443,8 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
     target_ulong btgt = -1;
     int blink = 0;
     int bcond_compute = 0;
-    TCGv t0 = tcg_temp_local_new();
-    TCGv t1 = tcg_temp_local_new();
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
 
     if (ctx->hflags & MIPS_HFLAG_BMASK) {
 #ifdef MIPS_DEBUG_DISAS
@@ -2396,13 +2536,11 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
             MIPS_DEBUG("bnever (NOP)");
             goto out;
         case OPC_BLTZAL:  /* 0 < 0           */
-            tcg_gen_movi_tl(t0, ctx->pc + 8);
-            gen_store_gpr(t0, 31);
+            tcg_gen_movi_tl(cpu_gpr[31], ctx->pc + 8);
             MIPS_DEBUG("bnever and link");
             goto out;
         case OPC_BLTZALL: /* 0 < 0 likely */
-            tcg_gen_movi_tl(t0, ctx->pc + 8);
-            gen_store_gpr(t0, 31);
+            tcg_gen_movi_tl(cpu_gpr[31], ctx->pc + 8);
             /* Skip the instruction in the delay slot */
             MIPS_DEBUG("bnever, link and skip");
             ctx->pc += 4;
@@ -2440,82 +2578,80 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
     } else {
         switch (opc) {
         case OPC_BEQ:
-            gen_op_eq(t0, t1);
+            gen_op_eq(bcond, t0, t1);
             MIPS_DEBUG("beq %s, %s, " TARGET_FMT_lx,
                        regnames[rs], regnames[rt], btgt);
             goto not_likely;
         case OPC_BEQL:
-            gen_op_eq(t0, t1);
+            gen_op_eq(bcond, t0, t1);
             MIPS_DEBUG("beql %s, %s, " TARGET_FMT_lx,
                        regnames[rs], regnames[rt], btgt);
             goto likely;
         case OPC_BNE:
-            gen_op_ne(t0, t1);
+            gen_op_ne(bcond, t0, t1);
             MIPS_DEBUG("bne %s, %s, " TARGET_FMT_lx,
                        regnames[rs], regnames[rt], btgt);
             goto not_likely;
         case OPC_BNEL:
-            gen_op_ne(t0, t1);
+            gen_op_ne(bcond, t0, t1);
             MIPS_DEBUG("bnel %s, %s, " TARGET_FMT_lx,
                        regnames[rs], regnames[rt], btgt);
             goto likely;
         case OPC_BGEZ:
-            gen_op_gez(t0);
+            gen_op_gez(bcond, t0);
             MIPS_DEBUG("bgez %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto not_likely;
         case OPC_BGEZL:
-            gen_op_gez(t0);
+            gen_op_gez(bcond, t0);
             MIPS_DEBUG("bgezl %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
         case OPC_BGEZAL:
-            gen_op_gez(t0);
+            gen_op_gez(bcond, t0);
             MIPS_DEBUG("bgezal %s, " TARGET_FMT_lx, regnames[rs], btgt);
             blink = 31;
             goto not_likely;
         case OPC_BGEZALL:
-            gen_op_gez(t0);
+            gen_op_gez(bcond, t0);
             blink = 31;
             MIPS_DEBUG("bgezall %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
         case OPC_BGTZ:
-            gen_op_gtz(t0);
+            gen_op_gtz(bcond, t0);
             MIPS_DEBUG("bgtz %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto not_likely;
         case OPC_BGTZL:
-            gen_op_gtz(t0);
+            gen_op_gtz(bcond, t0);
             MIPS_DEBUG("bgtzl %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
         case OPC_BLEZ:
-            gen_op_lez(t0);
+            gen_op_lez(bcond, t0);
             MIPS_DEBUG("blez %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto not_likely;
         case OPC_BLEZL:
-            gen_op_lez(t0);
+            gen_op_lez(bcond, t0);
             MIPS_DEBUG("blezl %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
         case OPC_BLTZ:
-            gen_op_ltz(t0);
+            gen_op_ltz(bcond, t0);
             MIPS_DEBUG("bltz %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto not_likely;
         case OPC_BLTZL:
-            gen_op_ltz(t0);
+            gen_op_ltz(bcond, t0);
             MIPS_DEBUG("bltzl %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
         case OPC_BLTZAL:
-            gen_op_ltz(t0);
+            gen_op_ltz(bcond, t0);
             blink = 31;
             MIPS_DEBUG("bltzal %s, " TARGET_FMT_lx, regnames[rs], btgt);
         not_likely:
             ctx->hflags |= MIPS_HFLAG_BC;
-            tcg_gen_trunc_tl_i32(bcond, t0);
             break;
         case OPC_BLTZALL:
-            gen_op_ltz(t0);
+            gen_op_ltz(bcond, t0);
             blink = 31;
             MIPS_DEBUG("bltzall %s, " TARGET_FMT_lx, regnames[rs], btgt);
         likely:
             ctx->hflags |= MIPS_HFLAG_BL;
-            tcg_gen_trunc_tl_i32(bcond, t0);
             break;
         default:
             MIPS_INVAL("conditional branch/jump");
@@ -2528,8 +2664,7 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
 
     ctx->btarget = btgt;
     if (blink > 0) {
-        tcg_gen_movi_tl(t0, ctx->pc + 8);
-        gen_store_gpr(t0, blink);
+        tcg_gen_movi_tl(cpu_gpr[blink], ctx->pc + 8);
     }
 
  out:
@@ -2632,91 +2767,107 @@ fail:
 
 static void gen_bshfl (DisasContext *ctx, uint32_t op2, int rt, int rd)
 {
-    TCGv t0 = tcg_temp_new();
-    TCGv t1 = tcg_temp_new();
+    TCGv t0;
 
-    gen_load_gpr(t1, rt);
+    if (rd == 0) {
+        /* If no destination, treat it as a NOP. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    gen_load_gpr(t0, rt);
     switch (op2) {
     case OPC_WSBH:
-        tcg_gen_shri_tl(t0, t1, 8);
-        tcg_gen_andi_tl(t0, t0, 0x00FF00FF);
-        tcg_gen_shli_tl(t1, t1, 8);
-        tcg_gen_andi_tl(t1, t1, ~0x00FF00FF);
-        tcg_gen_or_tl(t0, t0, t1);
-        tcg_gen_ext32s_tl(t0, t0);
+        {
+            TCGv t1 = tcg_temp_new();
+
+            tcg_gen_shri_tl(t1, t0, 8);
+            tcg_gen_andi_tl(t1, t1, 0x00FF00FF);
+            tcg_gen_shli_tl(t0, t0, 8);
+            tcg_gen_andi_tl(t0, t0, ~0x00FF00FF);
+            tcg_gen_or_tl(t0, t0, t1);
+            tcg_temp_free(t1);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], t0);
+        }
         break;
     case OPC_SEB:
-        tcg_gen_ext8s_tl(t0, t1);
+        tcg_gen_ext8s_tl(cpu_gpr[rd], t0);
         break;
     case OPC_SEH:
-        tcg_gen_ext16s_tl(t0, t1);
+        tcg_gen_ext16s_tl(cpu_gpr[rd], t0);
         break;
 #if defined(TARGET_MIPS64)
     case OPC_DSBH:
-        gen_load_gpr(t1, rt);
-        tcg_gen_shri_tl(t0, t1, 8);
-        tcg_gen_andi_tl(t0, t0, 0x00FF00FF00FF00FFULL);
-        tcg_gen_shli_tl(t1, t1, 8);
-        tcg_gen_andi_tl(t1, t1, ~0x00FF00FF00FF00FFULL);
-        tcg_gen_or_tl(t0, t0, t1);
+        {
+            TCGv t1 = tcg_temp_new();
+
+            tcg_gen_shri_tl(t1, t0, 8);
+            tcg_gen_andi_tl(t1, t1, 0x00FF00FF00FF00FFULL);
+            tcg_gen_shli_tl(t0, t0, 8);
+            tcg_gen_andi_tl(t0, t0, ~0x00FF00FF00FF00FFULL);
+            tcg_gen_or_tl(cpu_gpr[rd], t0, t1);
+            tcg_temp_free(t1);
+        }
         break;
     case OPC_DSHD:
-        gen_load_gpr(t1, rt);
-        tcg_gen_shri_tl(t0, t1, 16);
-        tcg_gen_andi_tl(t0, t0, 0x0000FFFF0000FFFFULL);
-        tcg_gen_shli_tl(t1, t1, 16);
-        tcg_gen_andi_tl(t1, t1, ~0x0000FFFF0000FFFFULL);
-        tcg_gen_or_tl(t1, t0, t1);
-        tcg_gen_shri_tl(t0, t1, 32);
-        tcg_gen_shli_tl(t1, t1, 32);
-        tcg_gen_or_tl(t0, t0, t1);
+        {
+            TCGv t1 = tcg_temp_new();
+
+            tcg_gen_shri_tl(t1, t0, 16);
+            tcg_gen_andi_tl(t1, t1, 0x0000FFFF0000FFFFULL);
+            tcg_gen_shli_tl(t0, t0, 16);
+            tcg_gen_andi_tl(t0, t0, ~0x0000FFFF0000FFFFULL);
+            tcg_gen_or_tl(t0, t0, t1);
+            tcg_gen_shri_tl(t1, t0, 32);
+            tcg_gen_shli_tl(t0, t0, 32);
+            tcg_gen_or_tl(cpu_gpr[rd], t0, t1);
+            tcg_temp_free(t1);
+        }
         break;
 #endif
     default:
         MIPS_INVAL("bsfhl");
         generate_exception(ctx, EXCP_RI);
         tcg_temp_free(t0);
-        tcg_temp_free(t1);
         return;
     }
-    gen_store_gpr(t0, rd);
     tcg_temp_free(t0);
-    tcg_temp_free(t1);
 }
 
 #ifndef CONFIG_USER_ONLY
 /* CP0 (MMU and control) */
-static inline void gen_mfc0_load32 (TCGv t, target_ulong off)
+static inline void gen_mfc0_load32 (TCGv arg, target_ulong off)
 {
-    TCGv_i32 r_tmp = tcg_temp_new_i32();
+    TCGv_i32 t0 = tcg_temp_new_i32();
 
-    tcg_gen_ld_i32(r_tmp, cpu_env, off);
-    tcg_gen_ext_i32_tl(t, r_tmp);
-    tcg_temp_free_i32(r_tmp);
+    tcg_gen_ld_i32(t0, cpu_env, off);
+    tcg_gen_ext_i32_tl(arg, t0);
+    tcg_temp_free_i32(t0);
 }
 
-static inline void gen_mfc0_load64 (TCGv t, target_ulong off)
+static inline void gen_mfc0_load64 (TCGv arg, target_ulong off)
 {
-    tcg_gen_ld_tl(t, cpu_env, off);
-    tcg_gen_ext32s_tl(t, t);
+    tcg_gen_ld_tl(arg, cpu_env, off);
+    tcg_gen_ext32s_tl(arg, arg);
 }
 
-static inline void gen_mtc0_store32 (TCGv t, target_ulong off)
+static inline void gen_mtc0_store32 (TCGv arg, target_ulong off)
 {
-    TCGv_i32 r_tmp = tcg_temp_new_i32();
+    TCGv_i32 t0 = tcg_temp_new_i32();
 
-    tcg_gen_trunc_tl_i32(r_tmp, t);
-    tcg_gen_st_i32(r_tmp, cpu_env, off);
-    tcg_temp_free_i32(r_tmp);
+    tcg_gen_trunc_tl_i32(t0, arg);
+    tcg_gen_st_i32(t0, cpu_env, off);
+    tcg_temp_free_i32(t0);
 }
 
-static inline void gen_mtc0_store64 (TCGv t, target_ulong off)
+static inline void gen_mtc0_store64 (TCGv arg, target_ulong off)
 {
-    tcg_gen_ext32s_tl(t, t);
-    tcg_gen_st_tl(t, cpu_env, off);
+    tcg_gen_ext32s_tl(arg, arg);
+    tcg_gen_st_tl(arg, cpu_env, off);
 }
 
-static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int sel)
+static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv arg, int reg, int sel)
 {
     const char *rn = "invalid";
 
@@ -2727,22 +2878,22 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 0:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Index));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Index));
             rn = "Index";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpcontrol(t0);
+            gen_helper_mfc0_mvpcontrol(arg);
             rn = "MVPControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpconf0(t0);
+            gen_helper_mfc0_mvpconf0(arg);
             rn = "MVPConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpconf1(t0);
+            gen_helper_mfc0_mvpconf1(arg);
             rn = "MVPConf1";
             break;
         default:
@@ -2752,42 +2903,42 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 1:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_random(t0);
+            gen_helper_mfc0_random(arg);
             rn = "Random";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_VPEControl));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_VPEControl));
             rn = "VPEControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_VPEConf0));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_VPEConf0));
             rn = "VPEConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_VPEConf1));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_VPEConf1));
             rn = "VPEConf1";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_mfc0_load64(t0, offsetof(CPUState, CP0_YQMask));
+            gen_mfc0_load64(arg, offsetof(CPUState, CP0_YQMask));
             rn = "YQMask";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_mfc0_load64(t0, offsetof(CPUState, CP0_VPESchedule));
+            gen_mfc0_load64(arg, offsetof(CPUState, CP0_VPESchedule));
             rn = "VPESchedule";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_mfc0_load64(t0, offsetof(CPUState, CP0_VPEScheFBack));
+            gen_mfc0_load64(arg, offsetof(CPUState, CP0_VPEScheFBack));
             rn = "VPEScheFBack";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_VPEOpt));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_VPEOpt));
             rn = "VPEOpt";
             break;
         default:
@@ -2797,43 +2948,43 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 2:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_EntryLo0));
-            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_EntryLo0));
+            tcg_gen_ext32s_tl(arg, arg);
             rn = "EntryLo0";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcstatus(t0);
+            gen_helper_mfc0_tcstatus(arg);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcbind(t0);
+            gen_helper_mfc0_tcbind(arg);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcrestart(t0);
+            gen_helper_mfc0_tcrestart(arg);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tchalt(t0);
+            gen_helper_mfc0_tchalt(arg);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tccontext(t0);
+            gen_helper_mfc0_tccontext(arg);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcschedule(t0);
+            gen_helper_mfc0_tcschedule(arg);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcschefback(t0);
+            gen_helper_mfc0_tcschefback(arg);
             rn = "TCScheFBack";
             break;
         default:
@@ -2843,8 +2994,8 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 3:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_EntryLo1));
-            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_EntryLo1));
+            tcg_gen_ext32s_tl(arg, arg);
             rn = "EntryLo1";
             break;
         default:
@@ -2854,12 +3005,12 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 4:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_Context));
-            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_Context));
+            tcg_gen_ext32s_tl(arg, arg);
             rn = "Context";
             break;
         case 1:
-//            gen_helper_mfc0_contextconfig(t0); /* SmartMIPS ASE */
+//            gen_helper_mfc0_contextconfig(arg); /* SmartMIPS ASE */
             rn = "ContextConfig";
 //            break;
         default:
@@ -2869,12 +3020,12 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 5:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_PageMask));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_PageMask));
             rn = "PageMask";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_PageGrain));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_PageGrain));
             rn = "PageGrain";
             break;
         default:
@@ -2884,32 +3035,32 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 6:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Wired));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Wired));
             rn = "Wired";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_SRSConf0));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_SRSConf0));
             rn = "SRSConf0";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_SRSConf1));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_SRSConf1));
             rn = "SRSConf1";
             break;
         case 3:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_SRSConf2));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_SRSConf2));
             rn = "SRSConf2";
             break;
         case 4:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_SRSConf3));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_SRSConf3));
             rn = "SRSConf3";
             break;
         case 5:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_SRSConf4));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_SRSConf4));
             rn = "SRSConf4";
             break;
         default:
@@ -2920,7 +3071,7 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_HWREna));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_HWREna));
             rn = "HWREna";
             break;
         default:
@@ -2930,8 +3081,8 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 8:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_BadVAddr));
-            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_BadVAddr));
+            tcg_gen_ext32s_tl(arg, arg);
             rn = "BadVAddr";
             break;
         default:
@@ -2944,7 +3095,7 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
             /* Mark as an IO operation because we read the time.  */
             if (use_icount)
                 gen_io_start();
-            gen_helper_mfc0_count(t0);
+            gen_helper_mfc0_count(arg);
             if (use_icount) {
                 gen_io_end();
                 ctx->bstate = BS_STOP;
@@ -2959,8 +3110,8 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 10:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_EntryHi));
-            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_EntryHi));
+            tcg_gen_ext32s_tl(arg, arg);
             rn = "EntryHi";
             break;
         default:
@@ -2970,7 +3121,7 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 11:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Compare));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Compare));
             rn = "Compare";
             break;
         /* 6,7 are implementation dependent */
@@ -2981,22 +3132,22 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 12:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Status));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Status));
             rn = "Status";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_IntCtl));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_IntCtl));
             rn = "IntCtl";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_SRSCtl));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_SRSCtl));
             rn = "SRSCtl";
             break;
         case 3:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_SRSMap));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_SRSMap));
             rn = "SRSMap";
             break;
         default:
@@ -3006,7 +3157,7 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 13:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Cause));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Cause));
             rn = "Cause";
             break;
         default:
@@ -3016,8 +3167,8 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 14:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_EPC));
-            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_EPC));
+            tcg_gen_ext32s_tl(arg, arg);
             rn = "EPC";
             break;
         default:
@@ -3027,12 +3178,12 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 15:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_PRid));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_PRid));
             rn = "PRid";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_EBase));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_EBase));
             rn = "EBase";
             break;
         default:
@@ -3042,29 +3193,29 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 16:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Config0));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Config0));
             rn = "Config";
             break;
         case 1:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Config1));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Config1));
             rn = "Config1";
             break;
         case 2:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Config2));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Config2));
             rn = "Config2";
             break;
         case 3:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Config3));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Config3));
             rn = "Config3";
             break;
         /* 4,5 are reserved */
         /* 6,7 are implementation dependent */
         case 6:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Config6));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Config6));
             rn = "Config6";
             break;
         case 7:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Config7));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Config7));
             rn = "Config7";
             break;
         default:
@@ -3074,7 +3225,7 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 17:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_lladdr(t0);
+            gen_helper_mfc0_lladdr(arg);
             rn = "LLAddr";
             break;
         default:
@@ -3084,7 +3235,7 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mfc0_watchlo, t0, sel);
+            gen_helper_1i(mfc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -3094,7 +3245,7 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 19:
         switch (sel) {
         case 0 ...7:
-            gen_helper_1i(mfc0_watchhi, t0, sel);
+            gen_helper_1i(mfc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -3106,8 +3257,8 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         case 0:
 #if defined(TARGET_MIPS64)
             check_insn(env, ctx, ISA_MIPS3);
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_XContext));
-            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_XContext));
+            tcg_gen_ext32s_tl(arg, arg);
             rn = "XContext";
             break;
 #endif
@@ -3119,7 +3270,7 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
        /* Officially reserved, but sel 0 is used for R1x000 framemask */
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Framemask));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Framemask));
             rn = "Framemask";
             break;
         default:
@@ -3127,29 +3278,29 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         }
         break;
     case 22:
-        tcg_gen_movi_tl(t0, 0); /* unimplemented */
+        tcg_gen_movi_tl(arg, 0); /* unimplemented */
         rn = "'Diagnostic"; /* implementation dependent */
         break;
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_debug(t0); /* EJTAG support */
+            gen_helper_mfc0_debug(arg); /* EJTAG support */
             rn = "Debug";
             break;
         case 1:
-//            gen_helper_mfc0_tracecontrol(t0); /* PDtrace support */
+//            gen_helper_mfc0_tracecontrol(arg); /* PDtrace support */
             rn = "TraceControl";
 //            break;
         case 2:
-//            gen_helper_mfc0_tracecontrol2(t0); /* PDtrace support */
+//            gen_helper_mfc0_tracecontrol2(arg); /* PDtrace support */
             rn = "TraceControl2";
 //            break;
         case 3:
-//            gen_helper_mfc0_usertracedata(t0); /* PDtrace support */
+//            gen_helper_mfc0_usertracedata(arg); /* PDtrace support */
             rn = "UserTraceData";
 //            break;
         case 4:
-//            gen_helper_mfc0_tracebpc(t0); /* PDtrace support */
+//            gen_helper_mfc0_tracebpc(arg); /* PDtrace support */
             rn = "TraceBPC";
 //            break;
         default:
@@ -3160,8 +3311,8 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         switch (sel) {
         case 0:
             /* EJTAG support */
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_DEPC));
-            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_DEPC));
+            tcg_gen_ext32s_tl(arg, arg);
             rn = "DEPC";
             break;
         default:
@@ -3171,35 +3322,35 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 25:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Performance0));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Performance0));
             rn = "Performance0";
             break;
         case 1:
-//            gen_helper_mfc0_performance1(t0);
+//            gen_helper_mfc0_performance1(arg);
             rn = "Performance1";
 //            break;
         case 2:
-//            gen_helper_mfc0_performance2(t0);
+//            gen_helper_mfc0_performance2(arg);
             rn = "Performance2";
 //            break;
         case 3:
-//            gen_helper_mfc0_performance3(t0);
+//            gen_helper_mfc0_performance3(arg);
             rn = "Performance3";
 //            break;
         case 4:
-//            gen_helper_mfc0_performance4(t0);
+//            gen_helper_mfc0_performance4(arg);
             rn = "Performance4";
 //            break;
         case 5:
-//            gen_helper_mfc0_performance5(t0);
+//            gen_helper_mfc0_performance5(arg);
             rn = "Performance5";
 //            break;
         case 6:
-//            gen_helper_mfc0_performance6(t0);
+//            gen_helper_mfc0_performance6(arg);
             rn = "Performance6";
 //            break;
         case 7:
-//            gen_helper_mfc0_performance7(t0);
+//            gen_helper_mfc0_performance7(arg);
             rn = "Performance7";
 //            break;
         default:
@@ -3207,13 +3358,13 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         }
         break;
     case 26:
-        tcg_gen_movi_tl(t0, 0); /* unimplemented */
+        tcg_gen_movi_tl(arg, 0); /* unimplemented */
         rn = "ECC";
         break;
     case 27:
         switch (sel) {
         case 0 ... 3:
-            tcg_gen_movi_tl(t0, 0); /* unimplemented */
+            tcg_gen_movi_tl(arg, 0); /* unimplemented */
             rn = "CacheErr";
             break;
         default:
@@ -3226,14 +3377,14 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         case 2:
         case 4:
         case 6:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_TagLo));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_TagLo));
             rn = "TagLo";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_DataLo));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_DataLo));
             rn = "DataLo";
             break;
         default:
@@ -3246,14 +3397,14 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         case 2:
         case 4:
         case 6:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_TagHi));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_TagHi));
             rn = "TagHi";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_DataHi));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_DataHi));
             rn = "DataHi";
             break;
         default:
@@ -3263,8 +3414,8 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 30:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_ErrorEPC));
-            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_ErrorEPC));
+            tcg_gen_ext32s_tl(arg, arg);
             rn = "ErrorEPC";
             break;
         default:
@@ -3275,7 +3426,7 @@ static void gen_mfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         switch (sel) {
         case 0:
             /* EJTAG support */
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_DESAVE));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_DESAVE));
             rn = "DESAVE";
             break;
         default:
@@ -3293,7 +3444,7 @@ die:
     generate_exception(ctx, EXCP_RI);
 }
 
-static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int sel)
+static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv arg, int reg, int sel)
 {
     const char *rn = "invalid";
 
@@ -3307,12 +3458,12 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 0:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_index(t0);
+            gen_helper_mtc0_index(arg);
             rn = "Index";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_mvpcontrol(t0);
+            gen_helper_mtc0_mvpcontrol(arg);
             rn = "MVPControl";
             break;
         case 2:
@@ -3337,37 +3488,37 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpecontrol(t0);
+            gen_helper_mtc0_vpecontrol(arg);
             rn = "VPEControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf0(t0);
+            gen_helper_mtc0_vpeconf0(arg);
             rn = "VPEConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf1(t0);
+            gen_helper_mtc0_vpeconf1(arg);
             rn = "VPEConf1";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_yqmask(t0);
+            gen_helper_mtc0_yqmask(arg);
             rn = "YQMask";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_mtc0_store64(t0, offsetof(CPUState, CP0_VPESchedule));
+            gen_mtc0_store64(arg, offsetof(CPUState, CP0_VPESchedule));
             rn = "VPESchedule";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_mtc0_store64(t0, offsetof(CPUState, CP0_VPEScheFBack));
+            gen_mtc0_store64(arg, offsetof(CPUState, CP0_VPEScheFBack));
             rn = "VPEScheFBack";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeopt(t0);
+            gen_helper_mtc0_vpeopt(arg);
             rn = "VPEOpt";
             break;
         default:
@@ -3377,42 +3528,42 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 2:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo0(t0);
+            gen_helper_mtc0_entrylo0(arg);
             rn = "EntryLo0";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcstatus(t0);
+            gen_helper_mtc0_tcstatus(arg);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcbind(t0);
+            gen_helper_mtc0_tcbind(arg);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcrestart(t0);
+            gen_helper_mtc0_tcrestart(arg);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tchalt(t0);
+            gen_helper_mtc0_tchalt(arg);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tccontext(t0);
+            gen_helper_mtc0_tccontext(arg);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschedule(t0);
+            gen_helper_mtc0_tcschedule(arg);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschefback(t0);
+            gen_helper_mtc0_tcschefback(arg);
             rn = "TCScheFBack";
             break;
         default:
@@ -3422,7 +3573,7 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 3:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo1(t0);
+            gen_helper_mtc0_entrylo1(arg);
             rn = "EntryLo1";
             break;
         default:
@@ -3432,11 +3583,11 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 4:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_context(t0);
+            gen_helper_mtc0_context(arg);
             rn = "Context";
             break;
         case 1:
-//            gen_helper_mtc0_contextconfig(t0); /* SmartMIPS ASE */
+//            gen_helper_mtc0_contextconfig(arg); /* SmartMIPS ASE */
             rn = "ContextConfig";
 //            break;
         default:
@@ -3446,12 +3597,12 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 5:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_pagemask(t0);
+            gen_helper_mtc0_pagemask(arg);
             rn = "PageMask";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_pagegrain(t0);
+            gen_helper_mtc0_pagegrain(arg);
             rn = "PageGrain";
             break;
         default:
@@ -3461,32 +3612,32 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 6:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_wired(t0);
+            gen_helper_mtc0_wired(arg);
             rn = "Wired";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf0(t0);
+            gen_helper_mtc0_srsconf0(arg);
             rn = "SRSConf0";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf1(t0);
+            gen_helper_mtc0_srsconf1(arg);
             rn = "SRSConf1";
             break;
         case 3:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf2(t0);
+            gen_helper_mtc0_srsconf2(arg);
             rn = "SRSConf2";
             break;
         case 4:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf3(t0);
+            gen_helper_mtc0_srsconf3(arg);
             rn = "SRSConf3";
             break;
         case 5:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf4(t0);
+            gen_helper_mtc0_srsconf4(arg);
             rn = "SRSConf4";
             break;
         default:
@@ -3497,7 +3648,7 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_hwrena(t0);
+            gen_helper_mtc0_hwrena(arg);
             rn = "HWREna";
             break;
         default:
@@ -3511,20 +3662,18 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 9:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_count(t0);
+            gen_helper_mtc0_count(arg);
             rn = "Count";
             break;
         /* 6,7 are implementation dependent */
         default:
             goto die;
         }
-        /* Stop translation as we may have switched the execution mode */
-        ctx->bstate = BS_STOP;
         break;
     case 10:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entryhi(t0);
+            gen_helper_mtc0_entryhi(arg);
             rn = "EntryHi";
             break;
         default:
@@ -3534,20 +3683,19 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 11:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_compare(t0);
+            gen_helper_mtc0_compare(arg);
             rn = "Compare";
             break;
         /* 6,7 are implementation dependent */
         default:
             goto die;
         }
-        /* Stop translation as we may have switched the execution mode */
-        ctx->bstate = BS_STOP;
         break;
     case 12:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_status(t0);
+            save_cpu_state(ctx, 1);
+            gen_helper_mtc0_status(arg);
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
@@ -3555,21 +3703,21 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_intctl(t0);
+            gen_helper_mtc0_intctl(arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "IntCtl";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsctl(t0);
+            gen_helper_mtc0_srsctl(arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "SRSCtl";
             break;
         case 3:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mtc0_store32(t0, offsetof(CPUState, CP0_SRSMap));
+            gen_mtc0_store32(arg, offsetof(CPUState, CP0_SRSMap));
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "SRSMap";
@@ -3581,19 +3729,18 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 13:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_cause(t0);
+            save_cpu_state(ctx, 1);
+            gen_helper_mtc0_cause(arg);
             rn = "Cause";
             break;
         default:
             goto die;
         }
-        /* Stop translation as we may have switched the execution mode */
-        ctx->bstate = BS_STOP;
         break;
     case 14:
         switch (sel) {
         case 0:
-            gen_mtc0_store64(t0, offsetof(CPUState, CP0_EPC));
+            gen_mtc0_store64(arg, offsetof(CPUState, CP0_EPC));
             rn = "EPC";
             break;
         default:
@@ -3608,7 +3755,7 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_ebase(t0);
+            gen_helper_mtc0_ebase(arg);
             rn = "EBase";
             break;
         default:
@@ -3618,7 +3765,7 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 16:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_config0(t0);
+            gen_helper_mtc0_config0(arg);
             rn = "Config";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -3628,7 +3775,7 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
             rn = "Config1";
             break;
         case 2:
-            gen_helper_mtc0_config2(t0);
+            gen_helper_mtc0_config2(arg);
             rn = "Config2";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -3665,7 +3812,7 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchlo, t0, sel);
+            gen_helper_1i(mtc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -3675,7 +3822,7 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 19:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchhi, t0, sel);
+            gen_helper_1i(mtc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -3687,7 +3834,7 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         case 0:
 #if defined(TARGET_MIPS64)
             check_insn(env, ctx, ISA_MIPS3);
-            gen_helper_mtc0_xcontext(t0);
+            gen_helper_mtc0_xcontext(arg);
             rn = "XContext";
             break;
 #endif
@@ -3699,7 +3846,7 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
        /* Officially reserved, but sel 0 is used for R1x000 framemask */
         switch (sel) {
         case 0:
-            gen_helper_mtc0_framemask(t0);
+            gen_helper_mtc0_framemask(arg);
             rn = "Framemask";
             break;
         default:
@@ -3713,20 +3860,20 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_debug(t0); /* EJTAG support */
+            gen_helper_mtc0_debug(arg); /* EJTAG support */
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
             rn = "Debug";
             break;
         case 1:
-//            gen_helper_mtc0_tracecontrol(t0); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol(arg); /* PDtrace support */
             rn = "TraceControl";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
 //            break;
         case 2:
-//            gen_helper_mtc0_tracecontrol2(t0); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol2(arg); /* PDtrace support */
             rn = "TraceControl2";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -3734,13 +3881,13 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         case 3:
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
-//            gen_helper_mtc0_usertracedata(t0); /* PDtrace support */
+//            gen_helper_mtc0_usertracedata(arg); /* PDtrace support */
             rn = "UserTraceData";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
 //            break;
         case 4:
-//            gen_helper_mtc0_tracebpc(t0); /* PDtrace support */
+//            gen_helper_mtc0_tracebpc(arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceBPC";
@@ -3753,7 +3900,7 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         switch (sel) {
         case 0:
             /* EJTAG support */
-            gen_mtc0_store64(t0, offsetof(CPUState, CP0_DEPC));
+            gen_mtc0_store64(arg, offsetof(CPUState, CP0_DEPC));
             rn = "DEPC";
             break;
         default:
@@ -3763,35 +3910,35 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 25:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_performance0(t0);
+            gen_helper_mtc0_performance0(arg);
             rn = "Performance0";
             break;
         case 1:
-//            gen_helper_mtc0_performance1(t0);
+//            gen_helper_mtc0_performance1(arg);
             rn = "Performance1";
 //            break;
         case 2:
-//            gen_helper_mtc0_performance2(t0);
+//            gen_helper_mtc0_performance2(arg);
             rn = "Performance2";
 //            break;
         case 3:
-//            gen_helper_mtc0_performance3(t0);
+//            gen_helper_mtc0_performance3(arg);
             rn = "Performance3";
 //            break;
         case 4:
-//            gen_helper_mtc0_performance4(t0);
+//            gen_helper_mtc0_performance4(arg);
             rn = "Performance4";
 //            break;
         case 5:
-//            gen_helper_mtc0_performance5(t0);
+//            gen_helper_mtc0_performance5(arg);
             rn = "Performance5";
 //            break;
         case 6:
-//            gen_helper_mtc0_performance6(t0);
+//            gen_helper_mtc0_performance6(arg);
             rn = "Performance6";
 //            break;
         case 7:
-//            gen_helper_mtc0_performance7(t0);
+//            gen_helper_mtc0_performance7(arg);
             rn = "Performance7";
 //            break;
         default:
@@ -3818,14 +3965,14 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taglo(t0);
+            gen_helper_mtc0_taglo(arg);
             rn = "TagLo";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datalo(t0);
+            gen_helper_mtc0_datalo(arg);
             rn = "DataLo";
             break;
         default:
@@ -3838,14 +3985,14 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taghi(t0);
+            gen_helper_mtc0_taghi(arg);
             rn = "TagHi";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datahi(t0);
+            gen_helper_mtc0_datahi(arg);
             rn = "DataHi";
             break;
         default:
@@ -3856,7 +4003,7 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
     case 30:
         switch (sel) {
         case 0:
-            gen_mtc0_store64(t0, offsetof(CPUState, CP0_ErrorEPC));
+            gen_mtc0_store64(arg, offsetof(CPUState, CP0_ErrorEPC));
             rn = "ErrorEPC";
             break;
         default:
@@ -3867,7 +4014,7 @@ static void gen_mtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int se
         switch (sel) {
         case 0:
             /* EJTAG support */
-            gen_mtc0_store32(t0, offsetof(CPUState, CP0_DESAVE));
+            gen_mtc0_store32(arg, offsetof(CPUState, CP0_DESAVE));
             rn = "DESAVE";
             break;
         default:
@@ -3893,7 +4040,7 @@ die:
 }
 
 #if defined(TARGET_MIPS64)
-static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int sel)
+static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv arg, int reg, int sel)
 {
     const char *rn = "invalid";
 
@@ -3904,22 +4051,22 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 0:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Index));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Index));
             rn = "Index";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpcontrol(t0);
+            gen_helper_mfc0_mvpcontrol(arg);
             rn = "MVPControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpconf0(t0);
+            gen_helper_mfc0_mvpconf0(arg);
             rn = "MVPConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpconf1(t0);
+            gen_helper_mfc0_mvpconf1(arg);
             rn = "MVPConf1";
             break;
         default:
@@ -3929,42 +4076,42 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 1:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_random(t0);
+            gen_helper_mfc0_random(arg);
             rn = "Random";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_VPEControl));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_VPEControl));
             rn = "VPEControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_VPEConf0));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_VPEConf0));
             rn = "VPEConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_VPEConf1));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_VPEConf1));
             rn = "VPEConf1";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_YQMask));
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_YQMask));
             rn = "YQMask";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_VPESchedule));
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_VPESchedule));
             rn = "VPESchedule";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_VPEScheFBack));
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_VPEScheFBack));
             rn = "VPEScheFBack";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_VPEOpt));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_VPEOpt));
             rn = "VPEOpt";
             break;
         default:
@@ -3974,42 +4121,42 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 2:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_EntryLo0));
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_EntryLo0));
             rn = "EntryLo0";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcstatus(t0);
+            gen_helper_mfc0_tcstatus(arg);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcbind(t0);
+            gen_helper_mfc0_tcbind(arg);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tcrestart(t0);
+            gen_helper_dmfc0_tcrestart(arg);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tchalt(t0);
+            gen_helper_dmfc0_tchalt(arg);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tccontext(t0);
+            gen_helper_dmfc0_tccontext(arg);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tcschedule(t0);
+            gen_helper_dmfc0_tcschedule(arg);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tcschefback(t0);
+            gen_helper_dmfc0_tcschefback(arg);
             rn = "TCScheFBack";
             break;
         default:
@@ -4019,7 +4166,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 3:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_EntryLo1));
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_EntryLo1));
             rn = "EntryLo1";
             break;
         default:
@@ -4029,11 +4176,11 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 4:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_Context));
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_Context));
             rn = "Context";
             break;
         case 1:
-//            gen_helper_dmfc0_contextconfig(t0); /* SmartMIPS ASE */
+//            gen_helper_dmfc0_contextconfig(arg); /* SmartMIPS ASE */
             rn = "ContextConfig";
 //            break;
         default:
@@ -4043,12 +4190,12 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 5:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_PageMask));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_PageMask));
             rn = "PageMask";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_PageGrain));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_PageGrain));
             rn = "PageGrain";
             break;
         default:
@@ -4058,32 +4205,32 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 6:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Wired));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Wired));
             rn = "Wired";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_SRSConf0));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_SRSConf0));
             rn = "SRSConf0";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_SRSConf1));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_SRSConf1));
             rn = "SRSConf1";
             break;
         case 3:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_SRSConf2));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_SRSConf2));
             rn = "SRSConf2";
             break;
         case 4:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_SRSConf3));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_SRSConf3));
             rn = "SRSConf3";
             break;
         case 5:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_SRSConf4));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_SRSConf4));
             rn = "SRSConf4";
             break;
         default:
@@ -4094,7 +4241,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_HWREna));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_HWREna));
             rn = "HWREna";
             break;
         default:
@@ -4104,7 +4251,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 8:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_BadVAddr));
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_BadVAddr));
             rn = "BadVAddr";
             break;
         default:
@@ -4117,7 +4264,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
             /* Mark as an IO operation because we read the time.  */
             if (use_icount)
                 gen_io_start();
-            gen_helper_mfc0_count(t0);
+            gen_helper_mfc0_count(arg);
             if (use_icount) {
                 gen_io_end();
                 ctx->bstate = BS_STOP;
@@ -4132,7 +4279,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 10:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_EntryHi));
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_EntryHi));
             rn = "EntryHi";
             break;
         default:
@@ -4142,7 +4289,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 11:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Compare));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Compare));
             rn = "Compare";
             break;
         /* 6,7 are implementation dependent */
@@ -4153,22 +4300,22 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 12:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Status));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Status));
             rn = "Status";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_IntCtl));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_IntCtl));
             rn = "IntCtl";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_SRSCtl));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_SRSCtl));
             rn = "SRSCtl";
             break;
         case 3:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_SRSMap));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_SRSMap));
             rn = "SRSMap";
             break;
         default:
@@ -4178,7 +4325,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 13:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Cause));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Cause));
             rn = "Cause";
             break;
         default:
@@ -4188,7 +4335,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 14:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_EPC));
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_EPC));
             rn = "EPC";
             break;
         default:
@@ -4198,12 +4345,12 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 15:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_PRid));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_PRid));
             rn = "PRid";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_EBase));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_EBase));
             rn = "EBase";
             break;
         default:
@@ -4213,28 +4360,28 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 16:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Config0));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Config0));
             rn = "Config";
             break;
         case 1:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Config1));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Config1));
             rn = "Config1";
             break;
         case 2:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Config2));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Config2));
             rn = "Config2";
             break;
         case 3:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Config3));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Config3));
             rn = "Config3";
             break;
        /* 6,7 are implementation dependent */
         case 6:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Config6));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Config6));
             rn = "Config6";
             break;
         case 7:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Config7));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Config7));
             rn = "Config7";
             break;
         default:
@@ -4244,7 +4391,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 17:
         switch (sel) {
         case 0:
-            gen_helper_dmfc0_lladdr(t0);
+            gen_helper_dmfc0_lladdr(arg);
             rn = "LLAddr";
             break;
         default:
@@ -4254,7 +4401,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(dmfc0_watchlo, t0, sel);
+            gen_helper_1i(dmfc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -4264,7 +4411,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 19:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mfc0_watchhi, t0, sel);
+            gen_helper_1i(mfc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -4275,7 +4422,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS3);
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_XContext));
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_XContext));
             rn = "XContext";
             break;
         default:
@@ -4286,7 +4433,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
        /* Officially reserved, but sel 0 is used for R1x000 framemask */
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Framemask));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Framemask));
             rn = "Framemask";
             break;
         default:
@@ -4294,29 +4441,29 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
         }
         break;
     case 22:
-        tcg_gen_movi_tl(t0, 0); /* unimplemented */
+        tcg_gen_movi_tl(arg, 0); /* unimplemented */
         rn = "'Diagnostic"; /* implementation dependent */
         break;
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_debug(t0); /* EJTAG support */
+            gen_helper_mfc0_debug(arg); /* EJTAG support */
             rn = "Debug";
             break;
         case 1:
-//            gen_helper_dmfc0_tracecontrol(t0); /* PDtrace support */
+//            gen_helper_dmfc0_tracecontrol(arg); /* PDtrace support */
             rn = "TraceControl";
 //            break;
         case 2:
-//            gen_helper_dmfc0_tracecontrol2(t0); /* PDtrace support */
+//            gen_helper_dmfc0_tracecontrol2(arg); /* PDtrace support */
             rn = "TraceControl2";
 //            break;
         case 3:
-//            gen_helper_dmfc0_usertracedata(t0); /* PDtrace support */
+//            gen_helper_dmfc0_usertracedata(arg); /* PDtrace support */
             rn = "UserTraceData";
 //            break;
         case 4:
-//            gen_helper_dmfc0_tracebpc(t0); /* PDtrace support */
+//            gen_helper_dmfc0_tracebpc(arg); /* PDtrace support */
             rn = "TraceBPC";
 //            break;
         default:
@@ -4327,7 +4474,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
         switch (sel) {
         case 0:
             /* EJTAG support */
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_DEPC));
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_DEPC));
             rn = "DEPC";
             break;
         default:
@@ -4337,35 +4484,35 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 25:
         switch (sel) {
         case 0:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_Performance0));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_Performance0));
             rn = "Performance0";
             break;
         case 1:
-//            gen_helper_dmfc0_performance1(t0);
+//            gen_helper_dmfc0_performance1(arg);
             rn = "Performance1";
 //            break;
         case 2:
-//            gen_helper_dmfc0_performance2(t0);
+//            gen_helper_dmfc0_performance2(arg);
             rn = "Performance2";
 //            break;
         case 3:
-//            gen_helper_dmfc0_performance3(t0);
+//            gen_helper_dmfc0_performance3(arg);
             rn = "Performance3";
 //            break;
         case 4:
-//            gen_helper_dmfc0_performance4(t0);
+//            gen_helper_dmfc0_performance4(arg);
             rn = "Performance4";
 //            break;
         case 5:
-//            gen_helper_dmfc0_performance5(t0);
+//            gen_helper_dmfc0_performance5(arg);
             rn = "Performance5";
 //            break;
         case 6:
-//            gen_helper_dmfc0_performance6(t0);
+//            gen_helper_dmfc0_performance6(arg);
             rn = "Performance6";
 //            break;
         case 7:
-//            gen_helper_dmfc0_performance7(t0);
+//            gen_helper_dmfc0_performance7(arg);
             rn = "Performance7";
 //            break;
         default:
@@ -4373,14 +4520,14 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
         }
         break;
     case 26:
-        tcg_gen_movi_tl(t0, 0); /* unimplemented */
+        tcg_gen_movi_tl(arg, 0); /* unimplemented */
         rn = "ECC";
         break;
     case 27:
         switch (sel) {
         /* ignored */
         case 0 ... 3:
-            tcg_gen_movi_tl(t0, 0); /* unimplemented */
+            tcg_gen_movi_tl(arg, 0); /* unimplemented */
             rn = "CacheErr";
             break;
         default:
@@ -4393,14 +4540,14 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
         case 2:
         case 4:
         case 6:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_TagLo));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_TagLo));
             rn = "TagLo";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_DataLo));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_DataLo));
             rn = "DataLo";
             break;
         default:
@@ -4413,14 +4560,14 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
         case 2:
         case 4:
         case 6:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_TagHi));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_TagHi));
             rn = "TagHi";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_DataHi));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_DataHi));
             rn = "DataHi";
             break;
         default:
@@ -4430,7 +4577,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 30:
         switch (sel) {
         case 0:
-            tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, CP0_ErrorEPC));
+            tcg_gen_ld_tl(arg, cpu_env, offsetof(CPUState, CP0_ErrorEPC));
             rn = "ErrorEPC";
             break;
         default:
@@ -4441,7 +4588,7 @@ static void gen_dmfc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
         switch (sel) {
         case 0:
             /* EJTAG support */
-            gen_mfc0_load32(t0, offsetof(CPUState, CP0_DESAVE));
+            gen_mfc0_load32(arg, offsetof(CPUState, CP0_DESAVE));
             rn = "DESAVE";
             break;
         default:
@@ -4459,7 +4606,7 @@ die:
     generate_exception(ctx, EXCP_RI);
 }
 
-static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int sel)
+static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv arg, int reg, int sel)
 {
     const char *rn = "invalid";
 
@@ -4473,12 +4620,12 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 0:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_index(t0);
+            gen_helper_mtc0_index(arg);
             rn = "Index";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_mvpcontrol(t0);
+            gen_helper_mtc0_mvpcontrol(arg);
             rn = "MVPControl";
             break;
         case 2:
@@ -4503,37 +4650,37 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpecontrol(t0);
+            gen_helper_mtc0_vpecontrol(arg);
             rn = "VPEControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf0(t0);
+            gen_helper_mtc0_vpeconf0(arg);
             rn = "VPEConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf1(t0);
+            gen_helper_mtc0_vpeconf1(arg);
             rn = "VPEConf1";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_yqmask(t0);
+            gen_helper_mtc0_yqmask(arg);
             rn = "YQMask";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, CP0_VPESchedule));
+            tcg_gen_st_tl(arg, cpu_env, offsetof(CPUState, CP0_VPESchedule));
             rn = "VPESchedule";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, CP0_VPEScheFBack));
+            tcg_gen_st_tl(arg, cpu_env, offsetof(CPUState, CP0_VPEScheFBack));
             rn = "VPEScheFBack";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeopt(t0);
+            gen_helper_mtc0_vpeopt(arg);
             rn = "VPEOpt";
             break;
         default:
@@ -4543,42 +4690,42 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 2:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo0(t0);
+            gen_helper_mtc0_entrylo0(arg);
             rn = "EntryLo0";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcstatus(t0);
+            gen_helper_mtc0_tcstatus(arg);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcbind(t0);
+            gen_helper_mtc0_tcbind(arg);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcrestart(t0);
+            gen_helper_mtc0_tcrestart(arg);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tchalt(t0);
+            gen_helper_mtc0_tchalt(arg);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tccontext(t0);
+            gen_helper_mtc0_tccontext(arg);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschedule(t0);
+            gen_helper_mtc0_tcschedule(arg);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschefback(t0);
+            gen_helper_mtc0_tcschefback(arg);
             rn = "TCScheFBack";
             break;
         default:
@@ -4588,7 +4735,7 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 3:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo1(t0);
+            gen_helper_mtc0_entrylo1(arg);
             rn = "EntryLo1";
             break;
         default:
@@ -4598,11 +4745,11 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 4:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_context(t0);
+            gen_helper_mtc0_context(arg);
             rn = "Context";
             break;
         case 1:
-//           gen_helper_mtc0_contextconfig(t0); /* SmartMIPS ASE */
+//           gen_helper_mtc0_contextconfig(arg); /* SmartMIPS ASE */
             rn = "ContextConfig";
 //           break;
         default:
@@ -4612,12 +4759,12 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 5:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_pagemask(t0);
+            gen_helper_mtc0_pagemask(arg);
             rn = "PageMask";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_pagegrain(t0);
+            gen_helper_mtc0_pagegrain(arg);
             rn = "PageGrain";
             break;
         default:
@@ -4627,32 +4774,32 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 6:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_wired(t0);
+            gen_helper_mtc0_wired(arg);
             rn = "Wired";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf0(t0);
+            gen_helper_mtc0_srsconf0(arg);
             rn = "SRSConf0";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf1(t0);
+            gen_helper_mtc0_srsconf1(arg);
             rn = "SRSConf1";
             break;
         case 3:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf2(t0);
+            gen_helper_mtc0_srsconf2(arg);
             rn = "SRSConf2";
             break;
         case 4:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf3(t0);
+            gen_helper_mtc0_srsconf3(arg);
             rn = "SRSConf3";
             break;
         case 5:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf4(t0);
+            gen_helper_mtc0_srsconf4(arg);
             rn = "SRSConf4";
             break;
         default:
@@ -4663,7 +4810,7 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_hwrena(t0);
+            gen_helper_mtc0_hwrena(arg);
             rn = "HWREna";
             break;
         default:
@@ -4677,7 +4824,7 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 9:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_count(t0);
+            gen_helper_mtc0_count(arg);
             rn = "Count";
             break;
         /* 6,7 are implementation dependent */
@@ -4690,7 +4837,7 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 10:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entryhi(t0);
+            gen_helper_mtc0_entryhi(arg);
             rn = "EntryHi";
             break;
         default:
@@ -4700,7 +4847,7 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 11:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_compare(t0);
+            gen_helper_mtc0_compare(arg);
             rn = "Compare";
             break;
         /* 6,7 are implementation dependent */
@@ -4713,7 +4860,8 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 12:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_status(t0);
+            save_cpu_state(ctx, 1);
+            gen_helper_mtc0_status(arg);
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
@@ -4721,21 +4869,21 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_intctl(t0);
+            gen_helper_mtc0_intctl(arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "IntCtl";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsctl(t0);
+            gen_helper_mtc0_srsctl(arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "SRSCtl";
             break;
         case 3:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_mtc0_store32(t0, offsetof(CPUState, CP0_SRSMap));
+            gen_mtc0_store32(arg, offsetof(CPUState, CP0_SRSMap));
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "SRSMap";
@@ -4747,19 +4895,18 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 13:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_cause(t0);
+            save_cpu_state(ctx, 1);
+            gen_helper_mtc0_cause(arg);
             rn = "Cause";
             break;
         default:
             goto die;
         }
-        /* Stop translation as we may have switched the execution mode */
-        ctx->bstate = BS_STOP;
         break;
     case 14:
         switch (sel) {
         case 0:
-            tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, CP0_EPC));
+            tcg_gen_st_tl(arg, cpu_env, offsetof(CPUState, CP0_EPC));
             rn = "EPC";
             break;
         default:
@@ -4774,7 +4921,7 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_ebase(t0);
+            gen_helper_mtc0_ebase(arg);
             rn = "EBase";
             break;
         default:
@@ -4784,17 +4931,17 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 16:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_config0(t0);
+            gen_helper_mtc0_config0(arg);
             rn = "Config";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             break;
         case 1:
-            /* ignored */
+            /* ignored, read only */
             rn = "Config1";
             break;
         case 2:
-            gen_helper_mtc0_config2(t0);
+            gen_helper_mtc0_config2(arg);
             rn = "Config2";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -4822,7 +4969,7 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchlo, t0, sel);
+            gen_helper_1i(mtc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -4832,7 +4979,7 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 19:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchhi, t0, sel);
+            gen_helper_1i(mtc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -4843,7 +4990,7 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS3);
-            gen_helper_mtc0_xcontext(t0);
+            gen_helper_mtc0_xcontext(arg);
             rn = "XContext";
             break;
         default:
@@ -4854,7 +5001,7 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
        /* Officially reserved, but sel 0 is used for R1x000 framemask */
         switch (sel) {
         case 0:
-            gen_helper_mtc0_framemask(t0);
+            gen_helper_mtc0_framemask(arg);
             rn = "Framemask";
             break;
         default:
@@ -4868,32 +5015,32 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_debug(t0); /* EJTAG support */
+            gen_helper_mtc0_debug(arg); /* EJTAG support */
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
             rn = "Debug";
             break;
         case 1:
-//            gen_helper_mtc0_tracecontrol(t0); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol(arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceControl";
 //            break;
         case 2:
-//            gen_helper_mtc0_tracecontrol2(t0); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol2(arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceControl2";
 //            break;
         case 3:
-//            gen_helper_mtc0_usertracedata(t0); /* PDtrace support */
+//            gen_helper_mtc0_usertracedata(arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "UserTraceData";
 //            break;
         case 4:
-//            gen_helper_mtc0_tracebpc(t0); /* PDtrace support */
+//            gen_helper_mtc0_tracebpc(arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceBPC";
@@ -4906,7 +5053,7 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
         switch (sel) {
         case 0:
             /* EJTAG support */
-            tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, CP0_DEPC));
+            tcg_gen_st_tl(arg, cpu_env, offsetof(CPUState, CP0_DEPC));
             rn = "DEPC";
             break;
         default:
@@ -4916,35 +5063,35 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 25:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_performance0(t0);
+            gen_helper_mtc0_performance0(arg);
             rn = "Performance0";
             break;
         case 1:
-//            gen_helper_mtc0_performance1(t0);
+//            gen_helper_mtc0_performance1(arg);
             rn = "Performance1";
 //            break;
         case 2:
-//            gen_helper_mtc0_performance2(t0);
+//            gen_helper_mtc0_performance2(arg);
             rn = "Performance2";
 //            break;
         case 3:
-//            gen_helper_mtc0_performance3(t0);
+//            gen_helper_mtc0_performance3(arg);
             rn = "Performance3";
 //            break;
         case 4:
-//            gen_helper_mtc0_performance4(t0);
+//            gen_helper_mtc0_performance4(arg);
             rn = "Performance4";
 //            break;
         case 5:
-//            gen_helper_mtc0_performance5(t0);
+//            gen_helper_mtc0_performance5(arg);
             rn = "Performance5";
 //            break;
         case 6:
-//            gen_helper_mtc0_performance6(t0);
+//            gen_helper_mtc0_performance6(arg);
             rn = "Performance6";
 //            break;
         case 7:
-//            gen_helper_mtc0_performance7(t0);
+//            gen_helper_mtc0_performance7(arg);
             rn = "Performance7";
 //            break;
         default:
@@ -4971,14 +5118,14 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taglo(t0);
+            gen_helper_mtc0_taglo(arg);
             rn = "TagLo";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datalo(t0);
+            gen_helper_mtc0_datalo(arg);
             rn = "DataLo";
             break;
         default:
@@ -4991,14 +5138,14 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taghi(t0);
+            gen_helper_mtc0_taghi(arg);
             rn = "TagHi";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datahi(t0);
+            gen_helper_mtc0_datahi(arg);
             rn = "DataHi";
             break;
         default:
@@ -5009,7 +5156,7 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
     case 30:
         switch (sel) {
         case 0:
-            tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, CP0_ErrorEPC));
+            tcg_gen_st_tl(arg, cpu_env, offsetof(CPUState, CP0_ErrorEPC));
             rn = "ErrorEPC";
             break;
         default:
@@ -5020,7 +5167,7 @@ static void gen_dmtc0 (CPUState *env, DisasContext *ctx, TCGv t0, int reg, int s
         switch (sel) {
         case 0:
             /* EJTAG support */
-            gen_mtc0_store32(t0, offsetof(CPUState, CP0_DESAVE));
+            gen_mtc0_store32(arg, offsetof(CPUState, CP0_DESAVE));
             rn = "DESAVE";
             break;
         default:
@@ -5384,21 +5531,14 @@ static void gen_cp0 (CPUState *env, DisasContext *ctx, uint32_t opc, int rt, int
             /* Treat as NOP. */
             return;
         }
-        {
-            TCGv t0 = tcg_temp_local_new();
-
-            gen_mfc0(env, ctx, t0, rd, ctx->opcode & 0x7);
-            gen_store_gpr(t0, rt);
-            tcg_temp_free(t0);
-        }
+        gen_mfc0(env, ctx, cpu_gpr[rt], rd, ctx->opcode & 0x7);
         opn = "mfc0";
         break;
     case OPC_MTC0:
         {
-            TCGv t0 = tcg_temp_local_new();
+            TCGv t0 = tcg_temp_new();
 
             gen_load_gpr(t0, rt);
-            save_cpu_state(ctx, 1);
             gen_mtc0(env, ctx, t0, rd, ctx->opcode & 0x7);
             tcg_temp_free(t0);
         }
@@ -5411,22 +5551,15 @@ static void gen_cp0 (CPUState *env, DisasContext *ctx, uint32_t opc, int rt, int
             /* Treat as NOP. */
             return;
         }
-        {
-            TCGv t0 = tcg_temp_local_new();
-
-            gen_dmfc0(env, ctx, t0, rd, ctx->opcode & 0x7);
-            gen_store_gpr(t0, rt);
-            tcg_temp_free(t0);
-        }
+        gen_dmfc0(env, ctx, cpu_gpr[rt], rd, ctx->opcode & 0x7);
         opn = "dmfc0";
         break;
     case OPC_DMTC0:
         check_insn(env, ctx, ISA_MIPS3);
         {
-            TCGv t0 = tcg_temp_local_new();
+            TCGv t0 = tcg_temp_new();
 
             gen_load_gpr(t0, rt);
-            save_cpu_state(ctx, 1);
             gen_dmtc0(env, ctx, t0, rd, ctx->opcode & 0x7);
             tcg_temp_free(t0);
         }
@@ -5451,32 +5584,31 @@ static void gen_cp0 (CPUState *env, DisasContext *ctx, uint32_t opc, int rt, int
         break;
     case OPC_TLBWI:
         opn = "tlbwi";
-        if (!env->tlb->do_tlbwi)
+        if (!env->tlb->helper_tlbwi)
             goto die;
         gen_helper_tlbwi();
         break;
     case OPC_TLBWR:
         opn = "tlbwr";
-        if (!env->tlb->do_tlbwr)
+        if (!env->tlb->helper_tlbwr)
             goto die;
         gen_helper_tlbwr();
         break;
     case OPC_TLBP:
         opn = "tlbp";
-        if (!env->tlb->do_tlbp)
+        if (!env->tlb->helper_tlbp)
             goto die;
         gen_helper_tlbp();
         break;
     case OPC_TLBR:
         opn = "tlbr";
-        if (!env->tlb->do_tlbr)
+        if (!env->tlb->helper_tlbr)
             goto die;
         gen_helper_tlbr();
         break;
     case OPC_ERET:
         opn = "eret";
         check_insn(env, ctx, ISA_MIPS2);
-        save_cpu_state(ctx, 1);
         gen_helper_eret();
         ctx->bstate = BS_EXCP;
         break;
@@ -5487,7 +5619,6 @@ static void gen_cp0 (CPUState *env, DisasContext *ctx, uint32_t opc, int rt, int
             MIPS_INVAL(opn);
             generate_exception(ctx, EXCP_RI);
         } else {
-            save_cpu_state(ctx, 1);
             gen_helper_deret();
             ctx->bstate = BS_EXCP;
         }
@@ -5527,132 +5658,88 @@ static void gen_compute_branch1 (CPUState *env, DisasContext *ctx, uint32_t op,
 
     switch (op) {
     case OPC_BC1F:
-        {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x1 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
-        }
+        tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+        tcg_gen_not_i32(t0, t0);
+        tcg_gen_andi_i32(t0, t0, 1);
+        tcg_gen_extu_i32_tl(bcond, t0);
         opn = "bc1f";
         goto not_likely;
     case OPC_BC1FL:
-        {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x1 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
-        }
+        tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+        tcg_gen_not_i32(t0, t0);
+        tcg_gen_andi_i32(t0, t0, 1);
+        tcg_gen_extu_i32_tl(bcond, t0);
         opn = "bc1fl";
         goto likely;
     case OPC_BC1T:
-        {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x1 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
-        }
+        tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+        tcg_gen_andi_i32(t0, t0, 1);
+        tcg_gen_extu_i32_tl(bcond, t0);
         opn = "bc1t";
         goto not_likely;
     case OPC_BC1TL:
-        {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x1 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
-        }
+        tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+        tcg_gen_andi_i32(t0, t0, 1);
+        tcg_gen_extu_i32_tl(bcond, t0);
         opn = "bc1tl";
     likely:
         ctx->hflags |= MIPS_HFLAG_BL;
         break;
     case OPC_BC1FANY2:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x3 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
+            TCGv_i32 t1 = tcg_temp_new_i32();
+            tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+1));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_temp_free_i32(t1);
+            tcg_gen_not_i32(t0, t0);
+            tcg_gen_andi_i32(t0, t0, 1);
+            tcg_gen_extu_i32_tl(bcond, t0);
         }
         opn = "bc1any2f";
         goto not_likely;
     case OPC_BC1TANY2:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x3 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
+            TCGv_i32 t1 = tcg_temp_new_i32();
+            tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+1));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_temp_free_i32(t1);
+            tcg_gen_andi_i32(t0, t0, 1);
+            tcg_gen_extu_i32_tl(bcond, t0);
         }
         opn = "bc1any2t";
         goto not_likely;
     case OPC_BC1FANY4:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0xf << cc);
-            tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
+            TCGv_i32 t1 = tcg_temp_new_i32();
+            tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+1));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+2));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+3));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_temp_free_i32(t1);
+            tcg_gen_not_i32(t0, t0);
+            tcg_gen_andi_i32(t0, t0, 1);
+            tcg_gen_extu_i32_tl(bcond, t0);
         }
         opn = "bc1any4f";
         goto not_likely;
     case OPC_BC1TANY4:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0xf << cc);
-            tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
+            TCGv_i32 t1 = tcg_temp_new_i32();
+            tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+1));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+2));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+3));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_temp_free_i32(t1);
+            tcg_gen_andi_i32(t0, t0, 1);
+            tcg_gen_extu_i32_tl(bcond, t0);
         }
         opn = "bc1any4t";
     not_likely:
@@ -5678,7 +5765,7 @@ static void gen_compute_branch1 (CPUState *env, DisasContext *ctx, uint32_t op,
 static void gen_cp1 (DisasContext *ctx, uint32_t opc, int rt, int fs)
 {
     const char *opn = "cp1 move";
-    TCGv t0 = tcg_temp_local_new();
+    TCGv t0 = tcg_temp_new();
 
     switch (opc) {
     case OPC_MFC1:
@@ -5713,28 +5800,18 @@ static void gen_cp1 (DisasContext *ctx, uint32_t opc, int rt, int fs)
         gen_helper_1i(ctc1, t0, fs);
         opn = "ctc1";
         break;
+#if defined(TARGET_MIPS64)
     case OPC_DMFC1:
-        {
-            TCGv_i64 fp0 = tcg_temp_new_i64();
-
-            gen_load_fpr64(ctx, fp0, fs);
-            tcg_gen_trunc_i64_tl(t0, fp0);
-            tcg_temp_free_i64(fp0);
-        }
+        gen_load_fpr64(ctx, t0, fs);
         gen_store_gpr(t0, rt);
         opn = "dmfc1";
         break;
     case OPC_DMTC1:
         gen_load_gpr(t0, rt);
-        {
-            TCGv_i64 fp0 = tcg_temp_new_i64();
-
-            tcg_gen_extu_tl_i64(fp0, t0);
-            gen_store_fpr64(ctx, fp0, fs);
-            tcg_temp_free_i64(fp0);
-        }
+        gen_store_fpr64(ctx, t0, fs);
         opn = "dmtc1";
         break;
+#endif
     case OPC_MFHC1:
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
@@ -5770,125 +5847,98 @@ static void gen_cp1 (DisasContext *ctx, uint32_t opc, int rt, int fs)
 
 static void gen_movci (DisasContext *ctx, int rd, int rs, int cc, int tf)
 {
-    int l1 = gen_new_label();
-    uint32_t ccbit;
+    int l1;
     TCGCond cond;
-    TCGv t0 = tcg_temp_local_new();
-    TCGv_i32 r_tmp = tcg_temp_new_i32();
+    TCGv_i32 t0;
+
+    if (rd == 0) {
+        /* Treat as NOP. */
+        return;
+    }
 
-    if (cc)
-        ccbit = 1 << (24 + cc);
-    else
-        ccbit = 1 << 23;
     if (tf)
         cond = TCG_COND_EQ;
     else
         cond = TCG_COND_NE;
 
-    gen_load_gpr(t0, rd);
-    tcg_gen_andi_i32(r_tmp, fpu_fcr31, ccbit);
-    tcg_gen_brcondi_i32(cond, r_tmp, 0, l1);
-    tcg_temp_free_i32(r_tmp);
-    gen_load_gpr(t0, rs);
+    l1 = gen_new_label();
+    t0 = tcg_temp_new_i32();
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc));
+    tcg_gen_brcondi_i32(cond, t0, 0, l1);
+    tcg_temp_free_i32(t0);
+    if (rs == 0) {
+        tcg_gen_movi_tl(cpu_gpr[rd], 0);
+    } else {
+        tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
+    }
     gen_set_label(l1);
-    gen_store_gpr(t0, rd);
-    tcg_temp_free(t0);
 }
 
 static inline void gen_movcf_s (int fs, int fd, int cc, int tf)
 {
-    uint32_t ccbit;
     int cond;
-    TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-    TCGv_i32 fp0 = tcg_temp_local_new_i32();
+    TCGv_i32 t0 = tcg_temp_new_i32();
     int l1 = gen_new_label();
 
-    if (cc)
-        ccbit = 1 << (24 + cc);
-    else
-        ccbit = 1 << 23;
-
     if (tf)
         cond = TCG_COND_EQ;
     else
         cond = TCG_COND_NE;
 
-    gen_load_fpr32(fp0, fd);
-    tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit);
-    tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
-    tcg_temp_free_i32(r_tmp1);
-    gen_load_fpr32(fp0, fs);
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc));
+    tcg_gen_brcondi_i32(cond, t0, 0, l1);
+    gen_load_fpr32(t0, fs);
+    gen_store_fpr32(t0, fd);
     gen_set_label(l1);
-    gen_store_fpr32(fp0, fd);
-    tcg_temp_free_i32(fp0);
+    tcg_temp_free_i32(t0);
 }
 
 static inline void gen_movcf_d (DisasContext *ctx, int fs, int fd, int cc, int tf)
 {
-    uint32_t ccbit;
     int cond;
-    TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-    TCGv_i64 fp0 = tcg_temp_local_new_i64();
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i64 fp0;
     int l1 = gen_new_label();
 
-    if (cc)
-        ccbit = 1 << (24 + cc);
-    else
-        ccbit = 1 << 23;
-
     if (tf)
         cond = TCG_COND_EQ;
     else
         cond = TCG_COND_NE;
 
-    gen_load_fpr64(ctx, fp0, fd);
-    tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit);
-    tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
-    tcg_temp_free_i32(r_tmp1);
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc));
+    tcg_gen_brcondi_i32(cond, t0, 0, l1);
+    tcg_temp_free_i32(t0);
+    fp0 = tcg_temp_new_i64();
     gen_load_fpr64(ctx, fp0, fs);
-    gen_set_label(l1);
     gen_store_fpr64(ctx, fp0, fd);
     tcg_temp_free_i64(fp0);
+    gen_set_label(l1);
 }
 
 static inline void gen_movcf_ps (int fs, int fd, int cc, int tf)
 {
-    uint32_t ccbit1, ccbit2;
     int cond;
-    TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-    TCGv_i32 fp0 = tcg_temp_local_new_i32();
+    TCGv_i32 t0 = tcg_temp_new_i32();
     int l1 = gen_new_label();
     int l2 = gen_new_label();
 
-    if (cc) {
-        ccbit1 = 1 << (24 + cc);
-        ccbit2 = 1 << (25 + cc);
-    } else {
-        ccbit1 = 1 << 23;
-        ccbit2 = 1 << 25;
-    }
-
     if (tf)
         cond = TCG_COND_EQ;
     else
         cond = TCG_COND_NE;
 
-    gen_load_fpr32(fp0, fd);
-    tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit1);
-    tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
-    gen_load_fpr32(fp0, fs);
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc));
+    tcg_gen_brcondi_i32(cond, t0, 0, l1);
+    gen_load_fpr32(t0, fs);
+    gen_store_fpr32(t0, fd);
     gen_set_label(l1);
-    gen_store_fpr32(fp0, fd);
 
-    gen_load_fpr32h(fp0, fd);
-    tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit2);
-    tcg_gen_brcondi_i32(cond, r_tmp1, 0, l2);
-    gen_load_fpr32h(fp0, fs);
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc+1));
+    tcg_gen_brcondi_i32(cond, t0, 0, l2);
+    gen_load_fpr32h(t0, fs);
+    gen_store_fpr32h(t0, fd);
+    tcg_temp_free_i32(t0);
     gen_set_label(l2);
-    gen_store_fpr32h(fp0, fd);
-
-    tcg_temp_free_i32(r_tmp1);
-    tcg_temp_free_i32(fp0);
 }
 
 
@@ -6146,32 +6196,32 @@ static void gen_farith (DisasContext *ctx, uint32_t op1,
     case FOP(18, 16):
         {
             int l1 = gen_new_label();
-            TCGv t0 = tcg_temp_new();
-            TCGv_i32 fp0 = tcg_temp_local_new_i32();
+            TCGv_i32 fp0;
 
-            gen_load_gpr(t0, ft);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
+            if (ft != 0) {
+                tcg_gen_brcondi_tl(TCG_COND_NE, cpu_gpr[ft], 0, l1);
+            }
+            fp0 = tcg_temp_new_i32();
             gen_load_fpr32(fp0, fs);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
             gen_set_label(l1);
-            tcg_temp_free(t0);
         }
         opn = "movz.s";
         break;
     case FOP(19, 16):
         {
             int l1 = gen_new_label();
-            TCGv t0 = tcg_temp_new();
-            TCGv_i32 fp0 = tcg_temp_local_new_i32();
-
-            gen_load_gpr(t0, ft);
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
-            gen_load_fpr32(fp0, fs);
-            gen_store_fpr32(fp0, fd);
-            tcg_temp_free_i32(fp0);
-            gen_set_label(l1);
-            tcg_temp_free(t0);
+            TCGv_i32 fp0;
+
+            if (ft != 0) {
+                tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[ft], 0, l1);
+                fp0 = tcg_temp_new_i32();
+                gen_load_fpr32(fp0, fs);
+                gen_store_fpr32(fp0, fd);
+                tcg_temp_free_i32(fp0);
+                gen_set_label(l1);
+            }
         }
         opn = "movn.s";
         break;
@@ -6565,32 +6615,32 @@ static void gen_farith (DisasContext *ctx, uint32_t op1,
     case FOP(18, 17):
         {
             int l1 = gen_new_label();
-            TCGv t0 = tcg_temp_new();
-            TCGv_i64 fp0 = tcg_temp_local_new_i64();
+            TCGv_i64 fp0;
 
-            gen_load_gpr(t0, ft);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
+            if (ft != 0) {
+                tcg_gen_brcondi_tl(TCG_COND_NE, cpu_gpr[ft], 0, l1);
+            }
+            fp0 = tcg_temp_new_i64();
             gen_load_fpr64(ctx, fp0, fs);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
             gen_set_label(l1);
-            tcg_temp_free(t0);
         }
         opn = "movz.d";
         break;
     case FOP(19, 17):
         {
             int l1 = gen_new_label();
-            TCGv t0 = tcg_temp_new();
-            TCGv_i64 fp0 = tcg_temp_local_new_i64();
-
-            gen_load_gpr(t0, ft);
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
-            gen_load_fpr64(ctx, fp0, fs);
-            gen_store_fpr64(ctx, fp0, fd);
-            tcg_temp_free_i64(fp0);
-            gen_set_label(l1);
-            tcg_temp_free(t0);
+            TCGv_i64 fp0;
+
+            if (ft != 0) {
+                tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[ft], 0, l1);
+                fp0 = tcg_temp_new_i64();
+                gen_load_fpr64(ctx, fp0, fs);
+                gen_store_fpr64(ctx, fp0, fd);
+                tcg_temp_free_i64(fp0);
+                gen_set_label(l1);
+            }
         }
         opn = "movn.d";
         break;
@@ -6900,20 +6950,15 @@ static void gen_farith (DisasContext *ctx, uint32_t op1,
         check_cp1_64bitmode(ctx);
         {
             int l1 = gen_new_label();
-            TCGv t0 = tcg_temp_new();
-            TCGv_i32 fp0 = tcg_temp_local_new_i32();
-            TCGv_i32 fph0 = tcg_temp_local_new_i32();
+            TCGv_i64 fp0;
 
-            gen_load_gpr(t0, ft);
-            tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32h(fph0, fs);
-            gen_store_fpr32(fp0, fd);
-            gen_store_fpr32h(fph0, fd);
-            tcg_temp_free_i32(fp0);
-            tcg_temp_free_i32(fph0);
+            if (ft != 0)
+                tcg_gen_brcondi_tl(TCG_COND_NE, cpu_gpr[ft], 0, l1);
+            fp0 = tcg_temp_new_i64();
+            gen_load_fpr64(ctx, fp0, fs);
+            gen_store_fpr64(ctx, fp0, fd);
+            tcg_temp_free_i64(fp0);
             gen_set_label(l1);
-            tcg_temp_free(t0);
         }
         opn = "movz.ps";
         break;
@@ -6921,20 +6966,16 @@ static void gen_farith (DisasContext *ctx, uint32_t op1,
         check_cp1_64bitmode(ctx);
         {
             int l1 = gen_new_label();
-            TCGv t0 = tcg_temp_new();
-            TCGv_i32 fp0 = tcg_temp_local_new_i32();
-            TCGv_i32 fph0 = tcg_temp_local_new_i32();
-
-            gen_load_gpr(t0, ft);
-            tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32h(fph0, fs);
-            gen_store_fpr32(fp0, fd);
-            gen_store_fpr32h(fph0, fd);
-            tcg_temp_free_i32(fp0);
-            tcg_temp_free_i32(fph0);
-            gen_set_label(l1);
-            tcg_temp_free(t0);
+            TCGv_i64 fp0;
+
+            if (ft != 0) {
+                tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[ft], 0, l1);
+                fp0 = tcg_temp_new_i64();
+                gen_load_fpr64(ctx, fp0, fs);
+                gen_store_fpr64(ctx, fp0, fd);
+                tcg_temp_free_i64(fp0);
+                gen_set_label(l1);
+            }
         }
         opn = "movn.ps";
         break;
@@ -7176,8 +7217,7 @@ static void gen_flt3_ldst (DisasContext *ctx, uint32_t opc,
 {
     const char *opn = "extended float load/store";
     int store = 0;
-    TCGv t0 = tcg_temp_local_new();
-    TCGv t1 = tcg_temp_local_new();
+    TCGv t0 = tcg_temp_new();
 
     if (base == 0) {
         gen_load_gpr(t0, index);
@@ -7189,14 +7229,15 @@ static void gen_flt3_ldst (DisasContext *ctx, uint32_t opc,
     }
     /* Don't do NOP if destination is zero: we must perform the actual
        memory access. */
+    save_cpu_state(ctx, 0);
     switch (opc) {
     case OPC_LWXC1:
         check_cop1x(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
-            tcg_gen_qemu_ld32s(t1, t0, ctx->mem_idx);
-            tcg_gen_trunc_tl_i32(fp0, t1);
+            tcg_gen_qemu_ld32s(t0, t0, ctx->mem_idx);
+            tcg_gen_trunc_tl_i32(fp0, t0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -7230,11 +7271,13 @@ static void gen_flt3_ldst (DisasContext *ctx, uint32_t opc,
         check_cop1x(ctx);
         {
             TCGv_i32 fp0 = tcg_temp_new_i32();
+            TCGv t1 = tcg_temp_new();
 
             gen_load_fpr32(fp0, fs);
             tcg_gen_extu_i32_tl(t1, fp0);
             tcg_gen_qemu_st32(t1, t0, ctx->mem_idx);
             tcg_temp_free_i32(fp0);
+            tcg_temp_free(t1);
         }
         opn = "swxc1";
         store = 1;
@@ -7265,15 +7308,8 @@ static void gen_flt3_ldst (DisasContext *ctx, uint32_t opc,
         opn = "suxc1";
         store = 1;
         break;
-    default:
-        MIPS_INVAL(opn);
-        generate_exception(ctx, EXCP_RI);
-        tcg_temp_free(t0);
-        tcg_temp_free(t1);
-        return;
     }
     tcg_temp_free(t0);
-    tcg_temp_free(t1);
     MIPS_DEBUG("%s %s, %s(%s)", opn, fregnames[store ? fs : fd],
                regnames[index], regnames[base]);
 }
@@ -7288,39 +7324,37 @@ static void gen_flt3_arith (DisasContext *ctx, uint32_t opc,
         check_cp1_64bitmode(ctx);
         {
             TCGv t0 = tcg_temp_local_new();
-            TCGv_i32 fp0 = tcg_temp_local_new_i32();
-            TCGv_i32 fph0 = tcg_temp_local_new_i32();
-            TCGv_i32 fp1 = tcg_temp_local_new_i32();
-            TCGv_i32 fph1 = tcg_temp_local_new_i32();
+            TCGv_i32 fp = tcg_temp_new_i32();
+            TCGv_i32 fph = tcg_temp_new_i32();
             int l1 = gen_new_label();
             int l2 = gen_new_label();
 
             gen_load_gpr(t0, fr);
             tcg_gen_andi_tl(t0, t0, 0x7);
-            gen_load_fpr32(fp0, fs);
-            gen_load_fpr32h(fph0, fs);
-            gen_load_fpr32(fp1, ft);
-            gen_load_fpr32h(fph1, ft);
 
             tcg_gen_brcondi_tl(TCG_COND_NE, t0, 0, l1);
-            gen_store_fpr32(fp0, fd);
-            gen_store_fpr32h(fph0, fd);
+            gen_load_fpr32(fp, fs);
+            gen_load_fpr32h(fph, fs);
+            gen_store_fpr32(fp, fd);
+            gen_store_fpr32h(fph, fd);
             tcg_gen_br(l2);
             gen_set_label(l1);
             tcg_gen_brcondi_tl(TCG_COND_NE, t0, 4, l2);
             tcg_temp_free(t0);
 #ifdef TARGET_WORDS_BIGENDIAN
-            gen_store_fpr32(fph1, fd);
-            gen_store_fpr32h(fp0, fd);
+            gen_load_fpr32(fp, fs);
+            gen_load_fpr32h(fph, ft);
+            gen_store_fpr32h(fp, fd);
+            gen_store_fpr32(fph, fd);
 #else
-            gen_store_fpr32(fph0, fd);
-            gen_store_fpr32h(fp1, fd);
+            gen_load_fpr32h(fph, fs);
+            gen_load_fpr32(fp, ft);
+            gen_store_fpr32(fph, fd);
+            gen_store_fpr32h(fp, fd);
 #endif
             gen_set_label(l2);
-            tcg_temp_free_i32(fp0);
-            tcg_temp_free_i32(fph0);
-            tcg_temp_free_i32(fp1);
-            tcg_temp_free_i32(fph1);
+            tcg_temp_free_i32(fp);
+            tcg_temp_free_i32(fph);
         }
         opn = "alnv.ps";
         break;
@@ -7582,14 +7616,8 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
         int l1 = gen_new_label();
 
         MIPS_DEBUG("blikely condition (" TARGET_FMT_lx ")", ctx->pc + 4);
-        tcg_gen_brcondi_i32(TCG_COND_NE, bcond, 0, l1);
-        {
-            TCGv_i32 r_tmp = tcg_temp_new_i32();
-
-            tcg_gen_movi_i32(r_tmp, ctx->hflags & ~MIPS_HFLAG_BMASK);
-            tcg_gen_st_i32(r_tmp, cpu_env, offsetof(CPUState, hflags));
-            tcg_temp_free_i32(r_tmp);
-        }
+        tcg_gen_brcondi_tl(TCG_COND_NE, bcond, 0, l1);
+        tcg_gen_movi_i32(hflags, ctx->hflags & ~MIPS_HFLAG_BMASK);
         gen_goto_tb(ctx, 1, ctx->pc + 4);
         gen_set_label(l1);
     }
@@ -7603,18 +7631,34 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
     case OPC_SPECIAL:
         op1 = MASK_SPECIAL(ctx->opcode);
         switch (op1) {
-        case OPC_SLL:          /* Arithmetic with immediate */
-        case OPC_SRL ... OPC_SRA:
-            gen_arith_imm(env, ctx, op1, rd, rt, sa);
+        case OPC_SLL:          /* Shift with immediate */
+        case OPC_SRA:
+        case OPC_SRL:
+            gen_shift_imm(env, ctx, op1, rd, rt, sa);
             break;
-        case OPC_MOVZ ... OPC_MOVN:
+        case OPC_MOVN:         /* Conditional move */
+        case OPC_MOVZ:
             check_insn(env, ctx, ISA_MIPS4 | ISA_MIPS32);
-        case OPC_SLLV:         /* Arithmetic */
-        case OPC_SRLV ... OPC_SRAV:
-        case OPC_ADD ... OPC_NOR:
-        case OPC_SLT ... OPC_SLTU:
+            gen_cond_move(env, op1, rd, rs, rt);
+            break;
+        case OPC_ADD ... OPC_SUBU:
             gen_arith(env, ctx, op1, rd, rs, rt);
             break;
+        case OPC_SLLV:         /* Shifts */
+        case OPC_SRLV:
+        case OPC_SRAV:
+            gen_shift(env, ctx, op1, rd, rs, rt);
+            break;
+        case OPC_SLT:          /* Set on less than */
+        case OPC_SLTU:
+            gen_slt(env, op1, rd, rs, rt);
+            break;
+        case OPC_AND:          /* Logic*/
+        case OPC_OR:
+        case OPC_NOR:
+        case OPC_XOR:
+            gen_logic(env, op1, rd, rs, rt);
+            break;
         case OPC_MULT ... OPC_DIVU:
             if (sa) {
                 check_insn(env, ctx, INSN_VR54XX);
@@ -7648,6 +7692,7 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
             break;
         case OPC_SYSCALL:
             generate_exception(ctx, EXCP_SYSCALL);
+            ctx->bstate = BS_STOP;
             break;
         case OPC_BREAK:
             generate_exception(ctx, EXCP_BREAK);
@@ -7669,7 +7714,6 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
         case OPC_MOVCI:
             check_insn(env, ctx, ISA_MIPS4 | ISA_MIPS32);
             if (env->CP0_Config1 & (1 << CP0C1_FP)) {
-                save_cpu_state(ctx, 1);
                 check_cp1_enabled(ctx);
                 gen_movci(ctx, rd, rs, (ctx->opcode >> 18) & 0x7,
                           (ctx->opcode >> 16) & 1);
@@ -7681,20 +7725,27 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
 #if defined(TARGET_MIPS64)
        /* MIPS64 specific opcodes */
         case OPC_DSLL:
-        case OPC_DSRL ... OPC_DSRA:
+        case OPC_DSRA:
+        case OPC_DSRL:
         case OPC_DSLL32:
-        case OPC_DSRL32 ... OPC_DSRA32:
+        case OPC_DSRA32:
+        case OPC_DSRL32:
             check_insn(env, ctx, ISA_MIPS3);
             check_mips_64(ctx);
-            gen_arith_imm(env, ctx, op1, rd, rt, sa);
+            gen_shift_imm(env, ctx, op1, rd, rt, sa);
             break;
-        case OPC_DSLLV:
-        case OPC_DSRLV ... OPC_DSRAV:
         case OPC_DADD ... OPC_DSUBU:
             check_insn(env, ctx, ISA_MIPS3);
             check_mips_64(ctx);
             gen_arith(env, ctx, op1, rd, rs, rt);
             break;
+        case OPC_DSLLV:
+        case OPC_DSRAV:
+        case OPC_DSRLV:
+            check_insn(env, ctx, ISA_MIPS3);
+            check_mips_64(ctx);
+            gen_shift(env, ctx, op1, rd, rs, rt);
+            break;
         case OPC_DMULT ... OPC_DDIVU:
             check_insn(env, ctx, ISA_MIPS3);
             check_mips_64(ctx);
@@ -7718,7 +7769,8 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
         case OPC_MUL:
             gen_arith(env, ctx, op1, rd, rs, rt);
             break;
-        case OPC_CLZ ... OPC_CLO:
+        case OPC_CLO:
+        case OPC_CLZ:
             check_insn(env, ctx, ISA_MIPS32);
             gen_cl(ctx, op1, rd, rs);
             break;
@@ -7735,7 +7787,8 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
             /* Treat as NOP. */
             break;
 #if defined(TARGET_MIPS64)
-        case OPC_DCLZ ... OPC_DCLO:
+        case OPC_DCLO:
+        case OPC_DCLZ:
             check_insn(env, ctx, ISA_MIPS64);
             check_mips_64(ctx);
             gen_cl(ctx, op1, rd, rs);
@@ -7763,28 +7816,33 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
         case OPC_RDHWR:
             check_insn(env, ctx, ISA_MIPS32R2);
             {
-                TCGv t0 = tcg_temp_local_new();
+                TCGv t0 = tcg_temp_new();
 
                 switch (rd) {
                 case 0:
                     save_cpu_state(ctx, 1);
                     gen_helper_rdhwr_cpunum(t0);
+                    gen_store_gpr(t0, rt);
                     break;
                 case 1:
                     save_cpu_state(ctx, 1);
                     gen_helper_rdhwr_synci_step(t0);
+                    gen_store_gpr(t0, rt);
                     break;
                 case 2:
                     save_cpu_state(ctx, 1);
                     gen_helper_rdhwr_cc(t0);
+                    gen_store_gpr(t0, rt);
                     break;
                 case 3:
                     save_cpu_state(ctx, 1);
                     gen_helper_rdhwr_ccres(t0);
+                    gen_store_gpr(t0, rt);
                     break;
                 case 29:
 #if defined(CONFIG_USER_ONLY)
                     tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, tls_value));
+                    gen_store_gpr(t0, rt);
                     break;
 #else
                     /* XXX: Some CPUs implement this in hardware.
@@ -7795,15 +7853,14 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
                     generate_exception(ctx, EXCP_RI);
                     break;
                 }
-                gen_store_gpr(t0, rt);
                 tcg_temp_free(t0);
             }
             break;
         case OPC_FORK:
             check_insn(env, ctx, ASE_MT);
             {
-                TCGv t0 = tcg_temp_local_new();
-                TCGv t1 = tcg_temp_local_new();
+                TCGv t0 = tcg_temp_new();
+                TCGv t1 = tcg_temp_new();
 
                 gen_load_gpr(t0, rt);
                 gen_load_gpr(t1, rs);
@@ -7815,8 +7872,9 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
         case OPC_YIELD:
             check_insn(env, ctx, ASE_MT);
             {
-                TCGv t0 = tcg_temp_local_new();
+                TCGv t0 = tcg_temp_new();
 
+                save_cpu_state(ctx, 1);
                 gen_load_gpr(t0, rs);
                 gen_helper_yield(t0, t0);
                 gen_store_gpr(t0, rd);
@@ -7888,30 +7946,35 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
         case OPC_MFMC0:
 #ifndef CONFIG_USER_ONLY
             {
-                TCGv t0 = tcg_temp_local_new();
+                TCGv t0 = tcg_temp_new();
 
                 op2 = MASK_MFMC0(ctx->opcode);
                 switch (op2) {
                 case OPC_DMT:
                     check_insn(env, ctx, ASE_MT);
                     gen_helper_dmt(t0, t0);
+                    gen_store_gpr(t0, rt);
                     break;
                 case OPC_EMT:
                     check_insn(env, ctx, ASE_MT);
                     gen_helper_emt(t0, t0);
+                    gen_store_gpr(t0, rt);
                     break;
                 case OPC_DVPE:
                     check_insn(env, ctx, ASE_MT);
                     gen_helper_dvpe(t0, t0);
+                    gen_store_gpr(t0, rt);
                     break;
                 case OPC_EVPE:
                     check_insn(env, ctx, ASE_MT);
                     gen_helper_evpe(t0, t0);
+                    gen_store_gpr(t0, rt);
                     break;
                 case OPC_DI:
                     check_insn(env, ctx, ISA_MIPS32R2);
                     save_cpu_state(ctx, 1);
                     gen_helper_di(t0);
+                    gen_store_gpr(t0, rt);
                     /* Stop translation as we may have switched the execution mode */
                     ctx->bstate = BS_STOP;
                     break;
@@ -7919,6 +7982,7 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
                     check_insn(env, ctx, ISA_MIPS32R2);
                     save_cpu_state(ctx, 1);
                     gen_helper_ei(t0);
+                    gen_store_gpr(t0, rt);
                     /* Stop translation as we may have switched the execution mode */
                     ctx->bstate = BS_STOP;
                     break;
@@ -7927,7 +7991,6 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
                     generate_exception(ctx, EXCP_RI);
                     break;
                 }
-                gen_store_gpr(t0, rt);
                 tcg_temp_free(t0);
             }
 #endif /* !CONFIG_USER_ONLY */
@@ -7946,9 +8009,20 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
             break;
         }
         break;
-    case OPC_ADDI ... OPC_LUI: /* Arithmetic with immediate opcode */
+    case OPC_ADDI: /* Arithmetic with immediate opcode */
+    case OPC_ADDIU:
          gen_arith_imm(env, ctx, op, rt, rs, imm);
          break;
+    case OPC_SLTI: /* Set on less than with immediate opcode */
+    case OPC_SLTIU:
+         gen_slt_imm(env, op, rt, rs, imm);
+         break;
+    case OPC_ANDI: /* Arithmetic with immediate opcode */
+    case OPC_LUI:
+    case OPC_ORI:
+    case OPC_XORI:
+         gen_logic_imm(env, op, rt, rs, imm);
+         break;
     case OPC_J ... OPC_JAL: /* Jump */
          offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
          gen_compute_branch(ctx, op, rs, rt, offset);
@@ -7961,9 +8035,11 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
     case OPC_SB ... OPC_SW:
     case OPC_SWR:
     case OPC_LL:
-    case OPC_SC:
          gen_ldst(ctx, op, rt, rs, imm);
          break;
+    case OPC_SC:
+         gen_st_cond(ctx, op, rt, rs, imm);
+         break;
     case OPC_CACHE:
         check_insn(env, ctx, ISA_MIPS3 | ISA_MIPS32);
         /* Treat as NOP. */
@@ -7979,7 +8055,6 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
     case OPC_SWC1:
     case OPC_SDC1:
         if (env->CP0_Config1 & (1 << CP0C1_FP)) {
-            save_cpu_state(ctx, 1);
             check_cp1_enabled(ctx);
             gen_flt_ldst(ctx, op, rt, rs, imm);
         } else {
@@ -7989,7 +8064,6 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
 
     case OPC_CP1:
         if (env->CP0_Config1 & (1 << CP0C1_FP)) {
-            save_cpu_state(ctx, 1);
             check_cp1_enabled(ctx);
             op1 = MASK_CP1(ctx->opcode);
             switch (op1) {
@@ -8048,7 +8122,6 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
 
     case OPC_CP3:
         if (env->CP0_Config1 & (1 << CP0C1_FP)) {
-            save_cpu_state(ctx, 1);
             check_cp1_enabled(ctx);
             op1 = MASK_CP3(ctx->opcode);
             switch (op1) {
@@ -8095,13 +8168,18 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
     case OPC_SDL ... OPC_SDR:
     case OPC_LLD:
     case OPC_LD:
-    case OPC_SCD:
     case OPC_SD:
         check_insn(env, ctx, ISA_MIPS3);
         check_mips_64(ctx);
         gen_ldst(ctx, op, rt, rs, imm);
         break;
-    case OPC_DADDI ... OPC_DADDIU:
+    case OPC_SCD:
+        check_insn(env, ctx, ISA_MIPS3);
+        check_mips_64(ctx);
+        gen_st_cond(ctx, op, rt, rs, imm);
+        break;
+    case OPC_DADDI:
+    case OPC_DADDIU:
         check_insn(env, ctx, ISA_MIPS3);
         check_mips_64(ctx);
         gen_arith_imm(env, ctx, op, rt, rs, imm);
@@ -8142,7 +8220,7 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
             {
                 int l1 = gen_new_label();
 
-                tcg_gen_brcondi_i32(TCG_COND_NE, bcond, 0, l1);
+                tcg_gen_brcondi_tl(TCG_COND_NE, bcond, 0, l1);
                 gen_goto_tb(ctx, 1, ctx->pc + 4);
                 gen_set_label(l1);
                 gen_goto_tb(ctx, 0, ctx->btarget);
@@ -8247,9 +8325,9 @@ gen_intermediate_code_internal (CPUState *env, TranslationBlock *tb,
 
         if (num_insns >= max_insns)
             break;
-#if defined (MIPS_SINGLE_STEP)
-        break;
-#endif
+
+        if (singlestep)
+            break;
     }
     if (tb->cflags & CF_LAST_IO)
         gen_io_end();
@@ -8289,7 +8367,7 @@ done_generating:
     }
 #ifdef DEBUG_DISAS
     LOG_DISAS("\n");
-    if (loglevel & CPU_LOG_TB_IN_ASM) {
+    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         qemu_log("IN: %s\n", lookup_symbol(pc_start));
         log_target_disas(pc_start, ctx.pc - pc_start, 0);
         qemu_log("\n");
@@ -8415,7 +8493,8 @@ static void mips_tcg_init(void)
         return;
 
     cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
-    for (i = 0; i < 32; i++)
+    TCGV_UNUSED(cpu_gpr[0]);
+    for (i = 1; i < 32; i++)
         cpu_gpr[i] = tcg_global_mem_new(TCG_AREG0,
                                         offsetof(CPUState, active_tc.gpr[i]),
                                         regnames[i]);
@@ -8435,22 +8514,13 @@ static void mips_tcg_init(void)
     cpu_dspctrl = tcg_global_mem_new(TCG_AREG0,
                                      offsetof(CPUState, active_tc.DSPControl),
                                      "DSPControl");
-    bcond = tcg_global_mem_new_i32(TCG_AREG0,
-                                   offsetof(CPUState, bcond), "bcond");
+    bcond = tcg_global_mem_new(TCG_AREG0,
+                               offsetof(CPUState, bcond), "bcond");
     btarget = tcg_global_mem_new(TCG_AREG0,
                                  offsetof(CPUState, btarget), "btarget");
-    for (i = 0; i < 32; i++)
-        fpu_fpr32[i] = tcg_global_mem_new_i32(TCG_AREG0,
-            offsetof(CPUState, active_fpu.fpr[i].w[FP_ENDIAN_IDX]),
-            fregnames[i]);
-    for (i = 0; i < 32; i++)
-        fpu_fpr64[i] = tcg_global_mem_new_i64(TCG_AREG0,
-            offsetof(CPUState, active_fpu.fpr[i]),
-            fregnames_64[i]);
-    for (i = 0; i < 32; i++)
-        fpu_fpr32h[i] = tcg_global_mem_new_i32(TCG_AREG0,
-            offsetof(CPUState, active_fpu.fpr[i].w[!FP_ENDIAN_IDX]),
-            fregnames_h[i]);
+    hflags = tcg_global_mem_new_i32(TCG_AREG0,
+                                    offsetof(CPUState, hflags), "hflags");
+
     fpu_fcr0 = tcg_global_mem_new_i32(TCG_AREG0,
                                       offsetof(CPUState, active_fpu.fcr0),
                                       "fcr0");
@@ -8476,19 +8546,23 @@ CPUMIPSState *cpu_mips_init (const char *cpu_model)
     if (!def)
         return NULL;
     env = qemu_mallocz(sizeof(CPUMIPSState));
-    if (!env)
-        return NULL;
     env->cpu_model = def;
 
     cpu_exec_init(env);
     env->cpu_model_str = cpu_model;
     mips_tcg_init();
     cpu_reset(env);
+    qemu_init_vcpu(env);
     return env;
 }
 
 void cpu_reset (CPUMIPSState *env)
 {
+    if (qemu_loglevel_mask(CPU_LOG_RESET)) {
+        qemu_log("CPU Reset (CPU %d)\n", env->cpu_index);
+        log_cpu_state(env, 0);
+    }
+
     memset(env, 0, offsetof(CPUMIPSState, breakpoints));
 
     tlb_flush(env, 1);
@@ -8496,6 +8570,8 @@ void cpu_reset (CPUMIPSState *env)
     /* Minimal init */
 #if defined(CONFIG_USER_ONLY)
     env->hflags = MIPS_HFLAG_UM;
+    /* Enable access to the SYNCI_Step register.  */
+    env->CP0_HWREna |= (1 << 1);
 #else
     if (env->hflags & MIPS_HFLAG_BMASK) {
         /* If the exception was raised from a delay slot,