target-mips: optimize gen_movcf_*()
[qemu] / target-mips / translate.c
index d659afb..8adc89c 100644 (file)
@@ -429,10 +429,8 @@ enum {
 static TCGv_ptr cpu_env;
 static TCGv cpu_gpr[32], cpu_PC;
 static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC], cpu_ACX[MIPS_DSP_ACC];
-static TCGv cpu_dspctrl, btarget;
-static TCGv_i32 bcond;
-static TCGv_i32 fpu_fpr32[32], fpu_fpr32h[32];
-static TCGv_i64 fpu_fpr64[32];
+static TCGv cpu_dspctrl, btarget, bcond;
+static TCGv_i32 hflags;
 static TCGv_i32 fpu_fcr0, fpu_fcr31;
 
 #include "gen-icount.h"
@@ -501,18 +499,6 @@ static const char *fregnames[] =
       "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
       "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", };
 
-static const char *fregnames_64[] =
-    { "F0",  "F1",  "F2",  "F3",  "F4",  "F5",  "F6",  "F7",
-      "F8",  "F9",  "F10", "F11", "F12", "F13", "F14", "F15",
-      "F16", "F17", "F18", "F19", "F20", "F21", "F22", "F23",
-      "F24", "F25", "F26", "F27", "F28", "F29", "F30", "F31", };
-
-static const char *fregnames_h[] =
-    { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
-      "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
-      "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
-      "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31", };
-
 #ifdef MIPS_DEBUG_DISAS
 #define MIPS_DEBUG(fmt, args...)                         \
         qemu_log_mask(CPU_LOG_TB_IN_ASM,                \
@@ -607,56 +593,62 @@ static inline void gen_store_srsgpr (int from, int to)
 /* Floating point register moves. */
 static inline void gen_load_fpr32 (TCGv_i32 t, int reg)
 {
-    tcg_gen_mov_i32(t, fpu_fpr32[reg]);
+    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
 }
 
 static inline void gen_store_fpr32 (TCGv_i32 t, int reg)
 {
-    tcg_gen_mov_i32(fpu_fpr32[reg], t);
+    tcg_gen_st_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
 }
 
-static inline void gen_load_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
+static inline void gen_load_fpr32h (TCGv_i32 t, int reg)
 {
-    if (ctx->hflags & MIPS_HFLAG_F64)
-        tcg_gen_mov_i64(t, fpu_fpr64[reg]);
-    else {
-        tcg_gen_concat_i32_i64(t, fpu_fpr32[reg & ~1], fpu_fpr32[reg | 1]);
-    }
+    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
 }
 
-static inline void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
+static inline void gen_store_fpr32h (TCGv_i32 t, int reg)
 {
-    if (ctx->hflags & MIPS_HFLAG_F64)
-        tcg_gen_mov_i64(fpu_fpr64[reg], t);
-    else {
-        tcg_gen_trunc_i64_i32(fpu_fpr32[reg & ~1], t);
-        tcg_gen_shri_i64(t, t, 32);
-        tcg_gen_trunc_i64_i32(fpu_fpr32[reg | 1], t);
-    }
+    tcg_gen_st_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
 }
 
-static inline void gen_load_fpr32h (TCGv_i32 t, int reg)
+static inline void gen_load_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
 {
-    tcg_gen_mov_i32(t, fpu_fpr32h[reg]);
+    if (ctx->hflags & MIPS_HFLAG_F64) {
+        tcg_gen_ld_i64(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].d));
+    } else {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        TCGv_i32 t1 = tcg_temp_new_i32();
+        gen_load_fpr32(t0, reg & ~1);
+        gen_load_fpr32(t1, reg | 1);
+        tcg_gen_concat_i32_i64(t, t0, t1);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+    }
 }
 
-static inline void gen_store_fpr32h (TCGv_i32 t, int reg)
+static inline void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
 {
-    tcg_gen_mov_i32(fpu_fpr32h[reg], t);
+    if (ctx->hflags & MIPS_HFLAG_F64) {
+        tcg_gen_st_i64(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].d));
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i32 t1 = tcg_temp_new_i32();
+        tcg_gen_trunc_i64_i32(t1, t);
+        gen_store_fpr32(t1, reg & ~1);
+        tcg_gen_shri_i64(t0, t, 32);
+        tcg_gen_trunc_i64_i32(t1, t0);
+        gen_store_fpr32(t1, reg | 1);
+        tcg_temp_free_i32(t1);
+        tcg_temp_free_i64(t0);
+    }
 }
 
-static inline void get_fp_cond (TCGv_i32 t)
+static inline int get_fp_bit (int cc)
 {
-    TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-    TCGv_i32 r_tmp2 = tcg_temp_new_i32();
-
-    tcg_gen_shri_i32(r_tmp2, fpu_fcr31, 24);
-    tcg_gen_andi_i32(r_tmp2, r_tmp2, 0xfe);
-    tcg_gen_shri_i32(r_tmp1, fpu_fcr31, 23);
-    tcg_gen_andi_i32(r_tmp1, r_tmp1, 0x1);
-    tcg_gen_or_i32(t, r_tmp1, r_tmp2);
-    tcg_temp_free_i32(r_tmp1);
-    tcg_temp_free_i32(r_tmp2);
+    if (cc)
+        return 24 + cc;
+    else
+        return 23;
 }
 
 #define FOP_CONDS(type, fmt, bits)                                            \
@@ -693,18 +685,18 @@ FOP_CONDS(abs, ps, 64)
 #undef FOP_CONDS
 
 /* Tests */
-#define OP_COND(name, cond)                                   \
-static inline void glue(gen_op_, name) (TCGv t0, TCGv t1)     \
-{                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcond_tl(cond, t0, t1, l1);                      \
-    tcg_gen_movi_tl(t0, 0);                                   \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t0, 1);                                   \
-    gen_set_label(l2);                                        \
+#define OP_COND(name, cond)                                         \
+static inline void glue(gen_op_, name) (TCGv ret, TCGv t0, TCGv t1) \
+{                                                                   \
+    int l1 = gen_new_label();                                       \
+    int l2 = gen_new_label();                                       \
+                                                                    \
+    tcg_gen_brcond_tl(cond, t0, t1, l1);                            \
+    tcg_gen_movi_tl(ret, 0);                                        \
+    tcg_gen_br(l2);                                                 \
+    gen_set_label(l1);                                              \
+    tcg_gen_movi_tl(ret, 1);                                        \
+    gen_set_label(l2);                                              \
 }
 OP_COND(eq, TCG_COND_EQ);
 OP_COND(ne, TCG_COND_NE);
@@ -714,34 +706,34 @@ OP_COND(lt, TCG_COND_LT);
 OP_COND(ltu, TCG_COND_LTU);
 #undef OP_COND
 
-#define OP_CONDI(name, cond)                                  \
-static inline void glue(gen_op_, name) (TCGv t, target_ulong val) \
-{                                                             \
-    int l1 = gen_new_label();                                 \
-    int l2 = gen_new_label();                                 \
-                                                              \
-    tcg_gen_brcondi_tl(cond, t, val, l1);                     \
-    tcg_gen_movi_tl(t, 0);                                    \
-    tcg_gen_br(l2);                                           \
-    gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
-    gen_set_label(l2);                                        \
+#define OP_CONDI(name, cond)                                                 \
+static inline void glue(gen_op_, name) (TCGv ret, TCGv t0, target_ulong val) \
+{                                                                            \
+    int l1 = gen_new_label();                                                \
+    int l2 = gen_new_label();                                                \
+                                                                             \
+    tcg_gen_brcondi_tl(cond, t0, val, l1);                                   \
+    tcg_gen_movi_tl(ret, 0);                                                 \
+    tcg_gen_br(l2);                                                          \
+    gen_set_label(l1);                                                       \
+    tcg_gen_movi_tl(ret, 1);                                                 \
+    gen_set_label(l2);                                                       \
 }
 OP_CONDI(lti, TCG_COND_LT);
 OP_CONDI(ltiu, TCG_COND_LTU);
 #undef OP_CONDI
 
 #define OP_CONDZ(name, cond)                                  \
-static inline void glue(gen_op_, name) (TCGv t)               \
+static inline void glue(gen_op_, name) (TCGv ret, TCGv t0)    \
 {                                                             \
     int l1 = gen_new_label();                                 \
     int l2 = gen_new_label();                                 \
                                                               \
-    tcg_gen_brcondi_tl(cond, t, 0, l1);                       \
-    tcg_gen_movi_tl(t, 0);                                    \
+    tcg_gen_brcondi_tl(cond, t0, 0, l1);                      \
+    tcg_gen_movi_tl(ret, 0);                                  \
     tcg_gen_br(l2);                                           \
     gen_set_label(l1);                                        \
-    tcg_gen_movi_tl(t, 1);                                    \
+    tcg_gen_movi_tl(ret, 1);                                  \
     gen_set_label(l2);                                        \
 }
 OP_CONDZ(gez, TCG_COND_GE);
@@ -763,11 +755,7 @@ static inline void save_cpu_state (DisasContext *ctx, int do_save_pc)
         ctx->saved_pc = ctx->pc;
     }
     if (ctx->hflags != ctx->saved_hflags) {
-        TCGv_i32 r_tmp = tcg_temp_new_i32();
-
-        tcg_gen_movi_i32(r_tmp, ctx->hflags);
-        tcg_gen_st_i32(r_tmp, cpu_env, offsetof(CPUState, hflags));
-        tcg_temp_free_i32(r_tmp);
+        tcg_gen_movi_i32(hflags, ctx->hflags);
         ctx->saved_hflags = ctx->hflags;
         switch (ctx->hflags & MIPS_HFLAG_BMASK) {
         case MIPS_HFLAG_BR:
@@ -1314,11 +1302,11 @@ static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc,
         break;
 #endif
     case OPC_SLTI:
-        gen_op_lti(t0, uimm);
+        gen_op_lti(t0, t0, uimm);
         opn = "slti";
         break;
     case OPC_SLTIU:
-        gen_op_ltiu(t0, uimm);
+        gen_op_ltiu(t0, t0, uimm);
         opn = "sltiu";
         break;
     case OPC_ANDI:
@@ -1601,11 +1589,11 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
         break;
 #endif
     case OPC_SLT:
-        gen_op_lt(t0, t1);
+        gen_op_lt(t0, t0, t1);
         opn = "slt";
         break;
     case OPC_SLTU:
-        gen_op_ltu(t0, t1);
+        gen_op_ltu(t0, t0, t1);
         opn = "sltu";
         break;
     case OPC_AND:
@@ -1613,8 +1601,7 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
         opn = "and";
         break;
     case OPC_NOR:
-        tcg_gen_or_tl(t0, t0, t1);
-        tcg_gen_not_tl(t0, t0);
+        tcg_gen_nor_tl(t0, t0, t1);
         opn = "nor";
         break;
     case OPC_OR:
@@ -1772,50 +1759,60 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
 static void gen_HILO (DisasContext *ctx, uint32_t opc, int reg)
 {
     const char *opn = "hilo";
-    TCGv t0 = tcg_temp_local_new();
 
     if (reg == 0 && (opc == OPC_MFHI || opc == OPC_MFLO)) {
         /* Treat as NOP. */
         MIPS_DEBUG("NOP");
-        goto out;
+        return;
     }
     switch (opc) {
     case OPC_MFHI:
-        tcg_gen_mov_tl(t0, cpu_HI[0]);
-        gen_store_gpr(t0, reg);
+        tcg_gen_mov_tl(cpu_gpr[reg], cpu_HI[0]);
         opn = "mfhi";
         break;
     case OPC_MFLO:
-        tcg_gen_mov_tl(t0, cpu_LO[0]);
-        gen_store_gpr(t0, reg);
+        tcg_gen_mov_tl(cpu_gpr[reg], cpu_LO[0]);
         opn = "mflo";
         break;
     case OPC_MTHI:
-        gen_load_gpr(t0, reg);
-        tcg_gen_mov_tl(cpu_HI[0], t0);
+        if (reg != 0)
+            tcg_gen_mov_tl(cpu_HI[0], cpu_gpr[reg]);
+        else
+            tcg_gen_movi_tl(cpu_HI[0], 0);
         opn = "mthi";
         break;
     case OPC_MTLO:
-        gen_load_gpr(t0, reg);
-        tcg_gen_mov_tl(cpu_LO[0], t0);
+        if (reg != 0)
+            tcg_gen_mov_tl(cpu_LO[0], cpu_gpr[reg]);
+        else
+            tcg_gen_movi_tl(cpu_LO[0], 0);
         opn = "mtlo";
         break;
-    default:
-        MIPS_INVAL(opn);
-        generate_exception(ctx, EXCP_RI);
-        goto out;
     }
     MIPS_DEBUG("%s %s", opn, regnames[reg]);
- out:
-    tcg_temp_free(t0);
 }
 
 static void gen_muldiv (DisasContext *ctx, uint32_t opc,
                         int rs, int rt)
 {
     const char *opn = "mul/div";
-    TCGv t0 = tcg_temp_local_new();
-    TCGv t1 = tcg_temp_local_new();
+    TCGv t0, t1;
+
+    switch (opc) {
+    case OPC_DIV:
+    case OPC_DIVU:
+#if defined(TARGET_MIPS64)
+    case OPC_DDIV:
+    case OPC_DDIVU:
+#endif
+        t0 = tcg_temp_local_new();
+        t1 = tcg_temp_local_new();
+        break;
+    default:
+        t0 = tcg_temp_new();
+        t1 = tcg_temp_new();
+        break;
+    }
 
     gen_load_gpr(t0, rs);
     gen_load_gpr(t1, rt);
@@ -1823,30 +1820,22 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
     case OPC_DIV:
         {
             int l1 = gen_new_label();
+            int l2 = gen_new_label();
 
+            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ext32s_tl(t1, t1);
             tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            {
-                int l2 = gen_new_label();
-                TCGv_i32 r_tmp1 = tcg_temp_local_new_i32();
-                TCGv_i32 r_tmp2 = tcg_temp_local_new_i32();
-                TCGv_i32 r_tmp3 = tcg_temp_local_new_i32();
-
-                tcg_gen_trunc_tl_i32(r_tmp1, t0);
-                tcg_gen_trunc_tl_i32(r_tmp2, t1);
-                tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp1, -1 << 31, l2);
-                tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp2, -1, l2);
-                tcg_gen_ext32s_tl(cpu_LO[0], t0);
-                tcg_gen_movi_tl(cpu_HI[0], 0);
-                tcg_gen_br(l1);
-                gen_set_label(l2);
-                tcg_gen_div_i32(r_tmp3, r_tmp1, r_tmp2);
-                tcg_gen_rem_i32(r_tmp2, r_tmp1, r_tmp2);
-                tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
-                tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp2);
-                tcg_temp_free_i32(r_tmp1);
-                tcg_temp_free_i32(r_tmp2);
-                tcg_temp_free_i32(r_tmp3);
-            }
+            tcg_gen_brcondi_tl(TCG_COND_NE, t0, INT_MIN, l2);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1, l2);
+
+            tcg_gen_mov_tl(cpu_LO[0], t0);
+            tcg_gen_movi_tl(cpu_HI[0], 0);
+            tcg_gen_br(l1);
+            gen_set_label(l2);
+            tcg_gen_div_tl(cpu_LO[0], t0, t1);
+            tcg_gen_rem_tl(cpu_HI[0], t0, t1);
+            tcg_gen_ext32s_tl(cpu_LO[0], cpu_LO[0]);
+            tcg_gen_ext32s_tl(cpu_HI[0], cpu_HI[0]);
             gen_set_label(l1);
         }
         opn = "div";
@@ -1855,40 +1844,30 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         {
             int l1 = gen_new_label();
 
-            tcg_gen_ext32s_tl(t1, t1);
+            tcg_gen_ext32u_tl(t0, t0);
+            tcg_gen_ext32u_tl(t1, t1);
             tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            {
-                TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-                TCGv_i32 r_tmp2 = tcg_temp_new_i32();
-                TCGv_i32 r_tmp3 = tcg_temp_new_i32();
-
-                tcg_gen_trunc_tl_i32(r_tmp1, t0);
-                tcg_gen_trunc_tl_i32(r_tmp2, t1);
-                tcg_gen_divu_i32(r_tmp3, r_tmp1, r_tmp2);
-                tcg_gen_remu_i32(r_tmp1, r_tmp1, r_tmp2);
-                tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
-                tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp1);
-                tcg_temp_free_i32(r_tmp1);
-                tcg_temp_free_i32(r_tmp2);
-                tcg_temp_free_i32(r_tmp3);
-            }
+            tcg_gen_divu_tl(cpu_LO[0], t0, t1);
+            tcg_gen_remu_tl(cpu_HI[0], t0, t1);
+            tcg_gen_ext32s_tl(cpu_LO[0], cpu_LO[0]);
+            tcg_gen_ext32s_tl(cpu_HI[0], cpu_HI[0]);
             gen_set_label(l1);
         }
         opn = "divu";
         break;
     case OPC_MULT:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
-
-            tcg_gen_ext_tl_i64(r_tmp1, t0);
-            tcg_gen_ext_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
+
+            tcg_gen_ext_tl_i64(t2, t0);
+            tcg_gen_ext_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -1896,19 +1875,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         break;
     case OPC_MULTU:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
 
             tcg_gen_ext32u_tl(t0, t0);
             tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_extu_tl_i64(r_tmp1, t0);
-            tcg_gen_extu_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            tcg_gen_extu_tl_i64(t2, t0);
+            tcg_gen_extu_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -1918,20 +1897,17 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
     case OPC_DDIV:
         {
             int l1 = gen_new_label();
+            int l2 = gen_new_label();
 
             tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            {
-                int l2 = gen_new_label();
-
-                tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
-                tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
-                tcg_gen_mov_tl(cpu_LO[0], t0);
-                tcg_gen_movi_tl(cpu_HI[0], 0);
-                tcg_gen_br(l1);
-                gen_set_label(l2);
-                tcg_gen_div_i64(cpu_LO[0], t0, t1);
-                tcg_gen_rem_i64(cpu_HI[0], t0, t1);
-            }
+            tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
+            tcg_gen_mov_tl(cpu_LO[0], t0);
+            tcg_gen_movi_tl(cpu_HI[0], 0);
+            tcg_gen_br(l1);
+            gen_set_label(l2);
+            tcg_gen_div_i64(cpu_LO[0], t0, t1);
+            tcg_gen_rem_i64(cpu_HI[0], t0, t1);
             gen_set_label(l1);
         }
         opn = "ddiv";
@@ -1958,19 +1934,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
 #endif
     case OPC_MADD:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
-
-            tcg_gen_ext_tl_i64(r_tmp1, t0);
-            tcg_gen_ext_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
-            tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
+
+            tcg_gen_ext_tl_i64(t2, t0);
+            tcg_gen_ext_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_add_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_LO[1], t1);
         }
@@ -1978,21 +1954,21 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         break;
     case OPC_MADDU:
        {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
 
             tcg_gen_ext32u_tl(t0, t0);
             tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_extu_tl_i64(r_tmp1, t0);
-            tcg_gen_extu_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
-            tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            tcg_gen_extu_tl_i64(t2, t0);
+            tcg_gen_extu_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_add_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -2000,19 +1976,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         break;
     case OPC_MSUB:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
-
-            tcg_gen_ext_tl_i64(r_tmp1, t0);
-            tcg_gen_ext_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
-            tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
+
+            tcg_gen_ext_tl_i64(t2, t0);
+            tcg_gen_ext_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_sub_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -2020,21 +1996,21 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         break;
     case OPC_MSUBU:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
 
             tcg_gen_ext32u_tl(t0, t0);
             tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_extu_tl_i64(r_tmp1, t0);
-            tcg_gen_extu_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
-            tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            tcg_gen_extu_tl_i64(t2, t0);
+            tcg_gen_extu_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_sub_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -2055,8 +2031,8 @@ static void gen_mul_vr54xx (DisasContext *ctx, uint32_t opc,
                             int rd, int rs, int rt)
 {
     const char *opn = "mul vr54xx";
-    TCGv t0 = tcg_temp_local_new();
-    TCGv t1 = tcg_temp_local_new();
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
 
     gen_load_gpr(t0, rs);
     gen_load_gpr(t1, rt);
@@ -2135,42 +2111,36 @@ static void gen_cl (DisasContext *ctx, uint32_t opc,
                     int rd, int rs)
 {
     const char *opn = "CLx";
-    TCGv t0 = tcg_temp_local_new();
+    TCGv t0;
 
     if (rd == 0) {
         /* Treat as NOP. */
         MIPS_DEBUG("NOP");
-        goto out;
+        return;
     }
+    t0 = tcg_temp_new();
     gen_load_gpr(t0, rs);
     switch (opc) {
     case OPC_CLO:
-        gen_helper_clo(t0, t0);
+        gen_helper_clo(cpu_gpr[rd], t0);
         opn = "clo";
         break;
     case OPC_CLZ:
-        gen_helper_clz(t0, t0);
+        gen_helper_clz(cpu_gpr[rd], t0);
         opn = "clz";
         break;
 #if defined(TARGET_MIPS64)
     case OPC_DCLO:
-        gen_helper_dclo(t0, t0);
+        gen_helper_dclo(cpu_gpr[rd], t0);
         opn = "dclo";
         break;
     case OPC_DCLZ:
-        gen_helper_dclz(t0, t0);
+        gen_helper_dclz(cpu_gpr[rd], t0);
         opn = "dclz";
         break;
 #endif
-    default:
-        MIPS_INVAL(opn);
-        generate_exception(ctx, EXCP_RI);
-        goto out;
     }
-    gen_store_gpr(t0, rd);
     MIPS_DEBUG("%s %s, %s", opn, regnames[rd], regnames[rs]);
-
- out:
     tcg_temp_free(t0);
 }
 
@@ -2179,8 +2149,8 @@ static void gen_trap (DisasContext *ctx, uint32_t opc,
                       int rs, int rt, int16_t imm)
 {
     int cond;
-    TCGv t0 = tcg_temp_local_new();
-    TCGv t1 = tcg_temp_local_new();
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
 
     cond = 0;
     /* Load needed operands */
@@ -2221,7 +2191,7 @@ static void gen_trap (DisasContext *ctx, uint32_t opc,
         case OPC_TGEU:  /* rs >= rs unsigned */
         case OPC_TGEIU: /* r0 >= 0  unsigned */
             /* Always trap */
-            tcg_gen_movi_tl(t0, 1);
+            generate_exception(ctx, EXCP_TRAP);
             break;
         case OPC_TLT:   /* rs < rs           */
         case OPC_TLTI:  /* r0 < 0            */
@@ -2230,54 +2200,40 @@ static void gen_trap (DisasContext *ctx, uint32_t opc,
         case OPC_TNE:   /* rs != rs          */
         case OPC_TNEI:  /* r0 != 0           */
             /* Never trap: treat as NOP. */
-            goto out;
-        default:
-            MIPS_INVAL("trap");
-            generate_exception(ctx, EXCP_RI);
-            goto out;
+            break;
         }
     } else {
+        int l1 = gen_new_label();
+
         switch (opc) {
         case OPC_TEQ:
         case OPC_TEQI:
-            gen_op_eq(t0, t1);
+            tcg_gen_brcond_tl(TCG_COND_NE, t0, t1, l1);
             break;
         case OPC_TGE:
         case OPC_TGEI:
-            gen_op_ge(t0, t1);
+            tcg_gen_brcond_tl(TCG_COND_LT, t0, t1, l1);
             break;
         case OPC_TGEU:
         case OPC_TGEIU:
-            gen_op_geu(t0, t1);
+            tcg_gen_brcond_tl(TCG_COND_LTU, t0, t1, l1);
             break;
         case OPC_TLT:
         case OPC_TLTI:
-            gen_op_lt(t0, t1);
+            tcg_gen_brcond_tl(TCG_COND_GE, t0, t1, l1);
             break;
         case OPC_TLTU:
         case OPC_TLTIU:
-            gen_op_ltu(t0, t1);
+            tcg_gen_brcond_tl(TCG_COND_GEU, t0, t1, l1);
             break;
         case OPC_TNE:
         case OPC_TNEI:
-            gen_op_ne(t0, t1);
+            tcg_gen_brcond_tl(TCG_COND_EQ, t0, t1, l1);
             break;
-        default:
-            MIPS_INVAL("trap");
-            generate_exception(ctx, EXCP_RI);
-            goto out;
         }
-    }
-    save_cpu_state(ctx, 1);
-    {
-        int l1 = gen_new_label();
-
-        tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, l1);
-        gen_helper_0i(raise_exception, EXCP_TRAP);
+        generate_exception(ctx, EXCP_TRAP);
         gen_set_label(l1);
     }
-    ctx->bstate = BS_STOP;
- out:
     tcg_temp_free(t0);
     tcg_temp_free(t1);
 }
@@ -2303,8 +2259,8 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
     target_ulong btgt = -1;
     int blink = 0;
     int bcond_compute = 0;
-    TCGv t0 = tcg_temp_local_new();
-    TCGv t1 = tcg_temp_local_new();
+    TCGv t0 = tcg_temp_new();
+    TCGv t1 = tcg_temp_new();
 
     if (ctx->hflags & MIPS_HFLAG_BMASK) {
 #ifdef MIPS_DEBUG_DISAS
@@ -2396,13 +2352,11 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
             MIPS_DEBUG("bnever (NOP)");
             goto out;
         case OPC_BLTZAL:  /* 0 < 0           */
-            tcg_gen_movi_tl(t0, ctx->pc + 8);
-            gen_store_gpr(t0, 31);
+            tcg_gen_movi_tl(cpu_gpr[31], ctx->pc + 8);
             MIPS_DEBUG("bnever and link");
             goto out;
         case OPC_BLTZALL: /* 0 < 0 likely */
-            tcg_gen_movi_tl(t0, ctx->pc + 8);
-            gen_store_gpr(t0, 31);
+            tcg_gen_movi_tl(cpu_gpr[31], ctx->pc + 8);
             /* Skip the instruction in the delay slot */
             MIPS_DEBUG("bnever, link and skip");
             ctx->pc += 4;
@@ -2440,82 +2394,80 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
     } else {
         switch (opc) {
         case OPC_BEQ:
-            gen_op_eq(t0, t1);
+            gen_op_eq(bcond, t0, t1);
             MIPS_DEBUG("beq %s, %s, " TARGET_FMT_lx,
                        regnames[rs], regnames[rt], btgt);
             goto not_likely;
         case OPC_BEQL:
-            gen_op_eq(t0, t1);
+            gen_op_eq(bcond, t0, t1);
             MIPS_DEBUG("beql %s, %s, " TARGET_FMT_lx,
                        regnames[rs], regnames[rt], btgt);
             goto likely;
         case OPC_BNE:
-            gen_op_ne(t0, t1);
+            gen_op_ne(bcond, t0, t1);
             MIPS_DEBUG("bne %s, %s, " TARGET_FMT_lx,
                        regnames[rs], regnames[rt], btgt);
             goto not_likely;
         case OPC_BNEL:
-            gen_op_ne(t0, t1);
+            gen_op_ne(bcond, t0, t1);
             MIPS_DEBUG("bnel %s, %s, " TARGET_FMT_lx,
                        regnames[rs], regnames[rt], btgt);
             goto likely;
         case OPC_BGEZ:
-            gen_op_gez(t0);
+            gen_op_gez(bcond, t0);
             MIPS_DEBUG("bgez %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto not_likely;
         case OPC_BGEZL:
-            gen_op_gez(t0);
+            gen_op_gez(bcond, t0);
             MIPS_DEBUG("bgezl %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
         case OPC_BGEZAL:
-            gen_op_gez(t0);
+            gen_op_gez(bcond, t0);
             MIPS_DEBUG("bgezal %s, " TARGET_FMT_lx, regnames[rs], btgt);
             blink = 31;
             goto not_likely;
         case OPC_BGEZALL:
-            gen_op_gez(t0);
+            gen_op_gez(bcond, t0);
             blink = 31;
             MIPS_DEBUG("bgezall %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
         case OPC_BGTZ:
-            gen_op_gtz(t0);
+            gen_op_gtz(bcond, t0);
             MIPS_DEBUG("bgtz %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto not_likely;
         case OPC_BGTZL:
-            gen_op_gtz(t0);
+            gen_op_gtz(bcond, t0);
             MIPS_DEBUG("bgtzl %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
         case OPC_BLEZ:
-            gen_op_lez(t0);
+            gen_op_lez(bcond, t0);
             MIPS_DEBUG("blez %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto not_likely;
         case OPC_BLEZL:
-            gen_op_lez(t0);
+            gen_op_lez(bcond, t0);
             MIPS_DEBUG("blezl %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
         case OPC_BLTZ:
-            gen_op_ltz(t0);
+            gen_op_ltz(bcond, t0);
             MIPS_DEBUG("bltz %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto not_likely;
         case OPC_BLTZL:
-            gen_op_ltz(t0);
+            gen_op_ltz(bcond, t0);
             MIPS_DEBUG("bltzl %s, " TARGET_FMT_lx, regnames[rs], btgt);
             goto likely;
         case OPC_BLTZAL:
-            gen_op_ltz(t0);
+            gen_op_ltz(bcond, t0);
             blink = 31;
             MIPS_DEBUG("bltzal %s, " TARGET_FMT_lx, regnames[rs], btgt);
         not_likely:
             ctx->hflags |= MIPS_HFLAG_BC;
-            tcg_gen_trunc_tl_i32(bcond, t0);
             break;
         case OPC_BLTZALL:
-            gen_op_ltz(t0);
+            gen_op_ltz(bcond, t0);
             blink = 31;
             MIPS_DEBUG("bltzall %s, " TARGET_FMT_lx, regnames[rs], btgt);
         likely:
             ctx->hflags |= MIPS_HFLAG_BL;
-            tcg_gen_trunc_tl_i32(bcond, t0);
             break;
         default:
             MIPS_INVAL("conditional branch/jump");
@@ -2528,8 +2480,7 @@ static void gen_compute_branch (DisasContext *ctx, uint32_t opc,
 
     ctx->btarget = btgt;
     if (blink > 0) {
-        tcg_gen_movi_tl(t0, ctx->pc + 8);
-        gen_store_gpr(t0, blink);
+        tcg_gen_movi_tl(cpu_gpr[blink], ctx->pc + 8);
     }
 
  out:
@@ -2632,56 +2583,72 @@ fail:
 
 static void gen_bshfl (DisasContext *ctx, uint32_t op2, int rt, int rd)
 {
-    TCGv t0 = tcg_temp_new();
-    TCGv t1 = tcg_temp_new();
+    TCGv t0;
 
-    gen_load_gpr(t1, rt);
+    if (rd == 0) {
+        /* If no destination, treat it as a NOP. */
+        MIPS_DEBUG("NOP");
+        return;
+    }
+
+    t0 = tcg_temp_new();
+    gen_load_gpr(t0, rt);
     switch (op2) {
     case OPC_WSBH:
-        tcg_gen_shri_tl(t0, t1, 8);
-        tcg_gen_andi_tl(t0, t0, 0x00FF00FF);
-        tcg_gen_shli_tl(t1, t1, 8);
-        tcg_gen_andi_tl(t1, t1, ~0x00FF00FF);
-        tcg_gen_or_tl(t0, t0, t1);
-        tcg_gen_ext32s_tl(t0, t0);
+        {
+            TCGv t1 = tcg_temp_new();
+
+            tcg_gen_shri_tl(t1, t0, 8);
+            tcg_gen_andi_tl(t1, t1, 0x00FF00FF);
+            tcg_gen_shli_tl(t0, t0, 8);
+            tcg_gen_andi_tl(t0, t0, ~0x00FF00FF);
+            tcg_gen_or_tl(t0, t0, t1);
+            tcg_temp_free(t1);
+            tcg_gen_ext32s_tl(cpu_gpr[rd], t0);
+        }
         break;
     case OPC_SEB:
-        tcg_gen_ext8s_tl(t0, t1);
+        tcg_gen_ext8s_tl(cpu_gpr[rd], t0);
         break;
     case OPC_SEH:
-        tcg_gen_ext16s_tl(t0, t1);
+        tcg_gen_ext16s_tl(cpu_gpr[rd], t0);
         break;
 #if defined(TARGET_MIPS64)
     case OPC_DSBH:
-        gen_load_gpr(t1, rt);
-        tcg_gen_shri_tl(t0, t1, 8);
-        tcg_gen_andi_tl(t0, t0, 0x00FF00FF00FF00FFULL);
-        tcg_gen_shli_tl(t1, t1, 8);
-        tcg_gen_andi_tl(t1, t1, ~0x00FF00FF00FF00FFULL);
-        tcg_gen_or_tl(t0, t0, t1);
+        {
+            TCGv t1 = tcg_temp_new();
+
+            tcg_gen_shri_tl(t1, t0, 8);
+            tcg_gen_andi_tl(t1, t1, 0x00FF00FF00FF00FFULL);
+            tcg_gen_shli_tl(t0, t0, 8);
+            tcg_gen_andi_tl(t0, t0, ~0x00FF00FF00FF00FFULL);
+            tcg_gen_or_tl(cpu_gpr[rd], t0, t1);
+            tcg_temp_free(t1);
+        }
         break;
     case OPC_DSHD:
-        gen_load_gpr(t1, rt);
-        tcg_gen_shri_tl(t0, t1, 16);
-        tcg_gen_andi_tl(t0, t0, 0x0000FFFF0000FFFFULL);
-        tcg_gen_shli_tl(t1, t1, 16);
-        tcg_gen_andi_tl(t1, t1, ~0x0000FFFF0000FFFFULL);
-        tcg_gen_or_tl(t1, t0, t1);
-        tcg_gen_shri_tl(t0, t1, 32);
-        tcg_gen_shli_tl(t1, t1, 32);
-        tcg_gen_or_tl(t0, t0, t1);
+        {
+            TCGv t1 = tcg_temp_new();
+
+            tcg_gen_shri_tl(t1, t0, 16);
+            tcg_gen_andi_tl(t1, t1, 0x0000FFFF0000FFFFULL);
+            tcg_gen_shli_tl(t0, t0, 16);
+            tcg_gen_andi_tl(t0, t0, ~0x0000FFFF0000FFFFULL);
+            tcg_gen_or_tl(t0, t0, t1);
+            tcg_gen_shri_tl(t1, t0, 32);
+            tcg_gen_shli_tl(t0, t0, 32);
+            tcg_gen_or_tl(cpu_gpr[rd], t0, t1);
+            tcg_temp_free(t1);
+        }
         break;
 #endif
     default:
         MIPS_INVAL("bsfhl");
         generate_exception(ctx, EXCP_RI);
         tcg_temp_free(t0);
-        tcg_temp_free(t1);
         return;
     }
-    gen_store_gpr(t0, rd);
     tcg_temp_free(t0);
-    tcg_temp_free(t1);
 }
 
 #ifndef CONFIG_USER_ONLY
@@ -5451,25 +5418,25 @@ static void gen_cp0 (CPUState *env, DisasContext *ctx, uint32_t opc, int rt, int
         break;
     case OPC_TLBWI:
         opn = "tlbwi";
-        if (!env->tlb->do_tlbwi)
+        if (!env->tlb->helper_tlbwi)
             goto die;
         gen_helper_tlbwi();
         break;
     case OPC_TLBWR:
         opn = "tlbwr";
-        if (!env->tlb->do_tlbwr)
+        if (!env->tlb->helper_tlbwr)
             goto die;
         gen_helper_tlbwr();
         break;
     case OPC_TLBP:
         opn = "tlbp";
-        if (!env->tlb->do_tlbp)
+        if (!env->tlb->helper_tlbp)
             goto die;
         gen_helper_tlbp();
         break;
     case OPC_TLBR:
         opn = "tlbr";
-        if (!env->tlb->do_tlbr)
+        if (!env->tlb->helper_tlbr)
             goto die;
         gen_helper_tlbr();
         break;
@@ -5527,132 +5494,88 @@ static void gen_compute_branch1 (CPUState *env, DisasContext *ctx, uint32_t op,
 
     switch (op) {
     case OPC_BC1F:
-        {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x1 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
-        }
+        tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+        tcg_gen_not_i32(t0, t0);
+        tcg_gen_andi_i32(t0, t0, 1);
+        tcg_gen_extu_i32_tl(bcond, t0);
         opn = "bc1f";
         goto not_likely;
     case OPC_BC1FL:
-        {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x1 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
-        }
+        tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+        tcg_gen_not_i32(t0, t0);
+        tcg_gen_andi_i32(t0, t0, 1);
+        tcg_gen_extu_i32_tl(bcond, t0);
         opn = "bc1fl";
         goto likely;
     case OPC_BC1T:
-        {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x1 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
-        }
+        tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+        tcg_gen_andi_i32(t0, t0, 1);
+        tcg_gen_extu_i32_tl(bcond, t0);
         opn = "bc1t";
         goto not_likely;
     case OPC_BC1TL:
-        {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x1 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
-        }
+        tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+        tcg_gen_andi_i32(t0, t0, 1);
+        tcg_gen_extu_i32_tl(bcond, t0);
         opn = "bc1tl";
     likely:
         ctx->hflags |= MIPS_HFLAG_BL;
         break;
     case OPC_BC1FANY2:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x3 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
+            TCGv_i32 t1 = tcg_temp_new_i32();
+            tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+1));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_temp_free_i32(t1);
+            tcg_gen_not_i32(t0, t0);
+            tcg_gen_andi_i32(t0, t0, 1);
+            tcg_gen_extu_i32_tl(bcond, t0);
         }
         opn = "bc1any2f";
         goto not_likely;
     case OPC_BC1TANY2:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x3 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
+            TCGv_i32 t1 = tcg_temp_new_i32();
+            tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+1));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_temp_free_i32(t1);
+            tcg_gen_andi_i32(t0, t0, 1);
+            tcg_gen_extu_i32_tl(bcond, t0);
         }
         opn = "bc1any2t";
         goto not_likely;
     case OPC_BC1FANY4:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0xf << cc);
-            tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
+            TCGv_i32 t1 = tcg_temp_new_i32();
+            tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+1));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+2));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+3));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_temp_free_i32(t1);
+            tcg_gen_not_i32(t0, t0);
+            tcg_gen_andi_i32(t0, t0, 1);
+            tcg_gen_extu_i32_tl(bcond, t0);
         }
         opn = "bc1any4f";
         goto not_likely;
     case OPC_BC1TANY4:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0xf << cc);
-            tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
-            tcg_gen_movi_i32(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_i32(bcond, 1);
-            gen_set_label(l2);
+            TCGv_i32 t1 = tcg_temp_new_i32();
+            tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+1));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+2));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+3));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_temp_free_i32(t1);
+            tcg_gen_andi_i32(t0, t0, 1);
+            tcg_gen_extu_i32_tl(bcond, t0);
         }
         opn = "bc1any4t";
     not_likely:
@@ -5770,125 +5693,99 @@ static void gen_cp1 (DisasContext *ctx, uint32_t opc, int rt, int fs)
 
 static void gen_movci (DisasContext *ctx, int rd, int rs, int cc, int tf)
 {
-    int l1 = gen_new_label();
-    uint32_t ccbit;
+    int l1;
     TCGCond cond;
-    TCGv t0 = tcg_temp_local_new();
-    TCGv_i32 r_tmp = tcg_temp_new_i32();
+    TCGv_i32 t0;
+
+    if (rd == 0) {
+        /* Treat as NOP. */
+        return;
+    }
 
-    if (cc)
-        ccbit = 1 << (24 + cc);
-    else
-        ccbit = 1 << 23;
     if (tf)
         cond = TCG_COND_EQ;
     else
         cond = TCG_COND_NE;
 
-    gen_load_gpr(t0, rd);
-    tcg_gen_andi_i32(r_tmp, fpu_fcr31, ccbit);
-    tcg_gen_brcondi_i32(cond, r_tmp, 0, l1);
-    tcg_temp_free_i32(r_tmp);
-    gen_load_gpr(t0, rs);
+    l1 = gen_new_label();
+    t0 = tcg_temp_new_i32();
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc));
+    tcg_gen_brcondi_i32(cond, t0, 0, l1);
+    if (rs == 0) {
+        tcg_gen_movi_tl(cpu_gpr[rd], 0);
+    } else {
+        tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
+    }
     gen_set_label(l1);
-    gen_store_gpr(t0, rd);
-    tcg_temp_free(t0);
+    tcg_temp_free_i32(t0);
 }
 
 static inline void gen_movcf_s (int fs, int fd, int cc, int tf)
 {
-    uint32_t ccbit;
     int cond;
-    TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-    TCGv_i32 fp0 = tcg_temp_local_new_i32();
+    TCGv_i32 t0 = tcg_temp_new_i32();
     int l1 = gen_new_label();
 
-    if (cc)
-        ccbit = 1 << (24 + cc);
-    else
-        ccbit = 1 << 23;
-
     if (tf)
         cond = TCG_COND_EQ;
     else
         cond = TCG_COND_NE;
 
-    gen_load_fpr32(fp0, fd);
-    tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit);
-    tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
-    tcg_temp_free_i32(r_tmp1);
-    gen_load_fpr32(fp0, fs);
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc));
+    tcg_gen_brcondi_i32(cond, t0, 0, l1);
+    gen_load_fpr32(t0, fs);
+    gen_store_fpr32(t0, fd);
     gen_set_label(l1);
-    gen_store_fpr32(fp0, fd);
-    tcg_temp_free_i32(fp0);
+    tcg_temp_free_i32(t0);
 }
 
 static inline void gen_movcf_d (DisasContext *ctx, int fs, int fd, int cc, int tf)
 {
-    uint32_t ccbit;
     int cond;
-    TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-    TCGv_i64 fp0 = tcg_temp_local_new_i64();
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i64 fp0;
     int l1 = gen_new_label();
 
-    if (cc)
-        ccbit = 1 << (24 + cc);
-    else
-        ccbit = 1 << 23;
-
     if (tf)
         cond = TCG_COND_EQ;
     else
         cond = TCG_COND_NE;
 
-    gen_load_fpr64(ctx, fp0, fd);
-    tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit);
-    tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
-    tcg_temp_free_i32(r_tmp1);
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc));
+    tcg_gen_brcondi_i32(cond, t0, 0, l1);
+    fp0 = tcg_temp_local_new_i64();
     gen_load_fpr64(ctx, fp0, fs);
-    gen_set_label(l1);
     gen_store_fpr64(ctx, fp0, fd);
     tcg_temp_free_i64(fp0);
+    gen_set_label(l1);
+    tcg_temp_free_i32(t0);
 }
 
 static inline void gen_movcf_ps (int fs, int fd, int cc, int tf)
 {
-    uint32_t ccbit1, ccbit2;
     int cond;
-    TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-    TCGv_i32 fp0 = tcg_temp_local_new_i32();
+    TCGv_i32 t0 = tcg_temp_new_i32();
     int l1 = gen_new_label();
     int l2 = gen_new_label();
 
-    if (cc) {
-        ccbit1 = 1 << (24 + cc);
-        ccbit2 = 1 << (25 + cc);
-    } else {
-        ccbit1 = 1 << 23;
-        ccbit2 = 1 << 25;
-    }
-
     if (tf)
         cond = TCG_COND_EQ;
     else
         cond = TCG_COND_NE;
 
-    gen_load_fpr32(fp0, fd);
-    tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit1);
-    tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
-    gen_load_fpr32(fp0, fs);
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc));
+    tcg_gen_brcondi_i32(cond, t0, 0, l1);
+    gen_load_fpr32(t0, fs);
+    gen_store_fpr32(t0, fd);
     gen_set_label(l1);
-    gen_store_fpr32(fp0, fd);
 
-    gen_load_fpr32h(fp0, fd);
-    tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit2);
-    tcg_gen_brcondi_i32(cond, r_tmp1, 0, l2);
-    gen_load_fpr32h(fp0, fs);
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc+1));
+    tcg_gen_brcondi_i32(cond, t0, 0, l2);
+    gen_load_fpr32h(t0, fs);
+    gen_store_fpr32h(t0, fd);
     gen_set_label(l2);
-    gen_store_fpr32h(fp0, fd);
 
-    tcg_temp_free_i32(r_tmp1);
-    tcg_temp_free_i32(fp0);
+    tcg_temp_free_i32(t0);
 }
 
 
@@ -7582,14 +7479,8 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
         int l1 = gen_new_label();
 
         MIPS_DEBUG("blikely condition (" TARGET_FMT_lx ")", ctx->pc + 4);
-        tcg_gen_brcondi_i32(TCG_COND_NE, bcond, 0, l1);
-        {
-            TCGv_i32 r_tmp = tcg_temp_new_i32();
-
-            tcg_gen_movi_i32(r_tmp, ctx->hflags & ~MIPS_HFLAG_BMASK);
-            tcg_gen_st_i32(r_tmp, cpu_env, offsetof(CPUState, hflags));
-            tcg_temp_free_i32(r_tmp);
-        }
+        tcg_gen_brcondi_tl(TCG_COND_NE, bcond, 0, l1);
+        tcg_gen_movi_i32(hflags, ctx->hflags & ~MIPS_HFLAG_BMASK);
         gen_goto_tb(ctx, 1, ctx->pc + 4);
         gen_set_label(l1);
     }
@@ -7718,7 +7609,8 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
         case OPC_MUL:
             gen_arith(env, ctx, op1, rd, rs, rt);
             break;
-        case OPC_CLZ ... OPC_CLO:
+        case OPC_CLO:
+        case OPC_CLZ:
             check_insn(env, ctx, ISA_MIPS32);
             gen_cl(ctx, op1, rd, rs);
             break;
@@ -7735,7 +7627,8 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
             /* Treat as NOP. */
             break;
 #if defined(TARGET_MIPS64)
-        case OPC_DCLZ ... OPC_DCLO:
+        case OPC_DCLO:
+        case OPC_DCLZ:
             check_insn(env, ctx, ISA_MIPS64);
             check_mips_64(ctx);
             gen_cl(ctx, op1, rd, rs);
@@ -8142,7 +8035,7 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
             {
                 int l1 = gen_new_label();
 
-                tcg_gen_brcondi_i32(TCG_COND_NE, bcond, 0, l1);
+                tcg_gen_brcondi_tl(TCG_COND_NE, bcond, 0, l1);
                 gen_goto_tb(ctx, 1, ctx->pc + 4);
                 gen_set_label(l1);
                 gen_goto_tb(ctx, 0, ctx->btarget);
@@ -8435,22 +8328,13 @@ static void mips_tcg_init(void)
     cpu_dspctrl = tcg_global_mem_new(TCG_AREG0,
                                      offsetof(CPUState, active_tc.DSPControl),
                                      "DSPControl");
-    bcond = tcg_global_mem_new_i32(TCG_AREG0,
-                                   offsetof(CPUState, bcond), "bcond");
+    bcond = tcg_global_mem_new(TCG_AREG0,
+                               offsetof(CPUState, bcond), "bcond");
     btarget = tcg_global_mem_new(TCG_AREG0,
                                  offsetof(CPUState, btarget), "btarget");
-    for (i = 0; i < 32; i++)
-        fpu_fpr32[i] = tcg_global_mem_new_i32(TCG_AREG0,
-            offsetof(CPUState, active_fpu.fpr[i].w[FP_ENDIAN_IDX]),
-            fregnames[i]);
-    for (i = 0; i < 32; i++)
-        fpu_fpr64[i] = tcg_global_mem_new_i64(TCG_AREG0,
-            offsetof(CPUState, active_fpu.fpr[i]),
-            fregnames_64[i]);
-    for (i = 0; i < 32; i++)
-        fpu_fpr32h[i] = tcg_global_mem_new_i32(TCG_AREG0,
-            offsetof(CPUState, active_fpu.fpr[i].w[!FP_ENDIAN_IDX]),
-            fregnames_h[i]);
+    hflags = tcg_global_mem_new_i32(TCG_AREG0,
+                                    offsetof(CPUState, hflags), "hflags");
+
     fpu_fcr0 = tcg_global_mem_new_i32(TCG_AREG0,
                                       offsetof(CPUState, active_fpu.fcr0),
                                       "fcr0");