target-mips: optimize gen_movcf_*()
[qemu] / target-mips / translate.c
index 677981c..8adc89c 100644 (file)
@@ -429,9 +429,8 @@ enum {
 static TCGv_ptr cpu_env;
 static TCGv cpu_gpr[32], cpu_PC;
 static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC], cpu_ACX[MIPS_DSP_ACC];
-static TCGv cpu_dspctrl, btarget;
-static TCGv bcond;
-static TCGv_i32 fpu_fpr32[32], fpu_fpr32h[32];
+static TCGv cpu_dspctrl, btarget, bcond;
+static TCGv_i32 hflags;
 static TCGv_i32 fpu_fcr0, fpu_fcr31;
 
 #include "gen-icount.h"
@@ -500,12 +499,6 @@ static const char *fregnames[] =
       "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
       "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", };
 
-static const char *fregnames_h[] =
-    { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
-      "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
-      "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
-      "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31", };
-
 #ifdef MIPS_DEBUG_DISAS
 #define MIPS_DEBUG(fmt, args...)                         \
         qemu_log_mask(CPU_LOG_TB_IN_ASM,                \
@@ -600,58 +593,62 @@ static inline void gen_store_srsgpr (int from, int to)
 /* Floating point register moves. */
 static inline void gen_load_fpr32 (TCGv_i32 t, int reg)
 {
-    tcg_gen_mov_i32(t, fpu_fpr32[reg]);
+    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
 }
 
 static inline void gen_store_fpr32 (TCGv_i32 t, int reg)
 {
-    tcg_gen_mov_i32(fpu_fpr32[reg], t);
+    tcg_gen_st_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
+}
+
+static inline void gen_load_fpr32h (TCGv_i32 t, int reg)
+{
+    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
+}
+
+static inline void gen_store_fpr32h (TCGv_i32 t, int reg)
+{
+    tcg_gen_st_i32(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
 }
 
 static inline void gen_load_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
 {
     if (ctx->hflags & MIPS_HFLAG_F64) {
-        tcg_gen_concat_i32_i64(t, fpu_fpr32[reg], fpu_fpr32h[reg]);
+        tcg_gen_ld_i64(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].d));
     } else {
-        tcg_gen_concat_i32_i64(t, fpu_fpr32[reg & ~1], fpu_fpr32[reg | 1]);
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        TCGv_i32 t1 = tcg_temp_new_i32();
+        gen_load_fpr32(t0, reg & ~1);
+        gen_load_fpr32(t1, reg | 1);
+        tcg_gen_concat_i32_i64(t, t0, t1);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
     }
 }
 
 static inline void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
 {
     if (ctx->hflags & MIPS_HFLAG_F64) {
-        tcg_gen_trunc_i64_i32(fpu_fpr32[reg], t);
-        tcg_gen_shri_i64(t, t, 32);
-        tcg_gen_trunc_i64_i32(fpu_fpr32h[reg], t);
+        tcg_gen_st_i64(t, cpu_env, offsetof(CPUState, active_fpu.fpr[reg].d));
     } else {
-        tcg_gen_trunc_i64_i32(fpu_fpr32[reg & ~1], t);
-        tcg_gen_shri_i64(t, t, 32);
-        tcg_gen_trunc_i64_i32(fpu_fpr32[reg | 1], t);
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i32 t1 = tcg_temp_new_i32();
+        tcg_gen_trunc_i64_i32(t1, t);
+        gen_store_fpr32(t1, reg & ~1);
+        tcg_gen_shri_i64(t0, t, 32);
+        tcg_gen_trunc_i64_i32(t1, t0);
+        gen_store_fpr32(t1, reg | 1);
+        tcg_temp_free_i32(t1);
+        tcg_temp_free_i64(t0);
     }
 }
 
-static inline void gen_load_fpr32h (TCGv_i32 t, int reg)
-{
-    tcg_gen_mov_i32(t, fpu_fpr32h[reg]);
-}
-
-static inline void gen_store_fpr32h (TCGv_i32 t, int reg)
-{
-    tcg_gen_mov_i32(fpu_fpr32h[reg], t);
-}
-
-static inline void get_fp_cond (TCGv_i32 t)
+static inline int get_fp_bit (int cc)
 {
-    TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-    TCGv_i32 r_tmp2 = tcg_temp_new_i32();
-
-    tcg_gen_shri_i32(r_tmp2, fpu_fcr31, 24);
-    tcg_gen_andi_i32(r_tmp2, r_tmp2, 0xfe);
-    tcg_gen_shri_i32(r_tmp1, fpu_fcr31, 23);
-    tcg_gen_andi_i32(r_tmp1, r_tmp1, 0x1);
-    tcg_gen_or_i32(t, r_tmp1, r_tmp2);
-    tcg_temp_free_i32(r_tmp1);
-    tcg_temp_free_i32(r_tmp2);
+    if (cc)
+        return 24 + cc;
+    else
+        return 23;
 }
 
 #define FOP_CONDS(type, fmt, bits)                                            \
@@ -758,11 +755,7 @@ static inline void save_cpu_state (DisasContext *ctx, int do_save_pc)
         ctx->saved_pc = ctx->pc;
     }
     if (ctx->hflags != ctx->saved_hflags) {
-        TCGv_i32 r_tmp = tcg_temp_new_i32();
-
-        tcg_gen_movi_i32(r_tmp, ctx->hflags);
-        tcg_gen_st_i32(r_tmp, cpu_env, offsetof(CPUState, hflags));
-        tcg_temp_free_i32(r_tmp);
+        tcg_gen_movi_i32(hflags, ctx->hflags);
         ctx->saved_hflags = ctx->hflags;
         switch (ctx->hflags & MIPS_HFLAG_BMASK) {
         case MIPS_HFLAG_BR:
@@ -1795,10 +1788,6 @@ static void gen_HILO (DisasContext *ctx, uint32_t opc, int reg)
             tcg_gen_movi_tl(cpu_LO[0], 0);
         opn = "mtlo";
         break;
-    default:
-        MIPS_INVAL(opn);
-        generate_exception(ctx, EXCP_RI);
-        return;
     }
     MIPS_DEBUG("%s %s", opn, regnames[reg]);
 }
@@ -1807,8 +1796,23 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
                         int rs, int rt)
 {
     const char *opn = "mul/div";
-    TCGv t0 = tcg_temp_local_new();
-    TCGv t1 = tcg_temp_local_new();
+    TCGv t0, t1;
+
+    switch (opc) {
+    case OPC_DIV:
+    case OPC_DIVU:
+#if defined(TARGET_MIPS64)
+    case OPC_DDIV:
+    case OPC_DDIVU:
+#endif
+        t0 = tcg_temp_local_new();
+        t1 = tcg_temp_local_new();
+        break;
+    default:
+        t0 = tcg_temp_new();
+        t1 = tcg_temp_new();
+        break;
+    }
 
     gen_load_gpr(t0, rs);
     gen_load_gpr(t1, rt);
@@ -1816,30 +1820,22 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
     case OPC_DIV:
         {
             int l1 = gen_new_label();
+            int l2 = gen_new_label();
 
+            tcg_gen_ext32s_tl(t0, t0);
+            tcg_gen_ext32s_tl(t1, t1);
             tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            {
-                int l2 = gen_new_label();
-                TCGv_i32 r_tmp1 = tcg_temp_local_new_i32();
-                TCGv_i32 r_tmp2 = tcg_temp_local_new_i32();
-                TCGv_i32 r_tmp3 = tcg_temp_local_new_i32();
-
-                tcg_gen_trunc_tl_i32(r_tmp1, t0);
-                tcg_gen_trunc_tl_i32(r_tmp2, t1);
-                tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp1, -1 << 31, l2);
-                tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp2, -1, l2);
-                tcg_gen_ext32s_tl(cpu_LO[0], t0);
-                tcg_gen_movi_tl(cpu_HI[0], 0);
-                tcg_gen_br(l1);
-                gen_set_label(l2);
-                tcg_gen_div_i32(r_tmp3, r_tmp1, r_tmp2);
-                tcg_gen_rem_i32(r_tmp2, r_tmp1, r_tmp2);
-                tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
-                tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp2);
-                tcg_temp_free_i32(r_tmp1);
-                tcg_temp_free_i32(r_tmp2);
-                tcg_temp_free_i32(r_tmp3);
-            }
+            tcg_gen_brcondi_tl(TCG_COND_NE, t0, INT_MIN, l2);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1, l2);
+
+            tcg_gen_mov_tl(cpu_LO[0], t0);
+            tcg_gen_movi_tl(cpu_HI[0], 0);
+            tcg_gen_br(l1);
+            gen_set_label(l2);
+            tcg_gen_div_tl(cpu_LO[0], t0, t1);
+            tcg_gen_rem_tl(cpu_HI[0], t0, t1);
+            tcg_gen_ext32s_tl(cpu_LO[0], cpu_LO[0]);
+            tcg_gen_ext32s_tl(cpu_HI[0], cpu_HI[0]);
             gen_set_label(l1);
         }
         opn = "div";
@@ -1848,40 +1844,30 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         {
             int l1 = gen_new_label();
 
-            tcg_gen_ext32s_tl(t1, t1);
+            tcg_gen_ext32u_tl(t0, t0);
+            tcg_gen_ext32u_tl(t1, t1);
             tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            {
-                TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-                TCGv_i32 r_tmp2 = tcg_temp_new_i32();
-                TCGv_i32 r_tmp3 = tcg_temp_new_i32();
-
-                tcg_gen_trunc_tl_i32(r_tmp1, t0);
-                tcg_gen_trunc_tl_i32(r_tmp2, t1);
-                tcg_gen_divu_i32(r_tmp3, r_tmp1, r_tmp2);
-                tcg_gen_remu_i32(r_tmp1, r_tmp1, r_tmp2);
-                tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
-                tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp1);
-                tcg_temp_free_i32(r_tmp1);
-                tcg_temp_free_i32(r_tmp2);
-                tcg_temp_free_i32(r_tmp3);
-            }
+            tcg_gen_divu_tl(cpu_LO[0], t0, t1);
+            tcg_gen_remu_tl(cpu_HI[0], t0, t1);
+            tcg_gen_ext32s_tl(cpu_LO[0], cpu_LO[0]);
+            tcg_gen_ext32s_tl(cpu_HI[0], cpu_HI[0]);
             gen_set_label(l1);
         }
         opn = "divu";
         break;
     case OPC_MULT:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
-
-            tcg_gen_ext_tl_i64(r_tmp1, t0);
-            tcg_gen_ext_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
+
+            tcg_gen_ext_tl_i64(t2, t0);
+            tcg_gen_ext_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -1889,19 +1875,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         break;
     case OPC_MULTU:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
 
             tcg_gen_ext32u_tl(t0, t0);
             tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_extu_tl_i64(r_tmp1, t0);
-            tcg_gen_extu_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            tcg_gen_extu_tl_i64(t2, t0);
+            tcg_gen_extu_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -1911,20 +1897,17 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
     case OPC_DDIV:
         {
             int l1 = gen_new_label();
+            int l2 = gen_new_label();
 
             tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
-            {
-                int l2 = gen_new_label();
-
-                tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
-                tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
-                tcg_gen_mov_tl(cpu_LO[0], t0);
-                tcg_gen_movi_tl(cpu_HI[0], 0);
-                tcg_gen_br(l1);
-                gen_set_label(l2);
-                tcg_gen_div_i64(cpu_LO[0], t0, t1);
-                tcg_gen_rem_i64(cpu_HI[0], t0, t1);
-            }
+            tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
+            tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
+            tcg_gen_mov_tl(cpu_LO[0], t0);
+            tcg_gen_movi_tl(cpu_HI[0], 0);
+            tcg_gen_br(l1);
+            gen_set_label(l2);
+            tcg_gen_div_i64(cpu_LO[0], t0, t1);
+            tcg_gen_rem_i64(cpu_HI[0], t0, t1);
             gen_set_label(l1);
         }
         opn = "ddiv";
@@ -1951,19 +1934,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
 #endif
     case OPC_MADD:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
-
-            tcg_gen_ext_tl_i64(r_tmp1, t0);
-            tcg_gen_ext_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
-            tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
+
+            tcg_gen_ext_tl_i64(t2, t0);
+            tcg_gen_ext_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_add_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_LO[1], t1);
         }
@@ -1971,21 +1954,21 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         break;
     case OPC_MADDU:
        {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
 
             tcg_gen_ext32u_tl(t0, t0);
             tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_extu_tl_i64(r_tmp1, t0);
-            tcg_gen_extu_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
-            tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            tcg_gen_extu_tl_i64(t2, t0);
+            tcg_gen_extu_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_add_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -1993,19 +1976,19 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         break;
     case OPC_MSUB:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
-
-            tcg_gen_ext_tl_i64(r_tmp1, t0);
-            tcg_gen_ext_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
-            tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
+
+            tcg_gen_ext_tl_i64(t2, t0);
+            tcg_gen_ext_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_sub_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -2013,21 +1996,21 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
         break;
     case OPC_MSUBU:
         {
-            TCGv_i64 r_tmp1 = tcg_temp_new_i64();
-            TCGv_i64 r_tmp2 = tcg_temp_new_i64();
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            TCGv_i64 t3 = tcg_temp_new_i64();
 
             tcg_gen_ext32u_tl(t0, t0);
             tcg_gen_ext32u_tl(t1, t1);
-            tcg_gen_extu_tl_i64(r_tmp1, t0);
-            tcg_gen_extu_tl_i64(r_tmp2, t1);
-            tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
-            tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
-            tcg_temp_free_i64(r_tmp2);
-            tcg_gen_trunc_i64_tl(t0, r_tmp1);
-            tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
-            tcg_gen_trunc_i64_tl(t1, r_tmp1);
-            tcg_temp_free_i64(r_tmp1);
+            tcg_gen_extu_tl_i64(t2, t0);
+            tcg_gen_extu_tl_i64(t3, t1);
+            tcg_gen_mul_i64(t2, t2, t3);
+            tcg_gen_concat_tl_i64(t3, cpu_LO[0], cpu_HI[0]);
+            tcg_gen_sub_i64(t2, t2, t3);
+            tcg_temp_free_i64(t3);
+            tcg_gen_trunc_i64_tl(t0, t2);
+            tcg_gen_shri_i64(t2, t2, 32);
+            tcg_gen_trunc_i64_tl(t1, t2);
+            tcg_temp_free_i64(t2);
             tcg_gen_ext32s_tl(cpu_LO[0], t0);
             tcg_gen_ext32s_tl(cpu_HI[0], t1);
         }
@@ -5511,132 +5494,88 @@ static void gen_compute_branch1 (CPUState *env, DisasContext *ctx, uint32_t op,
 
     switch (op) {
     case OPC_BC1F:
-        {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x1 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
-            tcg_gen_movi_tl(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_tl(bcond, 1);
-            gen_set_label(l2);
-        }
+        tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+        tcg_gen_not_i32(t0, t0);
+        tcg_gen_andi_i32(t0, t0, 1);
+        tcg_gen_extu_i32_tl(bcond, t0);
         opn = "bc1f";
         goto not_likely;
     case OPC_BC1FL:
-        {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x1 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
-            tcg_gen_movi_tl(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_tl(bcond, 1);
-            gen_set_label(l2);
-        }
+        tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+        tcg_gen_not_i32(t0, t0);
+        tcg_gen_andi_i32(t0, t0, 1);
+        tcg_gen_extu_i32_tl(bcond, t0);
         opn = "bc1fl";
         goto likely;
     case OPC_BC1T:
-        {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x1 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
-            tcg_gen_movi_tl(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_tl(bcond, 1);
-            gen_set_label(l2);
-        }
+        tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+        tcg_gen_andi_i32(t0, t0, 1);
+        tcg_gen_extu_i32_tl(bcond, t0);
         opn = "bc1t";
         goto not_likely;
     case OPC_BC1TL:
-        {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x1 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
-            tcg_gen_movi_tl(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_tl(bcond, 1);
-            gen_set_label(l2);
-        }
+        tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+        tcg_gen_andi_i32(t0, t0, 1);
+        tcg_gen_extu_i32_tl(bcond, t0);
         opn = "bc1tl";
     likely:
         ctx->hflags |= MIPS_HFLAG_BL;
         break;
     case OPC_BC1FANY2:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x3 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
-            tcg_gen_movi_tl(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_tl(bcond, 1);
-            gen_set_label(l2);
+            TCGv_i32 t1 = tcg_temp_new_i32();
+            tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+1));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_temp_free_i32(t1);
+            tcg_gen_not_i32(t0, t0);
+            tcg_gen_andi_i32(t0, t0, 1);
+            tcg_gen_extu_i32_tl(bcond, t0);
         }
         opn = "bc1any2f";
         goto not_likely;
     case OPC_BC1TANY2:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0x3 << cc);
-            tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
-            tcg_gen_movi_tl(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_tl(bcond, 1);
-            gen_set_label(l2);
+            TCGv_i32 t1 = tcg_temp_new_i32();
+            tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+1));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_temp_free_i32(t1);
+            tcg_gen_andi_i32(t0, t0, 1);
+            tcg_gen_extu_i32_tl(bcond, t0);
         }
         opn = "bc1any2t";
         goto not_likely;
     case OPC_BC1FANY4:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0xf << cc);
-            tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
-            tcg_gen_movi_tl(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_tl(bcond, 1);
-            gen_set_label(l2);
+            TCGv_i32 t1 = tcg_temp_new_i32();
+            tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+1));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+2));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+3));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_temp_free_i32(t1);
+            tcg_gen_not_i32(t0, t0);
+            tcg_gen_andi_i32(t0, t0, 1);
+            tcg_gen_extu_i32_tl(bcond, t0);
         }
         opn = "bc1any4f";
         goto not_likely;
     case OPC_BC1TANY4:
         {
-            int l1 = gen_new_label();
-            int l2 = gen_new_label();
-
-            get_fp_cond(t0);
-            tcg_gen_andi_i32(t0, t0, 0xf << cc);
-            tcg_gen_brcondi_i32(TCG_COND_NE, t0, 0, l1);
-            tcg_gen_movi_tl(bcond, 0);
-            tcg_gen_br(l2);
-            gen_set_label(l1);
-            tcg_gen_movi_tl(bcond, 1);
-            gen_set_label(l2);
+            TCGv_i32 t1 = tcg_temp_new_i32();
+            tcg_gen_shri_i32(t0, fpu_fcr31, get_fp_bit(cc));
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+1));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+2));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_gen_shri_i32(t1, fpu_fcr31, get_fp_bit(cc+3));
+            tcg_gen_or_i32(t0, t0, t1);
+            tcg_temp_free_i32(t1);
+            tcg_gen_andi_i32(t0, t0, 1);
+            tcg_gen_extu_i32_tl(bcond, t0);
         }
         opn = "bc1any4t";
     not_likely:
@@ -5754,125 +5693,99 @@ static void gen_cp1 (DisasContext *ctx, uint32_t opc, int rt, int fs)
 
 static void gen_movci (DisasContext *ctx, int rd, int rs, int cc, int tf)
 {
-    int l1 = gen_new_label();
-    uint32_t ccbit;
+    int l1;
     TCGCond cond;
-    TCGv t0 = tcg_temp_local_new();
-    TCGv_i32 r_tmp = tcg_temp_new_i32();
+    TCGv_i32 t0;
+
+    if (rd == 0) {
+        /* Treat as NOP. */
+        return;
+    }
 
-    if (cc)
-        ccbit = 1 << (24 + cc);
-    else
-        ccbit = 1 << 23;
     if (tf)
         cond = TCG_COND_EQ;
     else
         cond = TCG_COND_NE;
 
-    gen_load_gpr(t0, rd);
-    tcg_gen_andi_i32(r_tmp, fpu_fcr31, ccbit);
-    tcg_gen_brcondi_i32(cond, r_tmp, 0, l1);
-    tcg_temp_free_i32(r_tmp);
-    gen_load_gpr(t0, rs);
+    l1 = gen_new_label();
+    t0 = tcg_temp_new_i32();
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc));
+    tcg_gen_brcondi_i32(cond, t0, 0, l1);
+    if (rs == 0) {
+        tcg_gen_movi_tl(cpu_gpr[rd], 0);
+    } else {
+        tcg_gen_mov_tl(cpu_gpr[rd], cpu_gpr[rs]);
+    }
     gen_set_label(l1);
-    gen_store_gpr(t0, rd);
-    tcg_temp_free(t0);
+    tcg_temp_free_i32(t0);
 }
 
 static inline void gen_movcf_s (int fs, int fd, int cc, int tf)
 {
-    uint32_t ccbit;
     int cond;
-    TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-    TCGv_i32 fp0 = tcg_temp_local_new_i32();
+    TCGv_i32 t0 = tcg_temp_new_i32();
     int l1 = gen_new_label();
 
-    if (cc)
-        ccbit = 1 << (24 + cc);
-    else
-        ccbit = 1 << 23;
-
     if (tf)
         cond = TCG_COND_EQ;
     else
         cond = TCG_COND_NE;
 
-    gen_load_fpr32(fp0, fd);
-    tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit);
-    tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
-    tcg_temp_free_i32(r_tmp1);
-    gen_load_fpr32(fp0, fs);
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc));
+    tcg_gen_brcondi_i32(cond, t0, 0, l1);
+    gen_load_fpr32(t0, fs);
+    gen_store_fpr32(t0, fd);
     gen_set_label(l1);
-    gen_store_fpr32(fp0, fd);
-    tcg_temp_free_i32(fp0);
+    tcg_temp_free_i32(t0);
 }
 
 static inline void gen_movcf_d (DisasContext *ctx, int fs, int fd, int cc, int tf)
 {
-    uint32_t ccbit;
     int cond;
-    TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-    TCGv_i64 fp0 = tcg_temp_local_new_i64();
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i64 fp0;
     int l1 = gen_new_label();
 
-    if (cc)
-        ccbit = 1 << (24 + cc);
-    else
-        ccbit = 1 << 23;
-
     if (tf)
         cond = TCG_COND_EQ;
     else
         cond = TCG_COND_NE;
 
-    gen_load_fpr64(ctx, fp0, fd);
-    tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit);
-    tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
-    tcg_temp_free_i32(r_tmp1);
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc));
+    tcg_gen_brcondi_i32(cond, t0, 0, l1);
+    fp0 = tcg_temp_local_new_i64();
     gen_load_fpr64(ctx, fp0, fs);
-    gen_set_label(l1);
     gen_store_fpr64(ctx, fp0, fd);
     tcg_temp_free_i64(fp0);
+    gen_set_label(l1);
+    tcg_temp_free_i32(t0);
 }
 
 static inline void gen_movcf_ps (int fs, int fd, int cc, int tf)
 {
-    uint32_t ccbit1, ccbit2;
     int cond;
-    TCGv_i32 r_tmp1 = tcg_temp_new_i32();
-    TCGv_i32 fp0 = tcg_temp_local_new_i32();
+    TCGv_i32 t0 = tcg_temp_new_i32();
     int l1 = gen_new_label();
     int l2 = gen_new_label();
 
-    if (cc) {
-        ccbit1 = 1 << (24 + cc);
-        ccbit2 = 1 << (25 + cc);
-    } else {
-        ccbit1 = 1 << 23;
-        ccbit2 = 1 << 25;
-    }
-
     if (tf)
         cond = TCG_COND_EQ;
     else
         cond = TCG_COND_NE;
 
-    gen_load_fpr32(fp0, fd);
-    tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit1);
-    tcg_gen_brcondi_i32(cond, r_tmp1, 0, l1);
-    gen_load_fpr32(fp0, fs);
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc));
+    tcg_gen_brcondi_i32(cond, t0, 0, l1);
+    gen_load_fpr32(t0, fs);
+    gen_store_fpr32(t0, fd);
     gen_set_label(l1);
-    gen_store_fpr32(fp0, fd);
 
-    gen_load_fpr32h(fp0, fd);
-    tcg_gen_andi_i32(r_tmp1, fpu_fcr31, ccbit2);
-    tcg_gen_brcondi_i32(cond, r_tmp1, 0, l2);
-    gen_load_fpr32h(fp0, fs);
+    tcg_gen_andi_i32(t0, fpu_fcr31, get_fp_bit(cc+1));
+    tcg_gen_brcondi_i32(cond, t0, 0, l2);
+    gen_load_fpr32h(t0, fs);
+    gen_store_fpr32h(t0, fd);
     gen_set_label(l2);
-    gen_store_fpr32h(fp0, fd);
 
-    tcg_temp_free_i32(r_tmp1);
-    tcg_temp_free_i32(fp0);
+    tcg_temp_free_i32(t0);
 }
 
 
@@ -7567,13 +7480,7 @@ static void decode_opc (CPUState *env, DisasContext *ctx)
 
         MIPS_DEBUG("blikely condition (" TARGET_FMT_lx ")", ctx->pc + 4);
         tcg_gen_brcondi_tl(TCG_COND_NE, bcond, 0, l1);
-        {
-            TCGv_i32 r_tmp = tcg_temp_new_i32();
-
-            tcg_gen_movi_i32(r_tmp, ctx->hflags & ~MIPS_HFLAG_BMASK);
-            tcg_gen_st_i32(r_tmp, cpu_env, offsetof(CPUState, hflags));
-            tcg_temp_free_i32(r_tmp);
-        }
+        tcg_gen_movi_i32(hflags, ctx->hflags & ~MIPS_HFLAG_BMASK);
         gen_goto_tb(ctx, 1, ctx->pc + 4);
         gen_set_label(l1);
     }
@@ -8425,14 +8332,9 @@ static void mips_tcg_init(void)
                                offsetof(CPUState, bcond), "bcond");
     btarget = tcg_global_mem_new(TCG_AREG0,
                                  offsetof(CPUState, btarget), "btarget");
-    for (i = 0; i < 32; i++)
-        fpu_fpr32[i] = tcg_global_mem_new_i32(TCG_AREG0,
-            offsetof(CPUState, active_fpu.fpr[i].w[FP_ENDIAN_IDX]),
-            fregnames[i]);
-    for (i = 0; i < 32; i++)
-        fpu_fpr32h[i] = tcg_global_mem_new_i32(TCG_AREG0,
-            offsetof(CPUState, active_fpu.fpr[i].w[!FP_ENDIAN_IDX]),
-            fregnames_h[i]);
+    hflags = tcg_global_mem_new_i32(TCG_AREG0,
+                                    offsetof(CPUState, hflags), "hflags");
+
     fpu_fcr0 = tcg_global_mem_new_i32(TCG_AREG0,
                                       offsetof(CPUState, active_fpu.fcr0),
                                       "fcr0");