vvfat: one more missing BlockDriver C99 initializer conversion
[qemu] / target-arm / translate.c
index 5d71349..adac19a 100644 (file)
@@ -17,7 +17,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
  */
 #include <stdarg.h>
 #include <stdlib.h>
@@ -31,6 +31,7 @@
 #include "tcg-op.h"
 #include "qemu-log.h"
 
+#include "helpers.h"
 #define GEN_HELPER 1
 #include "helpers.h"
 
@@ -40,7 +41,7 @@
 #define ENABLE_ARCH_6T2   arm_feature(env, ARM_FEATURE_THUMB2)
 #define ENABLE_ARCH_7     arm_feature(env, ARM_FEATURE_V7)
 
-#define ARCH(x) if (!ENABLE_ARCH_##x) goto illegal_op;
+#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
 
 /* internal defines */
 typedef struct DisasContext {
@@ -56,7 +57,6 @@ typedef struct DisasContext {
     struct TranslationBlock *tb;
     int singlestep_enabled;
     int thumb;
-    int is_mem;
 #if !defined(CONFIG_USER_ONLY)
     int user;
 #endif
@@ -73,13 +73,14 @@ typedef struct DisasContext {
 #define DISAS_WFI 4
 #define DISAS_SWI 5
 
-static TCGv cpu_env;
+static TCGv_ptr cpu_env;
 /* We reuse the same 64-bit temporaries for efficiency.  */
-static TCGv cpu_V0, cpu_V1, cpu_M0;
+static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
 
 /* FIXME:  These should be removed.  */
 static TCGv cpu_T[2];
-static TCGv cpu_F0s, cpu_F1s, cpu_F0d, cpu_F1d;
+static TCGv cpu_F0s, cpu_F1s;
+static TCGv_i64 cpu_F0d, cpu_F1d;
 
 #define ICOUNT_TEMP cpu_T[0]
 #include "gen-icount.h"
@@ -87,10 +88,13 @@ static TCGv cpu_F0s, cpu_F1s, cpu_F0d, cpu_F1d;
 /* initialize TCG globals.  */
 void arm_translate_init(void)
 {
-    cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");
+    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
+
+    cpu_T[0] = tcg_global_reg_new_i32(TCG_AREG1, "T0");
+    cpu_T[1] = tcg_global_reg_new_i32(TCG_AREG2, "T1");
 
-    cpu_T[0] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG1, "T0");
-    cpu_T[1] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG2, "T1");
+#define GEN_HELPER 2
+#include "helpers.h"
 }
 
 /* The code generator doesn't like lots of temporaries, so maintain our own
@@ -100,16 +104,16 @@ static int num_temps;
 static TCGv temps[MAX_TEMPS];
 
 /* Allocate a temporary variable.  */
-static TCGv new_tmp(void)
+static TCGv_i32 new_tmp(void)
 {
     TCGv tmp;
     if (num_temps == MAX_TEMPS)
         abort();
 
-    if (GET_TCGV(temps[num_temps]))
+    if (GET_TCGV_I32(temps[num_temps]))
       return temps[num_temps++];
 
-    tmp = tcg_temp_new(TCG_TYPE_I32);
+    tmp = tcg_temp_new_i32();
     temps[num_temps++] = tmp;
     return tmp;
 }
@@ -120,11 +124,11 @@ static void dead_tmp(TCGv tmp)
     int i;
     num_temps--;
     i = num_temps;
-    if (GET_TCGV(temps[i]) == GET_TCGV(tmp))
+    if (TCGV_EQUAL(temps[i], tmp))
         return;
 
     /* Shuffle this temp to the last slot.  */
-    while (GET_TCGV(temps[i]) != GET_TCGV(tmp))
+    while (!TCGV_EQUAL(temps[i], tmp))
         i--;
     while (i < num_temps) {
         temps[i] = temps[i + 1];
@@ -190,7 +194,6 @@ static void store_reg(DisasContext *s, int reg, TCGv var)
 
 /* Basic operations.  */
 #define gen_op_movl_T0_T1() tcg_gen_mov_i32(cpu_T[0], cpu_T[1])
-#define gen_op_movl_T1_T0() tcg_gen_mov_i32(cpu_T[1], cpu_T[0])
 #define gen_op_movl_T0_im(im) tcg_gen_movi_i32(cpu_T[0], im)
 #define gen_op_movl_T1_im(im) tcg_gen_movi_i32(cpu_T[1], im)
 
@@ -204,7 +207,6 @@ static void store_reg(DisasContext *s, int reg, TCGv var)
 #define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
-#define gen_op_rscl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[1], cpu_T[0])
 
 #define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_xorl_T0_T1() tcg_gen_xor_i32(cpu_T[0], cpu_T[0], cpu_T[1])
@@ -214,11 +216,8 @@ static void store_reg(DisasContext *s, int reg, TCGv var)
 #define gen_op_logic_T0_cc() gen_logic_CC(cpu_T[0]);
 #define gen_op_logic_T1_cc() gen_logic_CC(cpu_T[1]);
 
-#define gen_op_shll_T0_im(im) tcg_gen_shli_i32(cpu_T[0], cpu_T[0], im)
 #define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im)
 #define gen_op_shrl_T1_im(im) tcg_gen_shri_i32(cpu_T[1], cpu_T[1], im)
-#define gen_op_sarl_T1_im(im) tcg_gen_sari_i32(cpu_T[1], cpu_T[1], im)
-#define gen_op_rorl_T1_im(im) tcg_gen_rori_i32(cpu_T[1], cpu_T[1], im)
 
 /* Value extensions.  */
 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
@@ -324,10 +323,10 @@ static void gen_roundqd(TCGv a, TCGv b)
 /* FIXME: Most targets have native widening multiplication.
    It would be good to use that instead of a full wide multiply.  */
 /* 32x32->64 multiply.  Marks inputs as dead.  */
-static TCGv gen_mulu_i64_i32(TCGv a, TCGv b)
+static TCGv_i64 gen_mulu_i64_i32(TCGv a, TCGv b)
 {
-    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
-    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
+    TCGv_i64 tmp1 = tcg_temp_new_i64();
+    TCGv_i64 tmp2 = tcg_temp_new_i64();
 
     tcg_gen_extu_i32_i64(tmp1, a);
     dead_tmp(a);
@@ -337,10 +336,10 @@ static TCGv gen_mulu_i64_i32(TCGv a, TCGv b)
     return tmp1;
 }
 
-static TCGv gen_muls_i64_i32(TCGv a, TCGv b)
+static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b)
 {
-    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
-    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
+    TCGv_i64 tmp1 = tcg_temp_new_i64();
+    TCGv_i64 tmp2 = tcg_temp_new_i64();
 
     tcg_gen_ext_i32_i64(tmp1, a);
     dead_tmp(a);
@@ -353,8 +352,8 @@ static TCGv gen_muls_i64_i32(TCGv a, TCGv b)
 /* Unsigned 32x32->64 multiply.  */
 static void gen_op_mull_T0_T1(void)
 {
-    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
-    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
+    TCGv_i64 tmp1 = tcg_temp_new_i64();
+    TCGv_i64 tmp2 = tcg_temp_new_i64();
 
     tcg_gen_extu_i32_i64(tmp1, cpu_T[0]);
     tcg_gen_extu_i32_i64(tmp2, cpu_T[1]);
@@ -367,8 +366,8 @@ static void gen_op_mull_T0_T1(void)
 /* Signed 32x32->64 multiply.  */
 static void gen_imull(TCGv a, TCGv b)
 {
-    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
-    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
+    TCGv_i64 tmp1 = tcg_temp_new_i64();
+    TCGv_i64 tmp2 = tcg_temp_new_i64();
 
     tcg_gen_ext_i32_i64(tmp1, a);
     tcg_gen_ext_i32_i64(tmp2, b);
@@ -377,7 +376,6 @@ static void gen_imull(TCGv a, TCGv b)
     tcg_gen_shri_i64(tmp1, tmp1, 32);
     tcg_gen_trunc_i64_i32(b, tmp1);
 }
-#define gen_op_imull_T0_T1() gen_imull(cpu_T[0], cpu_T[1])
 
 /* Swap low and high halfwords.  */
 static void gen_swap_half(TCGv var)
@@ -416,7 +414,7 @@ static void gen_set_CF_bit31(TCGv var)
 {
     TCGv tmp = new_tmp();
     tcg_gen_shri_i32(tmp, var, 31);
-    gen_set_CF(var);
+    gen_set_CF(tmp);
     dead_tmp(tmp);
 }
 
@@ -437,6 +435,16 @@ static void gen_adc_T0_T1(void)
     dead_tmp(tmp);
 }
 
+/* dest = T0 + T1 + CF. */
+static void gen_add_carry(TCGv dest, TCGv t0, TCGv t1)
+{
+    TCGv tmp;
+    tcg_gen_add_i32(dest, t0, t1);
+    tmp = load_cpu_field(CF);
+    tcg_gen_add_i32(dest, dest, tmp);
+    dead_tmp(tmp);
+}
+
 /* dest = T0 - T1 + CF - 1.  */
 static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
 {
@@ -491,7 +499,7 @@ static void shifter_out_im(TCGv var, int shift)
         tcg_gen_andi_i32(tmp, var, 1);
     } else {
         tcg_gen_shri_i32(tmp, var, shift);
-        if (shift != 31);
+        if (shift != 31)
             tcg_gen_andi_i32(tmp, tmp, 1);
     }
     gen_set_CF(tmp);
@@ -580,17 +588,17 @@ static inline void gen_arm_shift_reg(TCGv var, int shiftop,
     }
 static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
 {
-    TCGv tmp;
+    TCGv_ptr tmp;
 
     switch (op1) {
 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
     case 1:
-        tmp = tcg_temp_new(TCG_TYPE_PTR);
+        tmp = tcg_temp_new_ptr();
         tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
         PAS_OP(s)
         break;
     case 5:
-        tmp = tcg_temp_new(TCG_TYPE_PTR);
+        tmp = tcg_temp_new_ptr();
         tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
         PAS_OP(u)
         break;
@@ -625,17 +633,17 @@ static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
     }
 static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
 {
-    TCGv tmp;
+    TCGv_ptr tmp;
 
     switch (op1) {
 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
     case 0:
-        tmp = tcg_temp_new(TCG_TYPE_PTR);
+        tmp = tcg_temp_new_ptr();
         tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
         PAS_OP(s)
         break;
     case 4:
-        tmp = tcg_temp_new(TCG_TYPE_PTR);
+        tmp = tcg_temp_new_ptr();
         tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
         PAS_OP(u)
         break;
@@ -756,7 +764,7 @@ static void gen_test_cc(int cc, int label)
     dead_tmp(tmp);
 }
 
-const uint8_t table_logic_cc[16] = {
+static const uint8_t table_logic_cc[16] = {
     1, /* and */
     1, /* xor */
     0, /* sub */
@@ -812,17 +820,19 @@ static inline void gen_bx_T0(DisasContext *s)
     gen_bx(s, tmp);
 }
 
-#if defined(CONFIG_USER_ONLY)
-#define gen_ldst(name, s) gen_op_##name##_raw()
-#else
-#define gen_ldst(name, s) do { \
-    s->is_mem = 1; \
-    if (IS_USER(s)) \
-        gen_op_##name##_user(); \
-    else \
-        gen_op_##name##_kernel(); \
-    } while (0)
-#endif
+/* Variant of store_reg which uses branch&exchange logic when storing
+   to r15 in ARM architecture v7 and above. The source must be a temporary
+   and will be marked as dead. */
+static inline void store_reg_bx(CPUState *env, DisasContext *s,
+                                int reg, TCGv var)
+{
+    if (reg == 15 && ENABLE_ARCH_7) {
+        gen_bx(s, var);
+    } else {
+        store_reg(s, reg, var);
+    }
+}
+
 static inline TCGv gen_ld8s(TCGv addr, int index)
 {
     TCGv tmp = new_tmp();
@@ -990,15 +1000,6 @@ static inline void gen_vfp_##name(int dp)                             \
         gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, cpu_env); \
 }
 
-#define VFP_OP1(name)                               \
-static inline void gen_vfp_##name(int dp, int arg)  \
-{                                                   \
-    if (dp)                                         \
-        gen_op_vfp_##name##d(arg);                  \
-    else                                            \
-        gen_op_vfp_##name##s(arg);                  \
-}
-
 VFP_OP2(add)
 VFP_OP2(sub)
 VFP_OP2(mul)
@@ -1181,12 +1182,12 @@ static void neon_store_reg(int reg, int pass, TCGv var)
     dead_tmp(var);
 }
 
-static inline void neon_load_reg64(TCGv var, int reg)
+static inline void neon_load_reg64(TCGv_i64 var, int reg)
 {
     tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
 }
 
-static inline void neon_store_reg64(TCGv var, int reg)
+static inline void neon_store_reg64(TCGv_i64 var, int reg)
 {
     tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
 }
@@ -1222,12 +1223,12 @@ static inline void gen_mov_vreg_F0(int dp, int reg)
 
 #define ARM_CP_RW_BIT  (1 << 20)
 
-static inline void iwmmxt_load_reg(TCGv var, int reg)
+static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
 {
     tcg_gen_ld_i64(var, cpu_env, offsetof(CPUState, iwmmxt.regs[reg]));
 }
 
-static inline void iwmmxt_store_reg(TCGv var, int reg)
+static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
 {
     tcg_gen_st_i64(var, cpu_env, offsetof(CPUState, iwmmxt.regs[reg]));
 }
@@ -2804,10 +2805,12 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         } else if (size == 1) {
                             gen_neon_dup_low16(tmp);
                         }
-                        tmp2 = new_tmp();
-                        tcg_gen_mov_i32(tmp2, tmp);
-                        neon_store_reg(rn, 0, tmp2);
-                        neon_store_reg(rn, 0, tmp);
+                        for (n = 0; n <= pass * 2; n++) {
+                            tmp2 = new_tmp();
+                            tcg_gen_mov_i32(tmp2, tmp);
+                            neon_store_reg(rn, n, tmp2);
+                        }
+                        neon_store_reg(rn, n, tmp);
                     } else {
                         /* VMOV */
                         switch (size) {
@@ -3022,6 +3025,10 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 case 21:
                 case 22:
                 case 23:
+                case 28:
+                case 29:
+                case 30:
+                case 31:
                     /* Source and destination the same.  */
                     gen_mov_F0_vreg(dp, rd);
                     break;
@@ -3057,9 +3064,9 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     break;
                 case 3: /* nmsc: -fd - (fn * fm)  */
                     gen_vfp_mul(dp);
-                    gen_mov_F1_vreg(dp, rd);
-                    gen_vfp_add(dp);
                     gen_vfp_neg(dp);
+                    gen_mov_F1_vreg(dp, rd);
+                    gen_vfp_sub(dp);
                     break;
                 case 4: /* mul: fn * fm */
                     gen_vfp_mul(dp);
@@ -3141,22 +3148,22 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     case 20: /* fshto */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_shto(dp, rm);
+                        gen_vfp_shto(dp, 16 - rm);
                         break;
                     case 21: /* fslto */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_slto(dp, rm);
+                        gen_vfp_slto(dp, 32 - rm);
                         break;
                     case 22: /* fuhto */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_uhto(dp, rm);
+                        gen_vfp_uhto(dp, 16 - rm);
                         break;
                     case 23: /* fulto */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_ulto(dp, rm);
+                        gen_vfp_ulto(dp, 32 - rm);
                         break;
                     case 24: /* ftoui */
                         gen_vfp_toui(dp);
@@ -3173,22 +3180,22 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     case 28: /* ftosh */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_tosh(dp, rm);
+                        gen_vfp_tosh(dp, 16 - rm);
                         break;
                     case 29: /* ftosl */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_tosl(dp, rm);
+                        gen_vfp_tosl(dp, 32 - rm);
                         break;
                     case 30: /* ftouh */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_touh(dp, rm);
+                        gen_vfp_touh(dp, 16 - rm);
                         break;
                     case 31: /* ftoul */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_toul(dp, rm);
+                        gen_vfp_toul(dp, 32 - rm);
                         break;
                     default: /* undefined */
                         printf ("rn:%d\n", rn);
@@ -3461,11 +3468,11 @@ static int gen_set_psr_T0(DisasContext *s, uint32_t mask, int spsr)
     return 0;
 }
 
-/* Generate an old-style exception return.  */
-static void gen_exception_return(DisasContext *s)
+/* Generate an old-style exception return. Marks pc as dead. */
+static void gen_exception_return(DisasContext *s, TCGv pc)
 {
     TCGv tmp;
-    gen_movl_reg_T0(s, 15);
+    store_reg(s, 15, pc);
     tmp = load_cpu_field(spsr);
     gen_set_cpsr(tmp, 0xffffffff);
     dead_tmp(tmp);
@@ -3814,7 +3821,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 tmp2 = new_tmp();
                 tcg_gen_mov_i32(tmp2, tmp);
                 neon_store_reg(rd, 0, tmp2);
-                neon_store_reg(rd, 0, tmp);
+                neon_store_reg(rd, 1, tmp);
                 rd += stride;
             }
             stride = (1 << size) * nregs;
@@ -3907,7 +3914,7 @@ static void gen_neon_bsl(TCGv dest, TCGv t, TCGv f, TCGv c)
     tcg_gen_or_i32(dest, t, f);
 }
 
-static inline void gen_neon_narrow(int size, TCGv dest, TCGv src)
+static inline void gen_neon_narrow(int size, TCGv dest, TCGv_i64 src)
 {
     switch (size) {
     case 0: gen_helper_neon_narrow_u8(dest, src); break;
@@ -3917,7 +3924,7 @@ static inline void gen_neon_narrow(int size, TCGv dest, TCGv src)
     }
 }
 
-static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv src)
+static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv_i64 src)
 {
     switch (size) {
     case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
@@ -3927,7 +3934,7 @@ static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv src)
     }
 }
 
-static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv src)
+static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src)
 {
     switch (size) {
     case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
@@ -3971,7 +3978,7 @@ static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
     }
 }
 
-static inline void gen_neon_widen(TCGv dest, TCGv src, int size, int u)
+static inline void gen_neon_widen(TCGv_i64 dest, TCGv src, int size, int u)
 {
     if (u) {
         switch (size) {
@@ -4011,7 +4018,7 @@ static inline void gen_neon_subl(int size)
     }
 }
 
-static inline void gen_neon_negl(TCGv var, int size)
+static inline void gen_neon_negl(TCGv_i64 var, int size)
 {
     switch (size) {
     case 0: gen_helper_neon_negl_u16(var, var); break;
@@ -4021,7 +4028,7 @@ static inline void gen_neon_negl(TCGv var, int size)
     }
 }
 
-static inline void gen_neon_addl_saturate(TCGv op0, TCGv op1, int size)
+static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
 {
     switch (size) {
     case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
@@ -4030,9 +4037,9 @@ static inline void gen_neon_addl_saturate(TCGv op0, TCGv op1, int size)
     }
 }
 
-static inline void gen_neon_mull(TCGv dest, TCGv a, TCGv b, int size, int u)
+static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u)
 {
-    TCGv tmp;
+    TCGv_i64 tmp;
 
     switch ((size << 1) | u) {
     case 0: gen_helper_neon_mull_s8(dest, a, b); break;
@@ -4076,6 +4083,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
     TCGv tmp;
     TCGv tmp2;
     TCGv tmp3;
+    TCGv_i64 tmp64;
 
     if (!vfp_enabled(env))
       return 1;
@@ -4632,12 +4640,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     imm = (uint16_t)shift;
                     imm |= imm << 16;
                     tmp2 = tcg_const_i32(imm);
+                    TCGV_UNUSED_I64(tmp64);
                     break;
                 case 2:
                     imm = (uint32_t)shift;
                     tmp2 = tcg_const_i32(imm);
+                    TCGV_UNUSED_I64(tmp64);
+                    break;
                 case 3:
-                    tmp2 = tcg_const_i64(shift);
+                    tmp64 = tcg_const_i64(shift);
+                    TCGV_UNUSED(tmp2);
                     break;
                 default:
                     abort();
@@ -4648,14 +4660,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         neon_load_reg64(cpu_V0, rm + pass);
                         if (q) {
                           if (u)
-                            gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp2);
+                            gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp64);
                           else
-                            gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp2);
+                            gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp64);
                         } else {
                           if (u)
-                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp2);
+                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp64);
                           else
-                            gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp2);
+                            gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp64);
                         }
                     } else {
                         tmp = neon_load_reg(rm + pass, 0);
@@ -4808,7 +4820,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 neon_store_reg(rd, pass, tmp);
             }
         }
-    } else { /* (insn & 0x00800010 == 0x00800010) */
+    } else { /* (insn & 0x00800010 == 0x00800000) */
         if (size != 3) {
             op = (insn >> 8) & 0xf;
             if ((insn & (1 << 6)) == 0) {
@@ -5130,16 +5142,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         neon_load_reg64(cpu_V1, rm);
                     }
                 } else if (q) {
-                    tmp = tcg_temp_new(TCG_TYPE_I64);
+                    tmp64 = tcg_temp_new_i64();
                     if (imm < 8) {
                         neon_load_reg64(cpu_V0, rn);
-                        neon_load_reg64(tmp, rn + 1);
+                        neon_load_reg64(tmp64, rn + 1);
                     } else {
                         neon_load_reg64(cpu_V0, rn + 1);
-                        neon_load_reg64(tmp, rm);
+                        neon_load_reg64(tmp64, rm);
                     }
                     tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
-                    tcg_gen_shli_i64(cpu_V1, tmp, 64 - ((imm & 7) * 8));
+                    tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
                     tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
                     if (imm < 8) {
                         neon_load_reg64(cpu_V1, rm);
@@ -5148,13 +5160,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         imm -= 8;
                     }
                     tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
-                    tcg_gen_shri_i64(tmp, tmp, imm * 8);
-                    tcg_gen_or_i64(cpu_V1, cpu_V1, tmp);
+                    tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
+                    tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
                 } else {
+                    /* BUGFIX */
                     neon_load_reg64(cpu_V0, rn);
-                    tcg_gen_shri_i32(cpu_V0, cpu_V0, imm * 8);
+                    tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
                     neon_load_reg64(cpu_V1, rm);
-                    tcg_gen_shli_i32(cpu_V1, cpu_V1, 64 - (imm * 8));
+                    tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
                     tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
                 }
                 neon_store_reg64(cpu_V0, rd);
@@ -5173,7 +5186,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         NEON_GET_REG(T0, rm, pass * 2);
                         NEON_GET_REG(T1, rm, pass * 2 + 1);
                         switch (size) {
-                        case 0: tcg_gen_bswap_i32(cpu_T[0], cpu_T[0]); break;
+                        case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
                         case 1: gen_swap_half(cpu_T[0]); break;
                         case 2: /* no-op */ break;
                         default: abort();
@@ -5184,7 +5197,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         } else {
                             gen_op_movl_T0_T1();
                             switch (size) {
-                            case 0: tcg_gen_bswap_i32(cpu_T[0], cpu_T[0]); break;
+                            case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
                             case 1: gen_swap_half(cpu_T[0]); break;
                             default: abort();
                             }
@@ -5326,7 +5339,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         switch (op) {
                         case 1: /* VREV32 */
                             switch (size) {
-                            case 0: tcg_gen_bswap_i32(cpu_T[0], cpu_T[0]); break;
+                            case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
                             case 1: gen_swap_half(cpu_T[0]); break;
                             default: return 1;
                             }
@@ -5498,7 +5511,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 }
             } else if ((insn & (1 << 10)) == 0) {
                 /* VTBL, VTBX.  */
-                n = (insn >> 5) & 0x18;
+                n = ((insn >> 5) & 0x18) + 8;
                 if (insn & (1 << 6)) {
                     tmp = neon_load_reg(rd, 0);
                 } else {
@@ -5508,6 +5521,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 tmp2 = neon_load_reg(rm, 0);
                 gen_helper_neon_tbl(tmp2, tmp2, tmp, tcg_const_i32(rn),
                                     tcg_const_i32(n));
+                dead_tmp(tmp);
                 if (insn & (1 << 6)) {
                     tmp = neon_load_reg(rd, 1);
                 } else {
@@ -5518,7 +5532,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 gen_helper_neon_tbl(tmp3, tmp3, tmp, tcg_const_i32(rn),
                                     tcg_const_i32(n));
                 neon_store_reg(rd, 0, tmp2);
-                neon_store_reg(rd, 1, tmp2);
+                neon_store_reg(rd, 1, tmp3);
+                dead_tmp(tmp);
             } else if ((insn & 0x380) == 0) {
                 /* VDUP */
                 if (insn & (1 << 19)) {
@@ -5545,6 +5560,71 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
     return 0;
 }
 
+static int disas_cp14_read(CPUState * env, DisasContext *s, uint32_t insn)
+{
+    int crn = (insn >> 16) & 0xf;
+    int crm = insn & 0xf;
+    int op1 = (insn >> 21) & 7;
+    int op2 = (insn >> 5) & 7;
+    int rt = (insn >> 12) & 0xf;
+    TCGv tmp;
+
+    if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
+        if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) {
+            /* TEECR */
+            if (IS_USER(s))
+                return 1;
+            tmp = load_cpu_field(teecr);
+            store_reg(s, rt, tmp);
+            return 0;
+        }
+        if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) {
+            /* TEEHBR */
+            if (IS_USER(s) && (env->teecr & 1))
+                return 1;
+            tmp = load_cpu_field(teehbr);
+            store_reg(s, rt, tmp);
+            return 0;
+        }
+    }
+    fprintf(stderr, "Unknown cp14 read op1:%d crn:%d crm:%d op2:%d\n",
+            op1, crn, crm, op2);
+    return 1;
+}
+
+static int disas_cp14_write(CPUState * env, DisasContext *s, uint32_t insn)
+{
+    int crn = (insn >> 16) & 0xf;
+    int crm = insn & 0xf;
+    int op1 = (insn >> 21) & 7;
+    int op2 = (insn >> 5) & 7;
+    int rt = (insn >> 12) & 0xf;
+    TCGv tmp;
+
+    if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
+        if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) {
+            /* TEECR */
+            if (IS_USER(s))
+                return 1;
+            tmp = load_reg(s, rt);
+            gen_helper_set_teecr(cpu_env, tmp);
+            dead_tmp(tmp);
+            return 0;
+        }
+        if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) {
+            /* TEEHBR */
+            if (IS_USER(s) && (env->teecr & 1))
+                return 1;
+            tmp = load_reg(s, rt);
+            store_cpu_field(tmp, teehbr);
+            return 0;
+        }
+    }
+    fprintf(stderr, "Unknown cp14 write op1:%d crn:%d crm:%d op2:%d\n",
+            op1, crn, crm, op2);
+    return 1;
+}
+
 static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
 {
     int cpnum;
@@ -5566,9 +5646,19 @@ static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
     case 10:
     case 11:
        return disas_vfp_insn (env, s, insn);
+    case 14:
+        /* Coprocessors 7-15 are architecturally reserved by ARM.
+           Unfortunately Intel decided to ignore this.  */
+        if (arm_feature(env, ARM_FEATURE_XSCALE))
+            goto board;
+        if (insn & (1 << 20))
+            return disas_cp14_read(env, s, insn);
+        else
+            return disas_cp14_write(env, s, insn);
     case 15:
        return disas_cp15_insn (env, s, insn);
     default:
+    board:
        /* Unknown coprocessor.  See if the board has hooked it.  */
        return disas_cp_insn (env, s, insn);
     }
@@ -5576,7 +5666,7 @@ static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
 
 
 /* Store a 64-bit value to a register pair.  Clobbers val.  */
-static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv val)
+static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
 {
     TCGv tmp;
     tmp = new_tmp();
@@ -5589,13 +5679,13 @@ static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv val)
 }
 
 /* load a 32-bit value from a register and perform a 64-bit accumulate.  */
-static void gen_addq_lo(DisasContext *s, TCGv val, int rlow)
+static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
 {
-    TCGv tmp;
+    TCGv_i64 tmp;
     TCGv tmp2;
 
     /* Load value and extend to 64 bits.  */
-    tmp = tcg_temp_new(TCG_TYPE_I64);
+    tmp = tcg_temp_new_i64();
     tmp2 = load_reg(s, rlow);
     tcg_gen_extu_i32_i64(tmp, tmp2);
     dead_tmp(tmp2);
@@ -5603,16 +5693,16 @@ static void gen_addq_lo(DisasContext *s, TCGv val, int rlow)
 }
 
 /* load and add a 64-bit value from a register pair.  */
-static void gen_addq(DisasContext *s, TCGv val, int rlow, int rhigh)
+static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
 {
-    TCGv tmp;
+    TCGv_i64 tmp;
     TCGv tmpl;
     TCGv tmph;
 
     /* Load 64-bit value rd:rn.  */
     tmpl = load_reg(s, rlow);
     tmph = load_reg(s, rhigh);
-    tmp = tcg_temp_new(TCG_TYPE_I64);
+    tmp = tcg_temp_new_i64();
     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
     dead_tmp(tmpl);
     dead_tmp(tmph);
@@ -5620,7 +5710,7 @@ static void gen_addq(DisasContext *s, TCGv val, int rlow, int rhigh)
 }
 
 /* Set N and Z flags from a 64-bit value.  */
-static void gen_logicq_cc(TCGv val)
+static void gen_logicq_cc(TCGv_i64 val)
 {
     TCGv tmp = new_tmp();
     gen_helper_logicq_cc(tmp, val);
@@ -5635,6 +5725,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
     TCGv tmp2;
     TCGv tmp3;
     TCGv addr;
+    TCGv_i64 tmp64;
 
     insn = ldl_code(s->pc);
     s->pc += 4;
@@ -5969,10 +6060,10 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                     tcg_gen_sari_i32(tmp2, tmp2, 16);
                 else
                     gen_sxth(tmp2);
-                tmp2 = gen_muls_i64_i32(tmp, tmp2);
-                tcg_gen_shri_i64(tmp2, tmp2, 16);
+                tmp64 = gen_muls_i64_i32(tmp, tmp2);
+                tcg_gen_shri_i64(tmp64, tmp64, 16);
                 tmp = new_tmp();
-                tcg_gen_trunc_i64_i32(tmp, tmp2);
+                tcg_gen_trunc_i64_i32(tmp, tmp64);
                 if ((sh & 2) == 0) {
                     tmp2 = load_reg(s, rn);
                     gen_helper_add_setq(tmp, tmp, tmp2);
@@ -5986,11 +6077,11 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                 gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
                 dead_tmp(tmp2);
                 if (op1 == 2) {
-                    tmp2 = tcg_temp_new(TCG_TYPE_I64);
-                    tcg_gen_ext_i32_i64(tmp2, tmp);
+                    tmp64 = tcg_temp_new_i64();
+                    tcg_gen_ext_i32_i64(tmp64, tmp);
                     dead_tmp(tmp);
-                    gen_addq(s, tmp2, rn, rd);
-                    gen_storeq_reg(s, rn, rd, tmp2);
+                    gen_addq(s, tmp64, rn, rd);
+                    gen_storeq_reg(s, rn, rd, tmp64);
                 } else {
                     if (op1 == 0) {
                         tmp2 = load_reg(s, rn);
@@ -6018,148 +6109,173 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
             /* immediate operand */
             val = insn & 0xff;
             shift = ((insn >> 8) & 0xf) * 2;
-            if (shift)
+            if (shift) {
                 val = (val >> shift) | (val << (32 - shift));
-            gen_op_movl_T1_im(val);
-            if (logic_cc && shift)
-                gen_set_CF_bit31(cpu_T[1]);
+            }
+            tmp2 = new_tmp();
+            tcg_gen_movi_i32(tmp2, val);
+            if (logic_cc && shift) {
+                gen_set_CF_bit31(tmp2);
+            }
         } else {
             /* register */
             rm = (insn) & 0xf;
-            gen_movl_T1_reg(s, rm);
+            tmp2 = load_reg(s, rm);
             shiftop = (insn >> 5) & 3;
             if (!(insn & (1 << 4))) {
                 shift = (insn >> 7) & 0x1f;
-                gen_arm_shift_im(cpu_T[1], shiftop, shift, logic_cc);
+                gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
             } else {
                 rs = (insn >> 8) & 0xf;
                 tmp = load_reg(s, rs);
-                gen_arm_shift_reg(cpu_T[1], shiftop, tmp, logic_cc);
+                gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
             }
         }
         if (op1 != 0x0f && op1 != 0x0d) {
             rn = (insn >> 16) & 0xf;
-            gen_movl_T0_reg(s, rn);
+            tmp = load_reg(s, rn);
+        } else {
+            TCGV_UNUSED(tmp);
         }
         rd = (insn >> 12) & 0xf;
         switch(op1) {
         case 0x00:
-            gen_op_andl_T0_T1();
-            gen_movl_reg_T0(s, rd);
-            if (logic_cc)
-                gen_op_logic_T0_cc();
+            tcg_gen_and_i32(tmp, tmp, tmp2);
+            if (logic_cc) {
+                gen_logic_CC(tmp);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x01:
-            gen_op_xorl_T0_T1();
-            gen_movl_reg_T0(s, rd);
-            if (logic_cc)
-                gen_op_logic_T0_cc();
+            tcg_gen_xor_i32(tmp, tmp, tmp2);
+            if (logic_cc) {
+                gen_logic_CC(tmp);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x02:
             if (set_cc && rd == 15) {
                 /* SUBS r15, ... is used for exception return.  */
-                if (IS_USER(s))
+                if (IS_USER(s)) {
                     goto illegal_op;
-                gen_op_subl_T0_T1_cc();
-                gen_exception_return(s);
+                }
+                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_exception_return(s, tmp);
             } else {
-                if (set_cc)
-                    gen_op_subl_T0_T1_cc();
-                else
-                    gen_op_subl_T0_T1();
-                gen_movl_reg_T0(s, rd);
+                if (set_cc) {
+                    gen_helper_sub_cc(tmp, tmp, tmp2);
+                } else {
+                    tcg_gen_sub_i32(tmp, tmp, tmp2);
+                }
+                store_reg_bx(env, s, rd, tmp);
             }
             break;
         case 0x03:
-            if (set_cc)
-                gen_op_rsbl_T0_T1_cc();
-            else
-                gen_op_rsbl_T0_T1();
-            gen_movl_reg_T0(s, rd);
+            if (set_cc) {
+                gen_helper_sub_cc(tmp, tmp2, tmp);
+            } else {
+                tcg_gen_sub_i32(tmp, tmp2, tmp);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x04:
-            if (set_cc)
-                gen_op_addl_T0_T1_cc();
-            else
-                gen_op_addl_T0_T1();
-            gen_movl_reg_T0(s, rd);
+            if (set_cc) {
+                gen_helper_add_cc(tmp, tmp, tmp2);
+            } else {
+                tcg_gen_add_i32(tmp, tmp, tmp2);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x05:
-            if (set_cc)
-                gen_op_adcl_T0_T1_cc();
-            else
-                gen_adc_T0_T1();
-            gen_movl_reg_T0(s, rd);
+            if (set_cc) {
+                gen_helper_adc_cc(tmp, tmp, tmp2);
+            } else {
+                gen_add_carry(tmp, tmp, tmp2);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x06:
-            if (set_cc)
-                gen_op_sbcl_T0_T1_cc();
-            else
-                gen_sbc_T0_T1();
-            gen_movl_reg_T0(s, rd);
+            if (set_cc) {
+                gen_helper_sbc_cc(tmp, tmp, tmp2);
+            } else {
+                gen_sub_carry(tmp, tmp, tmp2);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x07:
-            if (set_cc)
-                gen_op_rscl_T0_T1_cc();
-            else
-                gen_rsc_T0_T1();
-            gen_movl_reg_T0(s, rd);
+            if (set_cc) {
+                gen_helper_sbc_cc(tmp, tmp2, tmp);
+            } else {
+                gen_sub_carry(tmp, tmp2, tmp);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x08:
             if (set_cc) {
-                gen_op_andl_T0_T1();
-                gen_op_logic_T0_cc();
+                tcg_gen_and_i32(tmp, tmp, tmp2);
+                gen_logic_CC(tmp);
             }
+            dead_tmp(tmp);
             break;
         case 0x09:
             if (set_cc) {
-                gen_op_xorl_T0_T1();
-                gen_op_logic_T0_cc();
+                tcg_gen_xor_i32(tmp, tmp, tmp2);
+                gen_logic_CC(tmp);
             }
+            dead_tmp(tmp);
             break;
         case 0x0a:
             if (set_cc) {
-                gen_op_subl_T0_T1_cc();
+                gen_helper_sub_cc(tmp, tmp, tmp2);
             }
+            dead_tmp(tmp);
             break;
         case 0x0b:
             if (set_cc) {
-                gen_op_addl_T0_T1_cc();
+                gen_helper_add_cc(tmp, tmp, tmp2);
             }
+            dead_tmp(tmp);
             break;
         case 0x0c:
-            gen_op_orl_T0_T1();
-            gen_movl_reg_T0(s, rd);
-            if (logic_cc)
-                gen_op_logic_T0_cc();
+            tcg_gen_or_i32(tmp, tmp, tmp2);
+            if (logic_cc) {
+                gen_logic_CC(tmp);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x0d:
             if (logic_cc && rd == 15) {
                 /* MOVS r15, ... is used for exception return.  */
-                if (IS_USER(s))
+                if (IS_USER(s)) {
                     goto illegal_op;
-                gen_op_movl_T0_T1();
-                gen_exception_return(s);
+                }
+                gen_exception_return(s, tmp2);
             } else {
-                gen_movl_reg_T1(s, rd);
-                if (logic_cc)
-                    gen_op_logic_T1_cc();
+                if (logic_cc) {
+                    gen_logic_CC(tmp2);
+                }
+                store_reg_bx(env, s, rd, tmp2);
             }
             break;
         case 0x0e:
-            gen_op_bicl_T0_T1();
-            gen_movl_reg_T0(s, rd);
-            if (logic_cc)
-                gen_op_logic_T0_cc();
+            tcg_gen_bic_i32(tmp, tmp, tmp2);
+            if (logic_cc) {
+                gen_logic_CC(tmp);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         default:
         case 0x0f:
-            gen_op_notl_T1();
-            gen_movl_reg_T1(s, rd);
-            if (logic_cc)
-                gen_op_logic_T1_cc();
+            tcg_gen_not_i32(tmp2, tmp2);
+            if (logic_cc) {
+                gen_logic_CC(tmp2);
+            }
+            store_reg_bx(env, s, rd, tmp2);
             break;
         }
+        if (op1 != 0x0f && op1 != 0x0d) {
+            dead_tmp(tmp2);
+        }
     } else {
         /* other instructions */
         op1 = (insn >> 24) & 0xf;
@@ -6203,19 +6319,19 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                         tmp = load_reg(s, rs);
                         tmp2 = load_reg(s, rm);
                         if (insn & (1 << 22))
-                            tmp = gen_muls_i64_i32(tmp, tmp2);
+                            tmp64 = gen_muls_i64_i32(tmp, tmp2);
                         else
-                            tmp = gen_mulu_i64_i32(tmp, tmp2);
+                            tmp64 = gen_mulu_i64_i32(tmp, tmp2);
                         if (insn & (1 << 21)) /* mult accumulate */
-                            gen_addq(s, tmp, rn, rd);
+                            gen_addq(s, tmp64, rn, rd);
                         if (!(insn & (1 << 23))) { /* double accumulate */
                             ARCH(6);
-                            gen_addq_lo(s, tmp, rn);
-                            gen_addq_lo(s, tmp, rd);
+                            gen_addq_lo(s, tmp64, rn);
+                            gen_addq_lo(s, tmp64, rd);
                         }
                         if (insn & (1 << 20))
-                            gen_logicq_cc(tmp);
-                        gen_storeq_reg(s, rn, rd, tmp);
+                            gen_logicq_cc(tmp64);
+                        gen_storeq_reg(s, rn, rd, tmp64);
                         break;
                     }
                 } else {
@@ -6223,11 +6339,35 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                     rd = (insn >> 12) & 0xf;
                     if (insn & (1 << 23)) {
                         /* load/store exclusive */
+                        op1 = (insn >> 21) & 0x3;
+                        if (op1)
+                            ARCH(6K);
+                        else
+                            ARCH(6);
                         gen_movl_T1_reg(s, rn);
                         addr = cpu_T[1];
                         if (insn & (1 << 20)) {
                             gen_helper_mark_exclusive(cpu_env, cpu_T[1]);
-                            tmp = gen_ld32(addr, IS_USER(s));
+                            switch (op1) {
+                            case 0: /* ldrex */
+                                tmp = gen_ld32(addr, IS_USER(s));
+                                break;
+                            case 1: /* ldrexd */
+                                tmp = gen_ld32(addr, IS_USER(s));
+                                store_reg(s, rd, tmp);
+                                tcg_gen_addi_i32(addr, addr, 4);
+                                tmp = gen_ld32(addr, IS_USER(s));
+                                rd++;
+                                break;
+                            case 2: /* ldrexb */
+                                tmp = gen_ld8u(addr, IS_USER(s));
+                                break;
+                            case 3: /* ldrexh */
+                                tmp = gen_ld16u(addr, IS_USER(s));
+                                break;
+                            default:
+                                abort();
+                            }
                             store_reg(s, rd, tmp);
                         } else {
                             int label = gen_new_label();
@@ -6236,7 +6376,25 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                             tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0],
                                                 0, label);
                             tmp = load_reg(s,rm);
-                            gen_st32(tmp, cpu_T[1], IS_USER(s));
+                            switch (op1) {
+                            case 0:  /*  strex */
+                                gen_st32(tmp, addr, IS_USER(s));
+                                break;
+                            case 1: /*  strexd */
+                                gen_st32(tmp, addr, IS_USER(s));
+                                tcg_gen_addi_i32(addr, addr, 4);
+                                tmp = load_reg(s, rm + 1);
+                                gen_st32(tmp, addr, IS_USER(s));
+                                break;
+                            case 2: /*  strexb */
+                                gen_st8(tmp, addr, IS_USER(s));
+                                break;
+                            case 3: /* strexh */
+                                gen_st16(tmp, addr, IS_USER(s));
+                                break;
+                            default:
+                                abort();
+                            }
                             gen_set_label(label);
                             gen_movl_reg_T0(s, rd);
                         }
@@ -6459,7 +6617,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                             if (insn & (1 << 7))
                                 gen_rev16(tmp);
                             else
-                                tcg_gen_bswap_i32(tmp, tmp);
+                                tcg_gen_bswap32_i32(tmp, tmp);
                         }
                         store_reg(s, rd, tmp);
                     } else {
@@ -6471,14 +6629,14 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                     tmp2 = load_reg(s, rs);
                     if (insn & (1 << 20)) {
                         /* Signed multiply most significant [accumulate].  */
-                        tmp2 = gen_muls_i64_i32(tmp, tmp2);
+                        tmp64 = gen_muls_i64_i32(tmp, tmp2);
                         if (insn & (1 << 5))
-                            tcg_gen_addi_i64(tmp2, tmp2, 0x80000000u);
-                        tcg_gen_shri_i64(tmp2, tmp2, 32);
+                            tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
+                        tcg_gen_shri_i64(tmp64, tmp64, 32);
                         tmp = new_tmp();
-                        tcg_gen_trunc_i64_i32(tmp, tmp2);
-                        if (rn != 15) {
-                            tmp2 = load_reg(s, rn);
+                        tcg_gen_trunc_i64_i32(tmp, tmp64);
+                        if (rd != 15) {
+                            tmp2 = load_reg(s, rd);
                             if (insn & (1 << 6)) {
                                 tcg_gen_sub_i32(tmp, tmp, tmp2);
                             } else {
@@ -6486,7 +6644,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                             }
                             dead_tmp(tmp2);
                         }
-                        store_reg(s, rd, tmp);
+                        store_reg(s, rn, tmp);
                     } else {
                         if (insn & (1 << 5))
                             gen_swap_half(tmp2);
@@ -6500,11 +6658,11 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                         dead_tmp(tmp2);
                         if (insn & (1 << 22)) {
                             /* smlald, smlsld */
-                            tmp2 = tcg_temp_new(TCG_TYPE_I64);
-                            tcg_gen_ext_i32_i64(tmp2, tmp);
+                            tmp64 = tcg_temp_new_i64();
+                            tcg_gen_ext_i32_i64(tmp64, tmp);
                             dead_tmp(tmp);
-                            gen_addq(s, tmp2, rd, rn);
-                            gen_storeq_reg(s, rd, rn, tmp2);
+                            gen_addq(s, tmp64, rd, rn);
+                            gen_storeq_reg(s, rd, rn, tmp64);
                         } else {
                             /* smuad, smusd, smlad, smlsd */
                             if (rd != 15)
@@ -6526,12 +6684,12 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                         tmp2 = load_reg(s, rs);
                         gen_helper_usad8(tmp, tmp, tmp2);
                         dead_tmp(tmp2);
-                        if (rn != 15) {
-                            tmp2 = load_reg(s, rn);
+                        if (rd != 15) {
+                            tmp2 = load_reg(s, rd);
                             tcg_gen_add_i32(tmp, tmp, tmp2);
                             dead_tmp(tmp2);
                         }
-                        store_reg(s, rd, tmp);
+                        store_reg(s, rn, tmp);
                         break;
                     case 0x20: case 0x24: case 0x28: case 0x2c:
                         /* Bitfield insert/clear.  */
@@ -6554,6 +6712,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                         break;
                     case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
                     case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
+                        ARCH(6T2);
                         tmp = load_reg(s, rm);
                         shift = (insn >> 7) & 0x1f;
                         i = ((insn >> 16) & 0x1f) + 1;
@@ -6594,7 +6753,6 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                 gen_add_data_offset(s, insn, tmp2);
             if (insn & (1 << 20)) {
                 /* load */
-                s->is_mem = 1;
                 if (insn & (1 << 22)) {
                     tmp = gen_ld8u(tmp2, i);
                 } else {
@@ -6873,6 +7031,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
     TCGv tmp2;
     TCGv tmp3;
     TCGv addr;
+    TCGv_i64 tmp64;
     int op;
     int shiftop;
     int conds;
@@ -7213,7 +7372,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
             gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
             if (logic_cc)
                 gen_logic_CC(tmp);
-            store_reg(s, rd, tmp);
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 1: /* Sign/zero extend.  */
             tmp = load_reg(s, rm);
@@ -7274,7 +7433,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     gen_helper_rbit(tmp, tmp);
                     break;
                 case 0x08: /* rev */
-                    tcg_gen_bswap_i32(tmp, tmp);
+                    tcg_gen_bswap32_i32(tmp, tmp);
                     break;
                 case 0x09: /* rev16 */
                     gen_rev16(tmp);
@@ -7349,10 +7508,10 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     tcg_gen_sari_i32(tmp2, tmp2, 16);
                 else
                     gen_sxth(tmp2);
-                tmp2 = gen_muls_i64_i32(tmp, tmp2);
-                tcg_gen_shri_i64(tmp2, tmp2, 16);
+                tmp64 = gen_muls_i64_i32(tmp, tmp2);
+                tcg_gen_shri_i64(tmp64, tmp64, 16);
                 tmp = new_tmp();
-                tcg_gen_trunc_i64_i32(tmp, tmp2);
+                tcg_gen_trunc_i64_i32(tmp, tmp64);
                 if (rs != 15)
                   {
                     tmp2 = load_reg(s, rs);
@@ -7416,36 +7575,38 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 }
                 dead_tmp(tmp2);
-                tmp2 = tcg_temp_new(TCG_TYPE_I64);
-                gen_addq(s, tmp, rs, rd);
-                gen_storeq_reg(s, rs, rd, tmp);
+                /* BUGFIX */
+                tmp64 = tcg_temp_new_i64();
+                tcg_gen_ext_i32_i64(tmp64, tmp);
+                dead_tmp(tmp);
+                gen_addq(s, tmp64, rs, rd);
+                gen_storeq_reg(s, rs, rd, tmp64);
             } else {
                 if (op & 0x20) {
                     /* Unsigned 64-bit multiply  */
-                    tmp = gen_mulu_i64_i32(tmp, tmp2);
+                    tmp64 = gen_mulu_i64_i32(tmp, tmp2);
                 } else {
                     if (op & 8) {
                         /* smlalxy */
                         gen_mulxy(tmp, tmp2, op & 2, op & 1);
                         dead_tmp(tmp2);
-                        tmp2 = tcg_temp_new(TCG_TYPE_I64);
-                        tcg_gen_ext_i32_i64(tmp2, tmp);
+                        tmp64 = tcg_temp_new_i64();
+                        tcg_gen_ext_i32_i64(tmp64, tmp);
                         dead_tmp(tmp);
-                        tmp = tmp2;
                     } else {
                         /* Signed 64-bit multiply  */
-                        tmp = gen_muls_i64_i32(tmp, tmp2);
+                        tmp64 = gen_muls_i64_i32(tmp, tmp2);
                     }
                 }
                 if (op & 4) {
                     /* umaal */
-                    gen_addq_lo(s, tmp, rs);
-                    gen_addq_lo(s, tmp, rd);
+                    gen_addq_lo(s, tmp64, rs);
+                    gen_addq_lo(s, tmp64, rd);
                 } else if (op & 0x40) {
                     /* 64-bit accumulate.  */
-                    gen_addq(s, tmp, rs, rd);
+                    gen_addq(s, tmp64, rs, rd);
                 }
-                gen_storeq_reg(s, rs, rd, tmp);
+                gen_storeq_reg(s, rs, rd, tmp64);
             }
             break;
         }
@@ -8406,7 +8567,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
             rd = insn & 0x7;
             tmp = load_reg(s, rn);
             switch ((insn >> 6) & 3) {
-            case 0: tcg_gen_bswap_i32(tmp, tmp); break;
+            case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
             case 1: gen_rev16(tmp); break;
             case 3: gen_revsh(tmp); break;
             default: goto illegal_op;
@@ -8542,6 +8703,7 @@ static inline void gen_intermediate_code_internal(CPUState *env,
                                                   int search_pc)
 {
     DisasContext dc1, *dc = &dc1;
+    CPUBreakpoint *bp;
     uint16_t *gen_opc_end;
     int j, lj;
     target_ulong pc_start;
@@ -8566,7 +8728,6 @@ static inline void gen_intermediate_code_internal(CPUState *env,
     dc->thumb = env->thumb;
     dc->condexec_mask = (env->condexec_bits & 0xf) << 1;
     dc->condexec_cond = env->condexec_bits >> 4;
-    dc->is_mem = 0;
 #if !defined(CONFIG_USER_ONLY)
     if (IS_M(env)) {
         dc->user = ((env->v7m.exception == 0) && (env->v7m.control & 1));
@@ -8574,14 +8735,14 @@ static inline void gen_intermediate_code_internal(CPUState *env,
         dc->user = (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_USR;
     }
 #endif
-    cpu_F0s = tcg_temp_new(TCG_TYPE_I32);
-    cpu_F1s = tcg_temp_new(TCG_TYPE_I32);
-    cpu_F0d = tcg_temp_new(TCG_TYPE_I64);
-    cpu_F1d = tcg_temp_new(TCG_TYPE_I64);
+    cpu_F0s = tcg_temp_new_i32();
+    cpu_F1s = tcg_temp_new_i32();
+    cpu_F0d = tcg_temp_new_i64();
+    cpu_F1d = tcg_temp_new_i64();
     cpu_V0 = cpu_F0d;
     cpu_V1 = cpu_F1d;
     /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
-    cpu_M0 = tcg_temp_new(TCG_TYPE_I64);
+    cpu_M0 = tcg_temp_new_i64();
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
     lj = -1;
     num_insns = 0;
@@ -8618,9 +8779,9 @@ static inline void gen_intermediate_code_internal(CPUState *env,
         }
 #endif
 
-        if (env->nb_breakpoints > 0) {
-            for(j = 0; j < env->nb_breakpoints; j++) {
-                if (env->breakpoints[j] == dc->pc) {
+        if (unlikely(!TAILQ_EMPTY(&env->breakpoints))) {
+            TAILQ_FOREACH(bp, &env->breakpoints, entry) {
+                if (bp->pc == dc->pc) {
                     gen_set_condexec(dc);
                     gen_set_pc_im(dc->pc);
                     gen_exception(EXCP_DEBUG);
@@ -8670,19 +8831,14 @@ static inline void gen_intermediate_code_internal(CPUState *env,
             gen_set_label(dc->condlabel);
             dc->condjmp = 0;
         }
-        /* Terminate the TB on memory ops if watchpoints are present.  */
-        /* FIXME: This should be replacd by the deterministic execution
-         * IRQ raising bits.  */
-        if (dc->is_mem && env->nb_watchpoints)
-            break;
-
-        /* Translation stops when a conditional branch is enoutered.
+        /* Translation stops when a conditional branch is encountered.
          * Otherwise the subsequent code could get translated several times.
          * Also stop translation when a page boundary is reached.  This
          * ensures prefetch aborts occur at the right place.  */
         num_insns ++;
     } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end &&
              !env->singlestep_enabled &&
+             !singlestep &&
              dc->pc < next_page_start &&
              num_insns < max_insns);
 
@@ -8764,11 +8920,11 @@ done_generating:
     *gen_opc_ptr = INDEX_op_end;
 
 #ifdef DEBUG_DISAS
-    if (loglevel & CPU_LOG_TB_IN_ASM) {
-        fprintf(logfile, "----------------\n");
-        fprintf(logfile, "IN: %s\n", lookup_symbol(pc_start));
-        target_disas(logfile, pc_start, dc->pc - pc_start, env->thumb);
-        fprintf(logfile, "\n");
+    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
+        qemu_log("----------------\n");
+        qemu_log("IN: %s\n", lookup_symbol(pc_start));
+        log_target_disas(pc_start, dc->pc - pc_start, env->thumb);
+        qemu_log("\n");
     }
 #endif
     if (search_pc) {