diff --git a/target-arm/translate.c b/target-arm/translate.c
index 711676f..adac19a 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -17,7 +17,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
  */
 #include <stdarg.h>
 #include <stdlib.h>
@@ -29,7 +29,9 @@
 #include "exec-all.h"
 #include "disas.h"
 #include "tcg-op.h"
+#include "qemu-log.h"
 
+#include "helpers.h"
 #define GEN_HELPER 1
 #include "helpers.h"
 
@@ -39,7 +41,7 @@
 #define ENABLE_ARCH_6T2   arm_feature(env, ARM_FEATURE_THUMB2)
 #define ENABLE_ARCH_7     arm_feature(env, ARM_FEATURE_V7)
 
-#define ARCH(x) if (!ENABLE_ARCH_##x) goto illegal_op;
+#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
 
 /* internal defines */
 typedef struct DisasContext {
@@ -55,7 +57,6 @@ typedef struct DisasContext {
     struct TranslationBlock *tb;
     int singlestep_enabled;
     int thumb;
-    int is_mem;
 #if !defined(CONFIG_USER_ONLY)
     int user;
 #endif
@@ -72,25 +73,28 @@ typedef struct DisasContext {
 #define DISAS_WFI 4
 #define DISAS_SWI 5
 
-/* XXX: move that elsewhere */
-extern FILE *logfile;
-extern int loglevel;
-
-static TCGv cpu_env;
+static TCGv_ptr cpu_env;
 /* We reuse the same 64-bit temporaries for efficiency.  */
-static TCGv cpu_V0, cpu_V1, cpu_M0;
+static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
 
 /* FIXME:  These should be removed.  */
 static TCGv cpu_T[2];
-static TCGv cpu_F0s, cpu_F1s, cpu_F0d, cpu_F1d;
+static TCGv cpu_F0s, cpu_F1s;
+static TCGv_i64 cpu_F0d, cpu_F1d;
+
+#define ICOUNT_TEMP cpu_T[0]
+#include "gen-icount.h"
 
 /* initialize TCG globals.  */
 void arm_translate_init(void)
 {
-    cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");
+    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
+
+    cpu_T[0] = tcg_global_reg_new_i32(TCG_AREG1, "T0");
+    cpu_T[1] = tcg_global_reg_new_i32(TCG_AREG2, "T1");
 
-    cpu_T[0] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG1, "T0");
-    cpu_T[1] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG2, "T1");
+#define GEN_HELPER 2
+#include "helpers.h"
 }
 
 /* The code generator doesn't like lots of temporaries, so maintain our own
@@ -100,16 +104,16 @@ static int num_temps;
 static TCGv temps[MAX_TEMPS];
 
 /* Allocate a temporary variable.  */
-static TCGv new_tmp(void)
+static TCGv_i32 new_tmp(void)
 {
     TCGv tmp;
     if (num_temps == MAX_TEMPS)
         abort();
 
-    if (GET_TCGV(temps[num_temps]))
+    if (GET_TCGV_I32(temps[num_temps]))
       return temps[num_temps++];
 
-    tmp = tcg_temp_new(TCG_TYPE_I32);
+    tmp = tcg_temp_new_i32();
     temps[num_temps++] = tmp;
     return tmp;
 }
@@ -120,11 +124,11 @@ static void dead_tmp(TCGv tmp)
     int i;
     num_temps--;
     i = num_temps;
-    if (GET_TCGV(temps[i]) == GET_TCGV(tmp))
+    if (TCGV_EQUAL(temps[i], tmp))
         return;
 
     /* Shuffle this temp to the last slot.  */
-    while (GET_TCGV(temps[i]) != GET_TCGV(tmp))
+    while (!TCGV_EQUAL(temps[i], tmp))
         i--;
     while (i < num_temps) {
         temps[i] = temps[i + 1];
@@ -190,7 +194,6 @@ static void store_reg(DisasContext *s, int reg, TCGv var)
 
 /* Basic operations.  */
 #define gen_op_movl_T0_T1() tcg_gen_mov_i32(cpu_T[0], cpu_T[1])
-#define gen_op_movl_T1_T0() tcg_gen_mov_i32(cpu_T[1], cpu_T[0])
 #define gen_op_movl_T0_im(im) tcg_gen_movi_i32(cpu_T[0], im)
 #define gen_op_movl_T1_im(im) tcg_gen_movi_i32(cpu_T[1], im)
 
@@ -204,7 +207,6 @@ static void store_reg(DisasContext *s, int reg, TCGv var)
 #define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
-#define gen_op_rscl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[1], cpu_T[0])
 
 #define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_op_xorl_T0_T1() tcg_gen_xor_i32(cpu_T[0], cpu_T[0], cpu_T[1])
@@ -214,11 +216,8 @@ static void store_reg(DisasContext *s, int reg, TCGv var)
 #define gen_op_logic_T0_cc() gen_logic_CC(cpu_T[0]);
 #define gen_op_logic_T1_cc() gen_logic_CC(cpu_T[1]);
 
-#define gen_op_shll_T0_im(im) tcg_gen_shli_i32(cpu_T[0], cpu_T[0], im)
 #define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im)
 #define gen_op_shrl_T1_im(im) tcg_gen_shri_i32(cpu_T[1], cpu_T[1], im)
-#define gen_op_sarl_T1_im(im) tcg_gen_sari_i32(cpu_T[1], cpu_T[1], im)
-#define gen_op_rorl_T1_im(im) tcg_gen_rori_i32(cpu_T[1], cpu_T[1], im)
 
 /* Value extensions.  */
 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
@@ -247,8 +246,8 @@ static void gen_smul_dual(TCGv a, TCGv b)
 {
     TCGv tmp1 = new_tmp();
     TCGv tmp2 = new_tmp();
-    tcg_gen_ext8s_i32(tmp1, a);
-    tcg_gen_ext8s_i32(tmp2, b);
+    tcg_gen_ext16s_i32(tmp1, a);
+    tcg_gen_ext16s_i32(tmp2, b);
     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
     dead_tmp(tmp2);
     tcg_gen_sari_i32(a, a, 16);
@@ -324,10 +323,10 @@ static void gen_roundqd(TCGv a, TCGv b)
 /* FIXME: Most targets have native widening multiplication.
    It would be good to use that instead of a full wide multiply.  */
 /* 32x32->64 multiply.  Marks inputs as dead.  */
-static TCGv gen_mulu_i64_i32(TCGv a, TCGv b)
+static TCGv_i64 gen_mulu_i64_i32(TCGv a, TCGv b)
 {
-    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
-    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
+    TCGv_i64 tmp1 = tcg_temp_new_i64();
+    TCGv_i64 tmp2 = tcg_temp_new_i64();
 
     tcg_gen_extu_i32_i64(tmp1, a);
     dead_tmp(a);
@@ -337,10 +336,10 @@ static TCGv gen_mulu_i64_i32(TCGv a, TCGv b)
     return tmp1;
 }
 
-static TCGv gen_muls_i64_i32(TCGv a, TCGv b)
+static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b)
 {
-    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
-    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
+    TCGv_i64 tmp1 = tcg_temp_new_i64();
+    TCGv_i64 tmp2 = tcg_temp_new_i64();
 
     tcg_gen_ext_i32_i64(tmp1, a);
     dead_tmp(a);
@@ -353,8 +352,8 @@ static TCGv gen_muls_i64_i32(TCGv a, TCGv b)
 /* Unsigned 32x32->64 multiply.  */
 static void gen_op_mull_T0_T1(void)
 {
-    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
-    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
+    TCGv_i64 tmp1 = tcg_temp_new_i64();
+    TCGv_i64 tmp2 = tcg_temp_new_i64();
 
     tcg_gen_extu_i32_i64(tmp1, cpu_T[0]);
     tcg_gen_extu_i32_i64(tmp2, cpu_T[1]);
@@ -367,8 +366,8 @@ static void gen_op_mull_T0_T1(void)
 /* Signed 32x32->64 multiply.  */
 static void gen_imull(TCGv a, TCGv b)
 {
-    TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64);
-    TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
+    TCGv_i64 tmp1 = tcg_temp_new_i64();
+    TCGv_i64 tmp2 = tcg_temp_new_i64();
 
     tcg_gen_ext_i32_i64(tmp1, a);
     tcg_gen_ext_i32_i64(tmp2, b);
@@ -377,7 +376,6 @@ static void gen_imull(TCGv a, TCGv b)
     tcg_gen_shri_i64(tmp1, tmp1, 32);
     tcg_gen_trunc_i64_i32(b, tmp1);
 }
-#define gen_op_imull_T0_T1() gen_imull(cpu_T[0], cpu_T[1])
 
 /* Swap low and high halfwords.  */
 static void gen_swap_half(TCGv var)
@@ -416,7 +414,7 @@ static void gen_set_CF_bit31(TCGv var)
 {
     TCGv tmp = new_tmp();
     tcg_gen_shri_i32(tmp, var, 31);
-    gen_set_CF(var);
+    gen_set_CF(tmp);
     dead_tmp(tmp);
 }
 
@@ -437,6 +435,16 @@ static void gen_adc_T0_T1(void)
     dead_tmp(tmp);
 }
 
+/* dest = T0 + T1 + CF. */
+static void gen_add_carry(TCGv dest, TCGv t0, TCGv t1)
+{
+    TCGv tmp;
+    tcg_gen_add_i32(dest, t0, t1);
+    tmp = load_cpu_field(CF);
+    tcg_gen_add_i32(dest, dest, tmp);
+    dead_tmp(tmp);
+}
+
 /* dest = T0 - T1 + CF - 1.  */
 static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
 {
@@ -451,18 +459,6 @@ static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
 #define gen_sbc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[0], cpu_T[1])
 #define gen_rsc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[1], cpu_T[0])
 
-/* FIXME:  Implement this natively.  */
-static inline void tcg_gen_not_i32(TCGv t0, TCGv t1)
-{
-    tcg_gen_xori_i32(t0, t1, ~0);
-}
-
-/* FIXME:  Implement this natively.  */
-static inline void tcg_gen_neg_i64(TCGv dest, TCGv src)
-{
-    tcg_gen_sub_i64(dest, tcg_const_i64(0), src);
-}
-
 /* T0 &= ~T1.  Clobbers T1.  */
 /* FIXME: Implement bic natively.  */
 static inline void tcg_gen_bic_i32(TCGv dest, TCGv t0, TCGv t1)
@@ -503,7 +499,7 @@ static void shifter_out_im(TCGv var, int shift)
         tcg_gen_andi_i32(tmp, var, 1);
     } else {
         tcg_gen_shri_i32(tmp, var, shift);
-        if (shift != 31);
+        if (shift != 31)
             tcg_gen_andi_i32(tmp, tmp, 1);
     }
     gen_set_CF(tmp);
@@ -592,17 +588,17 @@ static inline void gen_arm_shift_reg(TCGv var, int shiftop,
     }
 static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
 {
-    TCGv tmp;
+    TCGv_ptr tmp;
 
     switch (op1) {
 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
     case 1:
-        tmp = tcg_temp_new(TCG_TYPE_PTR);
+        tmp = tcg_temp_new_ptr();
         tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
         PAS_OP(s)
         break;
     case 5:
-        tmp = tcg_temp_new(TCG_TYPE_PTR);
+        tmp = tcg_temp_new_ptr();
         tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
         PAS_OP(u)
         break;
@@ -637,17 +633,17 @@ static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
     }
 static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
 {
-    TCGv tmp;
+    TCGv_ptr tmp;
 
     switch (op1) {
 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
     case 0:
-        tmp = tcg_temp_new(TCG_TYPE_PTR);
+        tmp = tcg_temp_new_ptr();
         tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
         PAS_OP(s)
         break;
     case 4:
-        tmp = tcg_temp_new(TCG_TYPE_PTR);
+        tmp = tcg_temp_new_ptr();
         tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
         PAS_OP(u)
         break;
@@ -674,94 +670,92 @@ static void gen_test_cc(int cc, int label)
 {
     TCGv tmp;
     TCGv tmp2;
-    TCGv zero;
     int inv;
 
-    zero = tcg_const_i32(0);
     switch (cc) {
     case 0: /* eq: Z */
         tmp = load_cpu_field(ZF);
-        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
         break;
     case 1: /* ne: !Z */
         tmp = load_cpu_field(ZF);
-        tcg_gen_brcond_i32(TCG_COND_NE, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
         break;
     case 2: /* cs: C */
         tmp = load_cpu_field(CF);
-        tcg_gen_brcond_i32(TCG_COND_NE, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
         break;
     case 3: /* cc: !C */
         tmp = load_cpu_field(CF);
-        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
         break;
     case 4: /* mi: N */
         tmp = load_cpu_field(NF);
-        tcg_gen_brcond_i32(TCG_COND_LT, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
         break;
     case 5: /* pl: !N */
         tmp = load_cpu_field(NF);
-        tcg_gen_brcond_i32(TCG_COND_GE, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
         break;
     case 6: /* vs: V */
         tmp = load_cpu_field(VF);
-        tcg_gen_brcond_i32(TCG_COND_LT, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
         break;
     case 7: /* vc: !V */
         tmp = load_cpu_field(VF);
-        tcg_gen_brcond_i32(TCG_COND_GE, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
         break;
     case 8: /* hi: C && !Z */
         inv = gen_new_label();
         tmp = load_cpu_field(CF);
-        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, inv);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
         dead_tmp(tmp);
         tmp = load_cpu_field(ZF);
-        tcg_gen_brcond_i32(TCG_COND_NE, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
         gen_set_label(inv);
         break;
     case 9: /* ls: !C || Z */
         tmp = load_cpu_field(CF);
-        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
         dead_tmp(tmp);
         tmp = load_cpu_field(ZF);
-        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
         break;
     case 10: /* ge: N == V -> N ^ V == 0 */
         tmp = load_cpu_field(VF);
         tmp2 = load_cpu_field(NF);
         tcg_gen_xor_i32(tmp, tmp, tmp2);
         dead_tmp(tmp2);
-        tcg_gen_brcond_i32(TCG_COND_GE, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
         break;
     case 11: /* lt: N != V -> N ^ V != 0 */
         tmp = load_cpu_field(VF);
         tmp2 = load_cpu_field(NF);
         tcg_gen_xor_i32(tmp, tmp, tmp2);
         dead_tmp(tmp2);
-        tcg_gen_brcond_i32(TCG_COND_LT, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
         break;
     case 12: /* gt: !Z && N == V */
         inv = gen_new_label();
         tmp = load_cpu_field(ZF);
-        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, inv);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
         dead_tmp(tmp);
         tmp = load_cpu_field(VF);
         tmp2 = load_cpu_field(NF);
         tcg_gen_xor_i32(tmp, tmp, tmp2);
         dead_tmp(tmp2);
-        tcg_gen_brcond_i32(TCG_COND_GE, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
         gen_set_label(inv);
         break;
     case 13: /* le: Z || N != V */
         tmp = load_cpu_field(ZF);
-        tcg_gen_brcond_i32(TCG_COND_EQ, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
         dead_tmp(tmp);
         tmp = load_cpu_field(VF);
         tmp2 = load_cpu_field(NF);
         tcg_gen_xor_i32(tmp, tmp, tmp2);
         dead_tmp(tmp2);
-        tcg_gen_brcond_i32(TCG_COND_LT, tmp, zero, label);
+        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
         break;
     default:
         fprintf(stderr, "Bad condition code 0x%x\n", cc);
@@ -770,7 +764,7 @@ static void gen_test_cc(int cc, int label)
     dead_tmp(tmp);
 }
 
-const uint8_t table_logic_cc[16] = {
+static const uint8_t table_logic_cc[16] = {
     1, /* and */
     1, /* xor */
     0, /* sub */
@@ -826,17 +820,19 @@ static inline void gen_bx_T0(DisasContext *s)
     gen_bx(s, tmp);
 }
 
-#if defined(CONFIG_USER_ONLY)
-#define gen_ldst(name, s) gen_op_##name##_raw()
-#else
-#define gen_ldst(name, s) do { \
-    s->is_mem = 1; \
-    if (IS_USER(s)) \
-        gen_op_##name##_user(); \
-    else \
-        gen_op_##name##_kernel(); \
-    } while (0)
-#endif
+/* Variant of store_reg which uses branch&exchange logic when storing
+   to r15 in ARM architecture v7 and above. The source must be a temporary
+   and will be marked as dead. */
+static inline void store_reg_bx(CPUState *env, DisasContext *s,
+                                int reg, TCGv var)
+{
+    if (reg == 15 && ENABLE_ARCH_7) {
+        gen_bx(s, var);
+    } else {
+        store_reg(s, reg, var);
+    }
+}
+
 static inline TCGv gen_ld8s(TCGv addr, int index)
 {
     TCGv tmp = new_tmp();
@@ -1004,15 +1000,6 @@ static inline void gen_vfp_##name(int dp)                             \
         gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, cpu_env); \
 }
 
-#define VFP_OP1(name)                               \
-static inline void gen_vfp_##name(int dp, int arg)  \
-{                                                   \
-    if (dp)                                         \
-        gen_op_vfp_##name##d(arg);                  \
-    else                                            \
-        gen_op_vfp_##name##s(arg);                  \
-}
-
 VFP_OP2(add)
 VFP_OP2(sub)
 VFP_OP2(mul)
@@ -1195,12 +1182,12 @@ static void neon_store_reg(int reg, int pass, TCGv var)
     dead_tmp(var);
 }
 
-static inline void neon_load_reg64(TCGv var, int reg)
+static inline void neon_load_reg64(TCGv_i64 var, int reg)
 {
     tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
 }
 
-static inline void neon_store_reg64(TCGv var, int reg)
+static inline void neon_store_reg64(TCGv_i64 var, int reg)
 {
     tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
 }
@@ -1236,12 +1223,12 @@ static inline void gen_mov_vreg_F0(int dp, int reg)
 
 #define ARM_CP_RW_BIT  (1 << 20)
 
-static inline void iwmmxt_load_reg(TCGv var, int reg)
+static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
 {
     tcg_gen_ld_i64(var, cpu_env, offsetof(CPUState, iwmmxt.regs[reg]));
 }
 
-static inline void iwmmxt_store_reg(TCGv var, int reg)
+static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
 {
     tcg_gen_st_i64(var, cpu_env, offsetof(CPUState, iwmmxt.regs[reg]));
 }
@@ -1461,10 +1448,7 @@ static void gen_iwmmxt_movl_T0_T1_wRn(int rn)
 
 static void gen_iwmmxt_movl_wRn_T0_T1(int rn)
 {
-    tcg_gen_extu_i32_i64(cpu_V0, cpu_T[0]);
-    tcg_gen_extu_i32_i64(cpu_V1, cpu_T[0]);
-    tcg_gen_shli_i64(cpu_V1, cpu_V1, 32);
-    tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
+    tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[1]);
     iwmmxt_store_reg(cpu_V0, rn);
 }
 
@@ -2579,6 +2563,7 @@ static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn)
         gen_set_pc_im(s->pc);
         tmp = load_reg(s, rd);
         gen_helper_set_cp(cpu_env, tcg_const_i32(insn), tmp);
+        dead_tmp(tmp);
     }
     return 0;
 }
@@ -2820,10 +2805,12 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         } else if (size == 1) {
                             gen_neon_dup_low16(tmp);
                         }
-                        tmp2 = new_tmp();
-                        tcg_gen_mov_i32(tmp2, tmp);
-                        neon_store_reg(rn, 0, tmp2);
-                        neon_store_reg(rn, 0, tmp);
+                        for (n = 0; n <= pass * 2; n++) {
+                            tmp2 = new_tmp();
+                            tcg_gen_mov_i32(tmp2, tmp);
+                            neon_store_reg(rn, n, tmp2);
+                        }
+                        neon_store_reg(rn, n, tmp);
                     } else {
                         /* VMOV */
                         switch (size) {
@@ -3038,6 +3025,10 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 case 21:
                 case 22:
                 case 23:
+                case 28:
+                case 29:
+                case 30:
+                case 31:
                     /* Source and destination the same.  */
                     gen_mov_F0_vreg(dp, rd);
                     break;
@@ -3073,9 +3064,9 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     break;
                 case 3: /* nmsc: -fd - (fn * fm)  */
                     gen_vfp_mul(dp);
-                    gen_mov_F1_vreg(dp, rd);
-                    gen_vfp_add(dp);
                     gen_vfp_neg(dp);
+                    gen_mov_F1_vreg(dp, rd);
+                    gen_vfp_sub(dp);
                     break;
                 case 4: /* mul: fn * fm */
                     gen_vfp_mul(dp);
@@ -3157,22 +3148,22 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     case 20: /* fshto */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_shto(dp, rm);
+                        gen_vfp_shto(dp, 16 - rm);
                         break;
                     case 21: /* fslto */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_slto(dp, rm);
+                        gen_vfp_slto(dp, 32 - rm);
                         break;
                     case 22: /* fuhto */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_uhto(dp, rm);
+                        gen_vfp_uhto(dp, 16 - rm);
                         break;
                     case 23: /* fulto */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_ulto(dp, rm);
+                        gen_vfp_ulto(dp, 32 - rm);
                         break;
                     case 24: /* ftoui */
                         gen_vfp_toui(dp);
@@ -3189,22 +3180,22 @@ static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     case 28: /* ftosh */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_tosh(dp, rm);
+                        gen_vfp_tosh(dp, 16 - rm);
                         break;
                     case 29: /* ftosl */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_tosl(dp, rm);
+                        gen_vfp_tosl(dp, 32 - rm);
                         break;
                     case 30: /* ftouh */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_touh(dp, rm);
+                        gen_vfp_touh(dp, 16 - rm);
                         break;
                     case 31: /* ftoul */
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
-                        gen_vfp_toul(dp, rm);
+                        gen_vfp_toul(dp, 32 - rm);
                         break;
                     default: /* undefined */
                         printf ("rn:%d\n", rn);
@@ -3403,7 +3394,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint32_t dest)
 
 static inline void gen_jmp (DisasContext *s, uint32_t dest)
 {
-    if (__builtin_expect(s->singlestep_enabled, 0)) {
+    if (unlikely(s->singlestep_enabled)) {
         /* An indirect jump so that we still trigger the debug exception.  */
         if (s->thumb)
             dest |= 1;
@@ -3477,11 +3468,11 @@ static int gen_set_psr_T0(DisasContext *s, uint32_t mask, int spsr)
     return 0;
 }
 
-/* Generate an old-style exception return.  */
-static void gen_exception_return(DisasContext *s)
+/* Generate an old-style exception return. Marks pc as dead. */
+static void gen_exception_return(DisasContext *s, TCGv pc)
 {
     TCGv tmp;
-    gen_movl_reg_T0(s, 15);
+    store_reg(s, 15, pc);
     tmp = load_cpu_field(spsr);
     gen_set_cpsr(tmp, 0xffffffff);
     dead_tmp(tmp);
@@ -3767,6 +3758,7 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                 } else /* size == 0 */ {
                     if (load) {
+                        TCGV_UNUSED(tmp2);
                         for (n = 0; n < 4; n++) {
                             tmp = gen_ld8u(cpu_T[1], IS_USER(s));
                             gen_op_addl_T1_im(stride);
@@ -3822,12 +3814,14 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     break;
                 case 3:
                     return 1;
+                default: /* Avoid compiler warnings.  */
+                    abort();
                 }
                 gen_op_addl_T1_im(1 << size);
                 tmp2 = new_tmp();
                 tcg_gen_mov_i32(tmp2, tmp);
                 neon_store_reg(rd, 0, tmp2);
-                neon_store_reg(rd, 0, tmp);
+                neon_store_reg(rd, 1, tmp);
                 rd += stride;
             }
             stride = (1 << size) * nregs;
@@ -3864,6 +3858,8 @@ static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     case 2:
                         tmp = gen_ld32(cpu_T[1], IS_USER(s));
                         break;
+                    default: /* Avoid compiler warnings.  */
+                        abort();
                     }
                     if (size != 2) {
                         tmp2 = neon_load_reg(rd, pass);
@@ -3918,7 +3914,7 @@ static void gen_neon_bsl(TCGv dest, TCGv t, TCGv f, TCGv c)
     tcg_gen_or_i32(dest, t, f);
 }
 
-static inline void gen_neon_narrow(int size, TCGv dest, TCGv src)
+static inline void gen_neon_narrow(int size, TCGv dest, TCGv_i64 src)
 {
     switch (size) {
     case 0: gen_helper_neon_narrow_u8(dest, src); break;
@@ -3928,7 +3924,7 @@ static inline void gen_neon_narrow(int size, TCGv dest, TCGv src)
     }
 }
 
-static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv src)
+static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv_i64 src)
 {
     switch (size) {
     case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
@@ -3938,7 +3934,7 @@ static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv src)
     }
 }
 
-static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv src)
+static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src)
 {
     switch (size) {
     case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
@@ -3982,7 +3978,7 @@ static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
     }
 }
 
-static inline void gen_neon_widen(TCGv dest, TCGv src, int size, int u)
+static inline void gen_neon_widen(TCGv_i64 dest, TCGv src, int size, int u)
 {
     if (u) {
         switch (size) {
@@ -4022,7 +4018,7 @@ static inline void gen_neon_subl(int size)
     }
 }
 
-static inline void gen_neon_negl(TCGv var, int size)
+static inline void gen_neon_negl(TCGv_i64 var, int size)
 {
     switch (size) {
     case 0: gen_helper_neon_negl_u16(var, var); break;
@@ -4032,7 +4028,7 @@ static inline void gen_neon_negl(TCGv var, int size)
     }
 }
 
-static inline void gen_neon_addl_saturate(TCGv op0, TCGv op1, int size)
+static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
 {
     switch (size) {
     case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
@@ -4041,9 +4037,9 @@ static inline void gen_neon_addl_saturate(TCGv op0, TCGv op1, int size)
     }
 }
 
-static inline void gen_neon_mull(TCGv dest, TCGv a, TCGv b, int size, int u)
+static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u)
 {
-    TCGv tmp;
+    TCGv_i64 tmp;
 
     switch ((size << 1) | u) {
     case 0: gen_helper_neon_mull_s8(dest, a, b); break;
@@ -4087,6 +4083,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
     TCGv tmp;
     TCGv tmp2;
     TCGv tmp3;
+    TCGv_i64 tmp64;
 
     if (!vfp_enabled(env))
       return 1;
@@ -4643,12 +4640,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     imm = (uint16_t)shift;
                     imm |= imm << 16;
                     tmp2 = tcg_const_i32(imm);
+                    TCGV_UNUSED_I64(tmp64);
                     break;
                 case 2:
                     imm = (uint32_t)shift;
                     tmp2 = tcg_const_i32(imm);
+                    TCGV_UNUSED_I64(tmp64);
+                    break;
                 case 3:
-                    tmp2 = tcg_const_i64(shift);
+                    tmp64 = tcg_const_i64(shift);
+                    TCGV_UNUSED(tmp2);
                     break;
                 default:
                     abort();
@@ -4659,26 +4660,23 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         neon_load_reg64(cpu_V0, rm + pass);
                         if (q) {
                           if (u)
-                            gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp2);
+                            gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp64);
                           else
-                            gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp2);
+                            gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp64);
                         } else {
                           if (u)
-                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp2);
+                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp64);
                           else
-                            gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp2);
+                            gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp64);
                         }
                     } else {
                         tmp = neon_load_reg(rm + pass, 0);
                         gen_neon_shift_narrow(size, tmp, tmp2, q, u);
-                        tcg_gen_extu_i32_i64(cpu_V0, tmp);
+                        tmp3 = neon_load_reg(rm + pass, 1);
+                        gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
+                        tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
                         dead_tmp(tmp);
-                        tmp = neon_load_reg(rm + pass, 1);
-                        gen_neon_shift_narrow(size, tmp, tmp2, q, u);
-                        tcg_gen_extu_i32_i64(cpu_V1, tmp);
-                        dead_tmp(tmp);
-                        tcg_gen_shli_i64(cpu_V1, cpu_V1, 32);
-                        tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
+                        dead_tmp(tmp3);
                     }
                     tmp = new_tmp();
                     if (op == 8 && !u) {
@@ -4822,7 +4820,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 neon_store_reg(rd, pass, tmp);
             }
         }
-    } else { /* (insn & 0x00800010 == 0x00800010) */
+    } else { /* (insn & 0x00800010 == 0x00800000) */
         if (size != 3) {
             op = (insn >> 8) & 0xf;
             if ((insn & (1 << 6)) == 0) {
@@ -4866,9 +4864,11 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     NEON_GET_REG(T0, rn, 1);
                     gen_neon_movl_scratch_T0(2);
                 }
+                TCGV_UNUSED(tmp3);
                 for (pass = 0; pass < 2; pass++) {
                     if (src1_wide) {
                         neon_load_reg64(cpu_V0, rn + pass);
+                        TCGV_UNUSED(tmp);
                     } else {
                         if (pass == 1 && rd == rn) {
                             gen_neon_movl_T0_scratch(2);
@@ -4883,6 +4883,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                     }
                     if (src2_wide) {
                         neon_load_reg64(cpu_V1, rm + pass);
+                        TCGV_UNUSED(tmp2);
                     } else {
                         if (pass == 1 && rd == rm) {
                             gen_neon_movl_T0_scratch(2);
@@ -5141,16 +5142,16 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         neon_load_reg64(cpu_V1, rm);
                     }
                 } else if (q) {
-                    tmp = tcg_temp_new(TCG_TYPE_I64);
+                    tmp64 = tcg_temp_new_i64();
                     if (imm < 8) {
                         neon_load_reg64(cpu_V0, rn);
-                        neon_load_reg64(tmp, rn + 1);
+                        neon_load_reg64(tmp64, rn + 1);
                     } else {
                         neon_load_reg64(cpu_V0, rn + 1);
-                        neon_load_reg64(tmp, rm);
+                        neon_load_reg64(tmp64, rm);
                     }
                     tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
-                    tcg_gen_shli_i64(cpu_V1, tmp, 64 - ((imm & 7) * 8));
+                    tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
                     tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
                     if (imm < 8) {
                         neon_load_reg64(cpu_V1, rm);
@@ -5159,13 +5160,14 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         imm -= 8;
                     }
                     tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
-                    tcg_gen_shri_i64(tmp, tmp, imm * 8);
-                    tcg_gen_or_i64(cpu_V1, cpu_V1, tmp);
+                    tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
+                    tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
                 } else {
+                    /* BUGFIX */
                     neon_load_reg64(cpu_V0, rn);
-                    tcg_gen_shri_i32(cpu_V0, cpu_V0, imm * 8);
+                    tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
                     neon_load_reg64(cpu_V1, rm);
-                    tcg_gen_shli_i32(cpu_V1, cpu_V1, 64 - (imm * 8));
+                    tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
                     tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
                 }
                 neon_store_reg64(cpu_V0, rd);
@@ -5184,7 +5186,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         NEON_GET_REG(T0, rm, pass * 2);
                         NEON_GET_REG(T1, rm, pass * 2 + 1);
                         switch (size) {
-                        case 0: tcg_gen_bswap_i32(cpu_T[0], cpu_T[0]); break;
+                        case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
                         case 1: gen_swap_half(cpu_T[0]); break;
                         case 2: /* no-op */ break;
                         default: abort();
@@ -5195,7 +5197,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         } else {
                             gen_op_movl_T0_T1();
                             switch (size) {
-                            case 0: tcg_gen_bswap_i32(cpu_T[0], cpu_T[0]); break;
+                            case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
                             case 1: gen_swap_half(cpu_T[0]); break;
                             default: abort();
                             }
@@ -5294,6 +5296,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */
                     if (size == 3)
                         return 1;
+                    TCGV_UNUSED(tmp2);
                     for (pass = 0; pass < 2; pass++) {
                         neon_load_reg64(cpu_V0, rm + pass);
                         tmp = new_tmp();
@@ -5336,7 +5339,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                         switch (op) {
                         case 1: /* VREV32 */
                             switch (size) {
-                            case 0: tcg_gen_bswap_i32(cpu_T[0], cpu_T[0]); break;
+                            case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
                             case 1: gen_swap_half(cpu_T[0]); break;
                             default: return 1;
                             }
@@ -5508,7 +5511,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 }
             } else if ((insn & (1 << 10)) == 0) {
                 /* VTBL, VTBX.  */
-                n = (insn >> 5) & 0x18;
+                n = ((insn >> 5) & 0x18) + 8;
                 if (insn & (1 << 6)) {
                     tmp = neon_load_reg(rd, 0);
                 } else {
@@ -5518,6 +5521,7 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 tmp2 = neon_load_reg(rm, 0);
                 gen_helper_neon_tbl(tmp2, tmp2, tmp, tcg_const_i32(rn),
                                     tcg_const_i32(n));
+                dead_tmp(tmp);
                 if (insn & (1 << 6)) {
                     tmp = neon_load_reg(rd, 1);
                 } else {
@@ -5528,7 +5532,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
                 gen_helper_neon_tbl(tmp3, tmp3, tmp, tcg_const_i32(rn),
                                     tcg_const_i32(n));
                 neon_store_reg(rd, 0, tmp2);
-                neon_store_reg(rd, 1, tmp2);
+                neon_store_reg(rd, 1, tmp3);
+                dead_tmp(tmp);
             } else if ((insn & 0x380) == 0) {
                 /* VDUP */
                 if (insn & (1 << 19)) {
@@ -5555,6 +5560,71 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
     return 0;
 }
 
+static int disas_cp14_read(CPUState * env, DisasContext *s, uint32_t insn)
+{
+    int crn = (insn >> 16) & 0xf;
+    int crm = insn & 0xf;
+    int op1 = (insn >> 21) & 7;
+    int op2 = (insn >> 5) & 7;
+    int rt = (insn >> 12) & 0xf;
+    TCGv tmp;
+
+    if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
+        if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) {
+            /* TEECR */
+            if (IS_USER(s))
+                return 1;
+            tmp = load_cpu_field(teecr);
+            store_reg(s, rt, tmp);
+            return 0;
+        }
+        if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) {
+            /* TEEHBR */
+            if (IS_USER(s) && (env->teecr & 1))
+                return 1;
+            tmp = load_cpu_field(teehbr);
+            store_reg(s, rt, tmp);
+            return 0;
+        }
+    }
+    fprintf(stderr, "Unknown cp14 read op1:%d crn:%d crm:%d op2:%d\n",
+            op1, crn, crm, op2);
+    return 1;
+}
+
+static int disas_cp14_write(CPUState * env, DisasContext *s, uint32_t insn)
+{
+    int crn = (insn >> 16) & 0xf;
+    int crm = insn & 0xf;
+    int op1 = (insn >> 21) & 7;
+    int op2 = (insn >> 5) & 7;
+    int rt = (insn >> 12) & 0xf;
+    TCGv tmp;
+
+    if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
+        if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) {
+            /* TEECR */
+            if (IS_USER(s))
+                return 1;
+            tmp = load_reg(s, rt);
+            gen_helper_set_teecr(cpu_env, tmp);
+            dead_tmp(tmp);
+            return 0;
+        }
+        if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) {
+            /* TEEHBR */
+            if (IS_USER(s) && (env->teecr & 1))
+                return 1;
+            tmp = load_reg(s, rt);
+            store_cpu_field(tmp, teehbr);
+            return 0;
+        }
+    }
+    fprintf(stderr, "Unknown cp14 write op1:%d crn:%d crm:%d op2:%d\n",
+            op1, crn, crm, op2);
+    return 1;
+}
+
 static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
 {
     int cpnum;
@@ -5576,9 +5646,19 @@ static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
     case 10:
     case 11:
        return disas_vfp_insn (env, s, insn);
+    case 14:
+        /* Coprocessors 7-15 are architecturally reserved by ARM.
+           Unfortunately Intel decided to ignore this.  */
+        if (arm_feature(env, ARM_FEATURE_XSCALE))
+            goto board;
+        if (insn & (1 << 20))
+            return disas_cp14_read(env, s, insn);
+        else
+            return disas_cp14_write(env, s, insn);
     case 15:
        return disas_cp15_insn (env, s, insn);
     default:
+    board:
        /* Unknown coprocessor.  See if the board has hooked it.  */
        return disas_cp_insn (env, s, insn);
     }
@@ -5586,7 +5666,7 @@ static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
 
 
 /* Store a 64-bit value to a register pair.  Clobbers val.  */
-static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv val)
+static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
 {
     TCGv tmp;
     tmp = new_tmp();
@@ -5599,13 +5679,13 @@ static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv val)
 }
 
 /* load a 32-bit value from a register and perform a 64-bit accumulate.  */
-static void gen_addq_lo(DisasContext *s, TCGv val, int rlow)
+static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
 {
-    TCGv tmp;
+    TCGv_i64 tmp;
     TCGv tmp2;
 
-    /* Load 64-bit value rd:rn.  */
-    tmp = tcg_temp_new(TCG_TYPE_I64);
+    /* Load value and extend to 64 bits.  */
+    tmp = tcg_temp_new_i64();
     tmp2 = load_reg(s, rlow);
     tcg_gen_extu_i32_i64(tmp, tmp2);
     dead_tmp(tmp2);
@@ -5613,27 +5693,24 @@ static void gen_addq_lo(DisasContext *s, TCGv val, int rlow)
 }
 
 /* load and add a 64-bit value from a register pair.  */
-static void gen_addq(DisasContext *s, TCGv val, int rlow, int rhigh)
+static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
 {
-    TCGv tmp;
-    TCGv tmp2;
+    TCGv_i64 tmp;
+    TCGv tmpl;
+    TCGv tmph;
 
     /* Load 64-bit value rd:rn.  */
-    tmp = tcg_temp_new(TCG_TYPE_I64);
-    tmp2 = load_reg(s, rhigh);
-    tcg_gen_extu_i32_i64(tmp, tmp2);
-    dead_tmp(tmp2);
-    tcg_gen_shli_i64(tmp, tmp, 32);
-    tcg_gen_add_i64(val, val, tmp);
-
-    tmp2 = load_reg(s, rlow);
-    tcg_gen_extu_i32_i64(tmp, tmp2);
-    dead_tmp(tmp2);
+    tmpl = load_reg(s, rlow);
+    tmph = load_reg(s, rhigh);
+    tmp = tcg_temp_new_i64();
+    tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
+    dead_tmp(tmpl);
+    dead_tmp(tmph);
     tcg_gen_add_i64(val, val, tmp);
 }
 
 /* Set N and Z flags from a 64-bit value.  */
-static void gen_logicq_cc(TCGv val)
+static void gen_logicq_cc(TCGv_i64 val)
 {
     TCGv tmp = new_tmp();
     gen_helper_logicq_cc(tmp, val);
@@ -5648,6 +5725,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
     TCGv tmp2;
     TCGv tmp3;
     TCGv addr;
+    TCGv_i64 tmp64;
 
     insn = ldl_code(s->pc);
     s->pc += 4;
@@ -5814,7 +5892,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
             /* Coprocessor double register transfer.  */
         } else if ((insn & 0x0f000010) == 0x0e000010) {
             /* Additional coprocessor register transfer.  */
-        } else if ((insn & 0x0ff10010) == 0x01000000) {
+        } else if ((insn & 0x0ff10020) == 0x01000000) {
             uint32_t mask;
             uint32_t val;
             /* cps (privileged) */
@@ -5831,7 +5909,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                 if (insn & (1 << 18))
                     val |= mask;
             }
-            if (insn & (1 << 14)) {
+            if (insn & (1 << 17)) {
                 mask |= CPSR_M;
                 val |= (insn & 0x1f);
             }
@@ -5950,7 +6028,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
         case 0x5: /* saturating add/subtract */
             rd = (insn >> 12) & 0xf;
             rn = (insn >> 16) & 0xf;
-            tmp = load_reg(s, rn);
+            tmp = load_reg(s, rm);
             tmp2 = load_reg(s, rn);
             if (op1 & 2)
                 gen_helper_double_saturate(tmp2, tmp2);
@@ -5982,10 +6060,10 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                     tcg_gen_sari_i32(tmp2, tmp2, 16);
                 else
                     gen_sxth(tmp2);
-                tmp2 = gen_muls_i64_i32(tmp, tmp2);
-                tcg_gen_shri_i64(tmp2, tmp2, 16);
+                tmp64 = gen_muls_i64_i32(tmp, tmp2);
+                tcg_gen_shri_i64(tmp64, tmp64, 16);
                 tmp = new_tmp();
-                tcg_gen_trunc_i64_i32(tmp, tmp2);
+                tcg_gen_trunc_i64_i32(tmp, tmp64);
                 if ((sh & 2) == 0) {
                     tmp2 = load_reg(s, rn);
                     gen_helper_add_setq(tmp, tmp, tmp2);
@@ -5999,10 +6077,11 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                 gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
                 dead_tmp(tmp2);
                 if (op1 == 2) {
-                    tmp = tcg_temp_new(TCG_TYPE_I64);
-                    tcg_gen_ext_i32_i64(tmp, cpu_T[0]);
-                    gen_addq(s, tmp, rn, rd);
-                    gen_storeq_reg(s, rn, rd, tmp);
+                    tmp64 = tcg_temp_new_i64();
+                    tcg_gen_ext_i32_i64(tmp64, tmp);
+                    dead_tmp(tmp);
+                    gen_addq(s, tmp64, rn, rd);
+                    gen_storeq_reg(s, rn, rd, tmp64);
                 } else {
                     if (op1 == 0) {
                         tmp2 = load_reg(s, rn);
@@ -6030,148 +6109,173 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
             /* immediate operand */
             val = insn & 0xff;
             shift = ((insn >> 8) & 0xf) * 2;
-            if (shift)
+            if (shift) {
                 val = (val >> shift) | (val << (32 - shift));
-            gen_op_movl_T1_im(val);
-            if (logic_cc && shift)
-                gen_set_CF_bit31(cpu_T[1]);
+            }
+            tmp2 = new_tmp();
+            tcg_gen_movi_i32(tmp2, val);
+            if (logic_cc && shift) {
+                gen_set_CF_bit31(tmp2);
+            }
         } else {
             /* register */
             rm = (insn) & 0xf;
-            gen_movl_T1_reg(s, rm);
+            tmp2 = load_reg(s, rm);
             shiftop = (insn >> 5) & 3;
             if (!(insn & (1 << 4))) {
                 shift = (insn >> 7) & 0x1f;
-                gen_arm_shift_im(cpu_T[1], shiftop, shift, logic_cc);
+                gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
             } else {
                 rs = (insn >> 8) & 0xf;
                 tmp = load_reg(s, rs);
-                gen_arm_shift_reg(cpu_T[1], shiftop, tmp, logic_cc);
+                gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
             }
         }
         if (op1 != 0x0f && op1 != 0x0d) {
             rn = (insn >> 16) & 0xf;
-            gen_movl_T0_reg(s, rn);
+            tmp = load_reg(s, rn);
+        } else {
+            TCGV_UNUSED(tmp);
         }
         rd = (insn >> 12) & 0xf;
         switch(op1) {
         case 0x00:
-            gen_op_andl_T0_T1();
-            gen_movl_reg_T0(s, rd);
-            if (logic_cc)
-                gen_op_logic_T0_cc();
+            tcg_gen_and_i32(tmp, tmp, tmp2);
+            if (logic_cc) {
+                gen_logic_CC(tmp);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x01:
-            gen_op_xorl_T0_T1();
-            gen_movl_reg_T0(s, rd);
-            if (logic_cc)
-                gen_op_logic_T0_cc();
+            tcg_gen_xor_i32(tmp, tmp, tmp2);
+            if (logic_cc) {
+                gen_logic_CC(tmp);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x02:
             if (set_cc && rd == 15) {
                 /* SUBS r15, ... is used for exception return.  */
-                if (IS_USER(s))
+                if (IS_USER(s)) {
                     goto illegal_op;
-                gen_op_subl_T0_T1_cc();
-                gen_exception_return(s);
+                }
+                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_exception_return(s, tmp);
             } else {
-                if (set_cc)
-                    gen_op_subl_T0_T1_cc();
-                else
-                    gen_op_subl_T0_T1();
-                gen_movl_reg_T0(s, rd);
+                if (set_cc) {
+                    gen_helper_sub_cc(tmp, tmp, tmp2);
+                } else {
+                    tcg_gen_sub_i32(tmp, tmp, tmp2);
+                }
+                store_reg_bx(env, s, rd, tmp);
             }
             break;
         case 0x03:
-            if (set_cc)
-                gen_op_rsbl_T0_T1_cc();
-            else
-                gen_op_rsbl_T0_T1();
-            gen_movl_reg_T0(s, rd);
+            if (set_cc) {
+                gen_helper_sub_cc(tmp, tmp2, tmp);
+            } else {
+                tcg_gen_sub_i32(tmp, tmp2, tmp);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x04:
-            if (set_cc)
-                gen_op_addl_T0_T1_cc();
-            else
-                gen_op_addl_T0_T1();
-            gen_movl_reg_T0(s, rd);
+            if (set_cc) {
+                gen_helper_add_cc(tmp, tmp, tmp2);
+            } else {
+                tcg_gen_add_i32(tmp, tmp, tmp2);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x05:
-            if (set_cc)
-                gen_op_adcl_T0_T1_cc();
-            else
-                gen_adc_T0_T1();
-            gen_movl_reg_T0(s, rd);
+            if (set_cc) {
+                gen_helper_adc_cc(tmp, tmp, tmp2);
+            } else {
+                gen_add_carry(tmp, tmp, tmp2);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x06:
-            if (set_cc)
-                gen_op_sbcl_T0_T1_cc();
-            else
-                gen_sbc_T0_T1();
-            gen_movl_reg_T0(s, rd);
+            if (set_cc) {
+                gen_helper_sbc_cc(tmp, tmp, tmp2);
+            } else {
+                gen_sub_carry(tmp, tmp, tmp2);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x07:
-            if (set_cc)
-                gen_op_rscl_T0_T1_cc();
-            else
-                gen_rsc_T0_T1();
-            gen_movl_reg_T0(s, rd);
+            if (set_cc) {
+                gen_helper_sbc_cc(tmp, tmp2, tmp);
+            } else {
+                gen_sub_carry(tmp, tmp2, tmp);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x08:
             if (set_cc) {
-                gen_op_andl_T0_T1();
-                gen_op_logic_T0_cc();
+                tcg_gen_and_i32(tmp, tmp, tmp2);
+                gen_logic_CC(tmp);
             }
+            dead_tmp(tmp);
             break;
         case 0x09:
             if (set_cc) {
-                gen_op_xorl_T0_T1();
-                gen_op_logic_T0_cc();
+                tcg_gen_xor_i32(tmp, tmp, tmp2);
+                gen_logic_CC(tmp);
             }
+            dead_tmp(tmp);
             break;
         case 0x0a:
             if (set_cc) {
-                gen_op_subl_T0_T1_cc();
+                gen_helper_sub_cc(tmp, tmp, tmp2);
             }
+            dead_tmp(tmp);
             break;
         case 0x0b:
             if (set_cc) {
-                gen_op_addl_T0_T1_cc();
+                gen_helper_add_cc(tmp, tmp, tmp2);
             }
+            dead_tmp(tmp);
             break;
         case 0x0c:
-            gen_op_orl_T0_T1();
-            gen_movl_reg_T0(s, rd);
-            if (logic_cc)
-                gen_op_logic_T0_cc();
+            tcg_gen_or_i32(tmp, tmp, tmp2);
+            if (logic_cc) {
+                gen_logic_CC(tmp);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 0x0d:
             if (logic_cc && rd == 15) {
                 /* MOVS r15, ... is used for exception return.  */
-                if (IS_USER(s))
+                if (IS_USER(s)) {
                     goto illegal_op;
-                gen_op_movl_T0_T1();
-                gen_exception_return(s);
+                }
+                gen_exception_return(s, tmp2);
             } else {
-                gen_movl_reg_T1(s, rd);
-                if (logic_cc)
-                    gen_op_logic_T1_cc();
+                if (logic_cc) {
+                    gen_logic_CC(tmp2);
+                }
+                store_reg_bx(env, s, rd, tmp2);
             }
             break;
         case 0x0e:
-            gen_op_bicl_T0_T1();
-            gen_movl_reg_T0(s, rd);
-            if (logic_cc)
-                gen_op_logic_T0_cc();
+            tcg_gen_bic_i32(tmp, tmp, tmp2);
+            if (logic_cc) {
+                gen_logic_CC(tmp);
+            }
+            store_reg_bx(env, s, rd, tmp);
             break;
         default:
         case 0x0f:
-            gen_op_notl_T1();
-            gen_movl_reg_T1(s, rd);
-            if (logic_cc)
-                gen_op_logic_T1_cc();
+            tcg_gen_not_i32(tmp2, tmp2);
+            if (logic_cc) {
+                gen_logic_CC(tmp2);
+            }
+            store_reg_bx(env, s, rd, tmp2);
             break;
         }
+        if (op1 != 0x0f && op1 != 0x0d) {
+            dead_tmp(tmp2);
+        }
     } else {
         /* other instructions */
         op1 = (insn >> 24) & 0xf;
@@ -6215,19 +6319,19 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                         tmp = load_reg(s, rs);
                         tmp2 = load_reg(s, rm);
                         if (insn & (1 << 22))
-                            tmp = gen_muls_i64_i32(tmp, tmp2);
+                            tmp64 = gen_muls_i64_i32(tmp, tmp2);
                         else
-                            tmp = gen_mulu_i64_i32(tmp, tmp2);
+                            tmp64 = gen_mulu_i64_i32(tmp, tmp2);
                         if (insn & (1 << 21)) /* mult accumulate */
-                            gen_addq(s, tmp, rn, rd);
+                            gen_addq(s, tmp64, rn, rd);
                         if (!(insn & (1 << 23))) { /* double accumulate */
                             ARCH(6);
-                            gen_addq_lo(s, tmp, rn);
-                            gen_addq_lo(s, tmp, rd);
+                            gen_addq_lo(s, tmp64, rn);
+                            gen_addq_lo(s, tmp64, rd);
                         }
                         if (insn & (1 << 20))
-                            gen_logicq_cc(tmp);
-                        gen_storeq_reg(s, rn, rd, tmp);
+                            gen_logicq_cc(tmp64);
+                        gen_storeq_reg(s, rn, rd, tmp64);
                         break;
                     }
                 } else {
@@ -6235,20 +6339,62 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                     rd = (insn >> 12) & 0xf;
                     if (insn & (1 << 23)) {
                         /* load/store exclusive */
+                        op1 = (insn >> 21) & 0x3;
+                        if (op1)
+                            ARCH(6K);
+                        else
+                            ARCH(6);
                         gen_movl_T1_reg(s, rn);
                         addr = cpu_T[1];
                         if (insn & (1 << 20)) {
                             gen_helper_mark_exclusive(cpu_env, cpu_T[1]);
-                            tmp = gen_ld32(addr, IS_USER(s));
+                            switch (op1) {
+                            case 0: /* ldrex */
+                                tmp = gen_ld32(addr, IS_USER(s));
+                                break;
+                            case 1: /* ldrexd */
+                                tmp = gen_ld32(addr, IS_USER(s));
+                                store_reg(s, rd, tmp);
+                                tcg_gen_addi_i32(addr, addr, 4);
+                                tmp = gen_ld32(addr, IS_USER(s));
+                                rd++;
+                                break;
+                            case 2: /* ldrexb */
+                                tmp = gen_ld8u(addr, IS_USER(s));
+                                break;
+                            case 3: /* ldrexh */
+                                tmp = gen_ld16u(addr, IS_USER(s));
+                                break;
+                            default:
+                                abort();
+                            }
                             store_reg(s, rd, tmp);
                         } else {
                             int label = gen_new_label();
                             rm = insn & 0xf;
                             gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
-                            tcg_gen_brcond_i32(TCG_COND_NE, cpu_T[0],
-                                               tcg_const_i32(0), label);
+                            tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0],
+                                                0, label);
                             tmp = load_reg(s,rm);
-                            gen_st32(tmp, cpu_T[1], IS_USER(s));
+                            switch (op1) {
+                            case 0:  /*  strex */
+                                gen_st32(tmp, addr, IS_USER(s));
+                                break;
+                            case 1: /*  strexd */
+                                gen_st32(tmp, addr, IS_USER(s));
+                                tcg_gen_addi_i32(addr, addr, 4);
+                                tmp = load_reg(s, rm + 1);
+                                gen_st32(tmp, addr, IS_USER(s));
+                                break;
+                            case 2: /* strexb */
+                                gen_st8(tmp, addr, IS_USER(s));
+                                break;
+                            case 3: /* strexh */
+                                gen_st16(tmp, addr, IS_USER(s));
+                                break;
+                            default:
+                                abort();
+                            }
                             gen_set_label(label);
                             gen_movl_reg_T0(s, rd);
                         }
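
The op1 switch added above extends exclusive load/store handling beyond plain LDREX/STREX to the ARMv6K doubleword, byte and halfword forms. A minimal decode sketch of the size selection (illustrative only; names are made up):

    #include <stdint.h>

    enum excl_kind { EXCL_WORD, EXCL_DOUBLE, EXCL_BYTE, EXCL_HALF };

    /* Bits [22:21] of the instruction pick the exclusive access size. */
    static enum excl_kind excl_kind(uint32_t insn)
    {
        switch ((insn >> 21) & 3) {
        case 0:  return EXCL_WORD;    /* ldrex/strex */
        case 1:  return EXCL_DOUBLE;  /* ldrexd/strexd */
        case 2:  return EXCL_BYTE;    /* ldrexb/strexb */
        default: return EXCL_HALF;    /* ldrexh/strexh */
        }
    }

Note that the doubleword forms simply access two adjacent words, using the register pair rd, rd+1 for the load and rm, rm+1 for the store.
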
@@ -6373,18 +6519,22 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                         tmp = load_reg(s, rn);
                         tmp2 = load_reg(s, rm);
                         shift = (insn >> 7) & 0x1f;
-                        if (shift)
-                            tcg_gen_shli_i32(tmp2, tmp2, shift);
                         if (insn & (1 << 6)) {
                             /* pkhtb */
+                            if (shift == 0)
+                                shift = 31;
+                            tcg_gen_sari_i32(tmp2, tmp2, shift);
                             tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
                             tcg_gen_ext16u_i32(tmp2, tmp2);
                         } else {
                             /* pkhbt */
+                            if (shift)
+                                tcg_gen_shli_i32(tmp2, tmp2, shift);
                             tcg_gen_ext16u_i32(tmp, tmp);
                             tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
                         }
                         tcg_gen_or_i32(tmp, tmp, tmp2);
+                        dead_tmp(tmp2);
                         store_reg(s, rd, tmp);
                     } else if ((insn & 0x00200020) == 0x00200000) {
                         /* [us]sat */
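
The corrected PKHTB path above uses an arithmetic right shift, with a zero shift field meaning ASR #32 (approximated by a shift of 31, which yields the same low halfword), while PKHBT keeps the logical left shift. A plain-C sketch of the packing semantics, assuming the compiler implements >> on negative values as an arithmetic shift:

    #include <stdint.h>

    /* PKHBT: low half of rn, high half of (rm << shift). */
    static uint32_t pkhbt(uint32_t rn, uint32_t rm, unsigned shift)
    {
        return (rn & 0x0000ffffu) | ((rm << shift) & 0xffff0000u);
    }

    /* PKHTB: high half of rn, low half of (rm ASR shift); shift 0 means 32. */
    static uint32_t pkhtb(uint32_t rn, uint32_t rm, unsigned shift)
    {
        int32_t s = (int32_t)rm >> (shift ? shift : 31); /* 31 ~ ASR #32 */
        return (rn & 0xffff0000u) | ((uint32_t)s & 0x0000ffffu);
    }
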
@@ -6467,7 +6617,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                             if (insn & (1 << 7))
                                 gen_rev16(tmp);
                             else
-                                tcg_gen_bswap_i32(tmp, tmp);
+                                tcg_gen_bswap32_i32(tmp, tmp);
                         }
                         store_reg(s, rd, tmp);
                     } else {
@@ -6479,14 +6629,14 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                     tmp2 = load_reg(s, rs);
                     if (insn & (1 << 20)) {
                         /* Signed multiply most significant [accumulate].  */
-                        tmp2 = gen_muls_i64_i32(tmp, tmp2);
+                        tmp64 = gen_muls_i64_i32(tmp, tmp2);
                         if (insn & (1 << 5))
-                            tcg_gen_addi_i64(tmp2, tmp2, 0x80000000u);
-                        tcg_gen_shri_i64(tmp2, tmp2, 32);
+                            tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
+                        tcg_gen_shri_i64(tmp64, tmp64, 32);
                         tmp = new_tmp();
-                        tcg_gen_trunc_i64_i32(tmp, tmp2);
-                        if (rn != 15) {
-                            tmp2 = load_reg(s, rn);
+                        tcg_gen_trunc_i64_i32(tmp, tmp64);
+                        if (rd != 15) {
+                            tmp2 = load_reg(s, rd);
                             if (insn & (1 << 6)) {
                                 tcg_gen_sub_i32(tmp, tmp, tmp2);
                             } else {
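
The signed most-significant-word multiply above now builds the full product in tmp64 and, for the rounding variants, adds 0x80000000 before taking the top word; the accumulating forms then combine the result with a third register. An equivalent stand-alone sketch (illustrative only):

    #include <stdint.h>

    /* SMMUL{R}: top 32 bits of the 64-bit signed product, optionally rounded. */
    static uint32_t smmul(int32_t rn, int32_t rm, int round)
    {
        int64_t prod = (int64_t)rn * rm;
        if (round)
            prod += 0x80000000LL;
        return (uint32_t)((uint64_t)prod >> 32);
    }
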
@@ -6494,7 +6644,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                             }
                             dead_tmp(tmp2);
                         }
-                        store_reg(s, rd, tmp);
+                        store_reg(s, rn, tmp);
                     } else {
                         if (insn & (1 << 5))
                             gen_swap_half(tmp2);
@@ -6508,20 +6658,20 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                         dead_tmp(tmp2);
                         if (insn & (1 << 22)) {
                             /* smlald, smlsld */
-                            tmp2 = tcg_temp_new(TCG_TYPE_I64);
-                            tcg_gen_ext_i32_i64(tmp2, tmp);
+                            tmp64 = tcg_temp_new_i64();
+                            tcg_gen_ext_i32_i64(tmp64, tmp);
                             dead_tmp(tmp);
-                            gen_addq(s, tmp2, rn, rd);
-                            gen_storeq_reg(s, rn, rd, tmp2);
+                            gen_addq(s, tmp64, rd, rn);
+                            gen_storeq_reg(s, rd, rn, tmp64);
                         } else {
                             /* smuad, smusd, smlad, smlsd */
-                            if (rn != 15)
+                            if (rd != 15)
                               {
-                                tmp2 = load_reg(s, rn);
+                                tmp2 = load_reg(s, rd);
                                 gen_helper_add_setq(tmp, tmp, tmp2);
                                 dead_tmp(tmp2);
                               }
-                            store_reg(s, rd, tmp);
+                            store_reg(s, rn, tmp);
                         }
                     }
                     break;
@@ -6534,12 +6684,12 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                         tmp2 = load_reg(s, rs);
                         gen_helper_usad8(tmp, tmp, tmp2);
                         dead_tmp(tmp2);
-                        if (rn != 15) {
-                            tmp2 = load_reg(s, rn);
+                        if (rd != 15) {
+                            tmp2 = load_reg(s, rd);
                             tcg_gen_add_i32(tmp, tmp, tmp2);
                             dead_tmp(tmp2);
                         }
-                        store_reg(s, rd, tmp);
+                        store_reg(s, rn, tmp);
                         break;
                     case 0x20: case 0x24: case 0x28: case 0x2c:
                         /* Bitfield insert/clear.  */
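
For reference, the usad8 helper called above computes a sum of absolute differences over the four byte lanes; when the accumulate register is not r15, the result is then added to it (USADA8). A stand-alone sketch of the lane arithmetic, not the QEMU helper itself:

    #include <stdint.h>

    /* USAD8: sum of absolute differences of corresponding unsigned bytes. */
    static uint32_t usad8_model(uint32_t a, uint32_t b)
    {
        uint32_t sum = 0;
        int i;

        for (i = 0; i < 4; i++) {
            int da = (a >> (8 * i)) & 0xff;
            int db = (b >> (8 * i)) & 0xff;
            sum += (da > db) ? da - db : db - da;
        }
        return sum;
    }
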
@@ -6562,6 +6712,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                         break;
                     case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
                     case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
+                        ARCH(6T2);
                         tmp = load_reg(s, rm);
                         shift = (insn >> 7) & 0x1f;
                         i = ((insn >> 16) & 0x1f) + 1;
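
The ARCH(6T2) check added above guards the bitfield extract encodings; shift holds the lsb and i the field width. Their semantics can be modelled as follows (illustrative sketch; assumes 0 < width and lsb + width <= 32):

    #include <stdint.h>

    static uint32_t ubfx_model(uint32_t val, unsigned lsb, unsigned width)
    {
        return (val >> lsb) & (0xffffffffu >> (32 - width));
    }

    static int32_t sbfx_model(uint32_t val, unsigned lsb, unsigned width)
    {
        uint32_t field = (val >> lsb) & (0xffffffffu >> (32 - width));
        uint32_t sign = 1u << (width - 1);

        /* Sign-extend the extracted field. */
        return (int32_t)((field ^ sign) - sign);
    }
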
@@ -6602,7 +6753,6 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
                 gen_add_data_offset(s, insn, tmp2);
             if (insn & (1 << 20)) {
                 /* load */
-                s->is_mem = 1;
                 if (insn & (1 << 22)) {
                     tmp = gen_ld8u(tmp2, i);
                 } else {
@@ -6652,6 +6802,7 @@ static void disas_arm_insn(CPUState * env, DisasContext *s)
 
                 /* compute total size */
                 loaded_base = 0;
+                TCGV_UNUSED(loaded_var);
                 n = 0;
                 for(i=0;i<16;i++) {
                     if (insn & (1 << i))
@@ -6880,6 +7031,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
     TCGv tmp2;
     TCGv tmp3;
     TCGv addr;
+    TCGv_i64 tmp64;
     int op;
     int shiftop;
     int conds;
@@ -6996,8 +7148,8 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                 } else {
                     int label = gen_new_label();
                     gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
-                    tcg_gen_brcond_i32(TCG_COND_NE, cpu_T[0],
-                                       tcg_const_i32(0), label);
+                    tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0],
+                                        0, label);
                     tmp = load_reg(s, rs);
                     gen_st32(tmp, cpu_T[1], IS_USER(s));
                     gen_set_label(label);
@@ -7059,8 +7211,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     int label = gen_new_label();
                     /* Must use a global that is not killed by the branch.  */
                     gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
-                    tcg_gen_brcond_i32(TCG_COND_NE, cpu_T[0], tcg_const_i32(0),
-                                       label);
+                    tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0], 0, label);
                     tmp = load_reg(s, rs);
                     switch (op) {
                     case 0:
@@ -7221,7 +7372,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
             gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
             if (logic_cc)
                 gen_logic_CC(tmp);
-            store_reg(s, rd, tmp);
+            store_reg_bx(env, s, rd, tmp);
             break;
         case 1: /* Sign/zero extend.  */
             tmp = load_reg(s, rm);
@@ -7282,7 +7433,7 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     gen_helper_rbit(tmp, tmp);
                     break;
                 case 0x08: /* rev */
-                    tcg_gen_bswap_i32(tmp, tmp);
+                    tcg_gen_bswap32_i32(tmp, tmp);
                     break;
                 case 0x09: /* rev16 */
                     gen_rev16(tmp);
@@ -7357,10 +7508,10 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     tcg_gen_sari_i32(tmp2, tmp2, 16);
                 else
                     gen_sxth(tmp2);
-                tmp2 = gen_muls_i64_i32(tmp, tmp2);
-                tcg_gen_shri_i64(tmp2, tmp2, 16);
+                tmp64 = gen_muls_i64_i32(tmp, tmp2);
+                tcg_gen_shri_i64(tmp64, tmp64, 16);
                 tmp = new_tmp();
-                tcg_gen_trunc_i64_i32(tmp, tmp2);
+                tcg_gen_trunc_i64_i32(tmp, tmp64);
                 if (rs != 15)
                   {
                     tmp2 = load_reg(s, rs);
@@ -7424,36 +7575,38 @@ static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 }
                 dead_tmp(tmp2);
-                tmp2 = tcg_temp_new(TCG_TYPE_I64);
-                gen_addq(s, tmp, rs, rd);
-                gen_storeq_reg(s, rs, rd, tmp);
+                /* BUGFIX: sign-extend the 32-bit sum to 64 bits before
+                   the 64-bit accumulate below.  */
+                tmp64 = tcg_temp_new_i64();
+                tcg_gen_ext_i32_i64(tmp64, tmp);
+                dead_tmp(tmp);
+                gen_addq(s, tmp64, rs, rd);
+                gen_storeq_reg(s, rs, rd, tmp64);
             } else {
                 if (op & 0x20) {
                     /* Unsigned 64-bit multiply  */
-                    tmp = gen_mulu_i64_i32(tmp, tmp2);
+                    tmp64 = gen_mulu_i64_i32(tmp, tmp2);
                 } else {
                     if (op & 8) {
                         /* smlalxy */
                         gen_mulxy(tmp, tmp2, op & 2, op & 1);
                         dead_tmp(tmp2);
-                        tmp2 = tcg_temp_new(TCG_TYPE_I64);
-                        tcg_gen_ext_i32_i64(tmp2, tmp);
+                        tmp64 = tcg_temp_new_i64();
+                        tcg_gen_ext_i32_i64(tmp64, tmp);
                         dead_tmp(tmp);
-                        tmp = tmp2;
                     } else {
                         /* Signed 64-bit multiply  */
-                        tmp = gen_muls_i64_i32(tmp, tmp2);
+                        tmp64 = gen_muls_i64_i32(tmp, tmp2);
                     }
                 }
                 if (op & 4) {
                     /* umaal */
-                    gen_addq_lo(s, tmp, rs);
-                    gen_addq_lo(s, tmp, rd);
+                    gen_addq_lo(s, tmp64, rs);
+                    gen_addq_lo(s, tmp64, rd);
                 } else if (op & 0x40) {
                     /* 64-bit accumulate.  */
-                    gen_addq(s, tmp, rs, rd);
+                    gen_addq(s, tmp64, rs, rd);
                 }
-                gen_storeq_reg(s, rs, rd, tmp);
+                gen_storeq_reg(s, rs, rd, tmp64);
             }
             break;
         }
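
In the Thumb-2 long-multiply hunk above, the op & 4 path is UMAAL, which folds both halves of the destination pair into the unsigned product as separate 32-bit addends; unlike UMLAL it does not treat RdHi:RdLo as one 64-bit accumulator. A minimal C model:

    #include <stdint.h>

    /* UMAAL: RdHi:RdLo = Rn * Rm + RdHi + RdLo (all unsigned). */
    static void umaal_model(uint32_t *rdhi, uint32_t *rdlo,
                            uint32_t rn, uint32_t rm)
    {
        uint64_t acc = (uint64_t)rn * rm;

        acc += *rdhi;
        acc += *rdlo;
        *rdlo = (uint32_t)acc;
        *rdhi = (uint32_t)(acc >> 32);
    }
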
@@ -8111,7 +8264,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
             break;
         case 0x9: /* neg */
             if (s->condexec_mask)
-                gen_op_subl_T0_T1();
+                tcg_gen_neg_i32(cpu_T[0], cpu_T[1]);
             else
                 gen_op_subl_T0_T1_cc();
             break;
@@ -8350,6 +8503,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
                     tcg_gen_addi_i32(addr, addr, 4);
                 }
             }
+            TCGV_UNUSED(tmp);
             if (insn & (1 << 8)) {
                 if (insn & (1 << 11)) {
                     /* pop pc */
@@ -8376,13 +8530,12 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
         case 1: case 3: case 9: case 11: /* czb */
             rm = insn & 7;
             tmp = load_reg(s, rm);
-            tmp2 = tcg_const_i32(0);
             s->condlabel = gen_new_label();
             s->condjmp = 1;
             if (insn & (1 << 11))
-                tcg_gen_brcond_i32(TCG_COND_EQ, tmp, tmp2, s->condlabel);
+                tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
             else
-                tcg_gen_brcond_i32(TCG_COND_NE, tmp, tmp2, s->condlabel);
+                tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
             dead_tmp(tmp);
             offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3;
             val = (uint32_t)s->pc + 2;
@@ -8414,7 +8567,7 @@ static void disas_thumb_insn(CPUState *env, DisasContext *s)
             rd = insn & 0x7;
             tmp = load_reg(s, rn);
             switch ((insn >> 6) & 3) {
-            case 0: tcg_gen_bswap_i32(tmp, tmp); break;
+            case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
             case 1: gen_rev16(tmp); break;
             case 3: gen_revsh(tmp); break;
             default: goto illegal_op;
@@ -8545,15 +8698,18 @@ undef:
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
    basic block 'tb'. If search_pc is TRUE, also generate PC
    information for each intermediate instruction. */
-static inline int gen_intermediate_code_internal(CPUState *env,
-                                                 TranslationBlock *tb,
-                                                 int search_pc)
+static inline void gen_intermediate_code_internal(CPUState *env,
+                                                  TranslationBlock *tb,
+                                                  int search_pc)
 {
     DisasContext dc1, *dc = &dc1;
+    CPUBreakpoint *bp;
     uint16_t *gen_opc_end;
     int j, lj;
     target_ulong pc_start;
     uint32_t next_page_start;
+    int num_insns;
+    int max_insns;
 
     /* generate intermediate code */
     num_temps = 0;
@@ -8572,7 +8728,6 @@ static inline int gen_intermediate_code_internal(CPUState *env,
     dc->thumb = env->thumb;
     dc->condexec_mask = (env->condexec_bits & 0xf) << 1;
     dc->condexec_cond = env->condexec_bits >> 4;
-    dc->is_mem = 0;
 #if !defined(CONFIG_USER_ONLY)
     if (IS_M(env)) {
         dc->user = ((env->v7m.exception == 0) && (env->v7m.control & 1));
@@ -8580,16 +8735,22 @@ static inline int gen_intermediate_code_internal(CPUState *env,
         dc->user = (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_USR;
     }
 #endif
-    cpu_F0s = tcg_temp_new(TCG_TYPE_I32);
-    cpu_F1s = tcg_temp_new(TCG_TYPE_I32);
-    cpu_F0d = tcg_temp_new(TCG_TYPE_I64);
-    cpu_F1d = tcg_temp_new(TCG_TYPE_I64);
+    cpu_F0s = tcg_temp_new_i32();
+    cpu_F1s = tcg_temp_new_i32();
+    cpu_F0d = tcg_temp_new_i64();
+    cpu_F1d = tcg_temp_new_i64();
     cpu_V0 = cpu_F0d;
     cpu_V1 = cpu_F1d;
     /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
-    cpu_M0 = tcg_temp_new(TCG_TYPE_I64);
+    cpu_M0 = tcg_temp_new_i64();
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
     lj = -1;
+    num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0)
+        max_insns = CF_COUNT_MASK;
+
+    gen_icount_start();
     /* Reset the conditional execution bits immediately. This avoids
        complications trying to do it at the end of the block.  */
     if (env->condexec_bits)
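
The icount bookkeeping introduced above bounds how many guest instructions a TB may contain: the low bits of tb->cflags carry the requested count, and zero means no explicit request, in which case the cap falls back to the mask itself. A hedged sketch of that calculation (the mask value here is a placeholder, not QEMU's CF_COUNT_MASK):

    #include <stdint.h>

    #define EXAMPLE_COUNT_MASK 0x7fff  /* placeholder constant */

    static int tb_max_insns(uint32_t cflags)
    {
        int max_insns = cflags & EXAMPLE_COUNT_MASK;
        return max_insns ? max_insns : EXAMPLE_COUNT_MASK;
    }
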
@@ -8599,17 +8760,28 @@ static inline int gen_intermediate_code_internal(CPUState *env,
         store_cpu_field(tmp, condexec_bits);
       }
     do {
-#ifndef CONFIG_USER_ONLY
+#ifdef CONFIG_USER_ONLY
+        /* Intercept jump to the magic kernel page.  */
+        if (dc->pc >= 0xffff0000) {
+            /* We always get here via a jump, so know we are not in a
+               conditional execution block.  */
+            gen_exception(EXCP_KERNEL_TRAP);
+            dc->is_jmp = DISAS_UPDATE;
+            break;
+        }
+#else
         if (dc->pc >= 0xfffffff0 && IS_M(env)) {
             /* We always get here via a jump, so know we are not in a
                conditional execution block.  */
             gen_exception(EXCP_EXCEPTION_EXIT);
+            dc->is_jmp = DISAS_UPDATE;
+            break;
         }
 #endif
 
-        if (env->nb_breakpoints > 0) {
-            for(j = 0; j < env->nb_breakpoints; j++) {
-                if (env->breakpoints[j] == dc->pc) {
+        if (unlikely(!TAILQ_EMPTY(&env->breakpoints))) {
+            TAILQ_FOREACH(bp, &env->breakpoints, entry) {
+                if (bp->pc == dc->pc) {
                     gen_set_condexec(dc);
                     gen_set_pc_im(dc->pc);
                     gen_exception(EXCP_DEBUG);
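
The breakpoint scan above replaces the old fixed-size array with the TAILQ list that env->breakpoints now uses. A self-contained sketch of that list shape and lookup, with made-up types (the real CPUBreakpoint carries more state):

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/queue.h>

    struct breakpoint {
        uint32_t pc;
        TAILQ_ENTRY(breakpoint) entry;
    };
    TAILQ_HEAD(bp_list, breakpoint);

    /* Return nonzero if a breakpoint is set at pc. */
    static int bp_hit(struct bp_list *bps, uint32_t pc)
    {
        struct breakpoint *bp;

        TAILQ_FOREACH(bp, bps, entry) {
            if (bp->pc == pc)
                return 1;
        }
        return 0;
    }

    int main(void)
    {
        struct bp_list bps = TAILQ_HEAD_INITIALIZER(bps);
        struct breakpoint b = { .pc = 0x8000 };

        TAILQ_INSERT_TAIL(&bps, &b, entry);
        printf("%d\n", bp_hit(&bps, 0x8000));
        return 0;
    }
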
@@ -8631,8 +8803,12 @@ static inline int gen_intermediate_code_internal(CPUState *env,
             }
             gen_opc_pc[lj] = dc->pc;
             gen_opc_instr_start[lj] = 1;
+            gen_opc_icount[lj] = num_insns;
         }
 
+        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+            gen_io_start();
+
         if (env->thumb) {
             disas_thumb_insn(env, dc);
             if (dc->condexec_mask) {
@@ -8655,24 +8831,30 @@ static inline int gen_intermediate_code_internal(CPUState *env,
             gen_set_label(dc->condlabel);
             dc->condjmp = 0;
         }
-        /* Terminate the TB on memory ops if watchpoints are present.  */
-        /* FIXME: This should be replacd by the deterministic execution
-         * IRQ raising bits.  */
-        if (dc->is_mem && env->nb_watchpoints)
-            break;
-
-        /* Translation stops when a conditional branch is enoutered.
+        /* Translation stops when a conditional branch is encountered.
          * Otherwise the subsequent code could get translated several times.
          * Also stop translation when a page boundary is reached.  This
-         * ensures prefech aborts occur at the right place.  */
+         * ensures prefetch aborts occur at the right place.  */
+        num_insns++;
     } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end &&
              !env->singlestep_enabled &&
-             dc->pc < next_page_start);
+             !singlestep &&
+             dc->pc < next_page_start &&
+             num_insns < max_insns);
+
+    if (tb->cflags & CF_LAST_IO) {
+        if (dc->condjmp) {
+            /* FIXME:  This can theoretically happen with self-modifying
+               code.  */
+            cpu_abort(env, "IO on conditional branch instruction");
+        }
+        gen_io_end();
+    }
 
     /* At this stage dc->condjmp will only be set when the skipped
        instruction was a conditional branch or trap, and the PC has
        already been written.  */
-    if (__builtin_expect(env->singlestep_enabled, 0)) {
+    if (unlikely(env->singlestep_enabled)) {
         /* Make sure the pc is updated, and raise a debug exception.  */
         if (dc->condjmp) {
             gen_set_condexec(dc);
@@ -8732,15 +8914,17 @@ static inline int gen_intermediate_code_internal(CPUState *env,
             dc->condjmp = 0;
         }
     }
+
 done_generating:
+    gen_icount_end(tb, num_insns);
     *gen_opc_ptr = INDEX_op_end;
 
 #ifdef DEBUG_DISAS
-    if (loglevel & CPU_LOG_TB_IN_ASM) {
-        fprintf(logfile, "----------------\n");
-        fprintf(logfile, "IN: %s\n", lookup_symbol(pc_start));
-        target_disas(logfile, pc_start, dc->pc - pc_start, env->thumb);
-        fprintf(logfile, "\n");
+    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
+        qemu_log("----------------\n");
+        qemu_log("IN: %s\n", lookup_symbol(pc_start));
+        log_target_disas(pc_start, dc->pc - pc_start, env->thumb);
+        qemu_log("\n");
     }
 #endif
     if (search_pc) {
@@ -8750,18 +8934,18 @@ done_generating:
             gen_opc_instr_start[lj++] = 0;
     } else {
         tb->size = dc->pc - pc_start;
+        tb->icount = num_insns;
     }
-    return 0;
 }
 
-int gen_intermediate_code(CPUState *env, TranslationBlock *tb)
+void gen_intermediate_code(CPUState *env, TranslationBlock *tb)
 {
-    return gen_intermediate_code_internal(env, tb, 0);
+    gen_intermediate_code_internal(env, tb, 0);
 }
 
-int gen_intermediate_code_pc(CPUState *env, TranslationBlock *tb)
+void gen_intermediate_code_pc(CPUState *env, TranslationBlock *tb)
 {
-    return gen_intermediate_code_internal(env, tb, 1);
+    gen_intermediate_code_internal(env, tb, 1);
 }
 
 static const char *cpu_mode_names[16] = {
@@ -8774,6 +8958,7 @@ void cpu_dump_state(CPUState *env, FILE *f,
                     int flags)
 {
     int i;
+#if 0
     union {
         uint32_t i;
         float s;
@@ -8785,6 +8970,7 @@ void cpu_dump_state(CPUState *env, FILE *f,
         float64 f64;
         double d;
     } d0;
+#endif
     uint32_t psr;
 
     for(i=0;i<16;i++) {