[v2] fix ARMv7 data processing instructions

[qemu] / target-arm / op_helper.c
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c

index 38c97fe..f71162b 100644 (file)
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -1,7 +1,7 @@
  /*
   *  ARM helper routines
- * 
- *  Copyright (c) 2005 CodeSourcery, LLC
+ *
+ *  Copyright (c) 2005-2007 CodeSourcery, LLC
   *
   * This library is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
@@ -15,25 +15,13 @@
   *
   * You should have received a copy of the GNU Lesser General Public
   * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
   */
-
-#include <math.h>
-#include <fenv.h>
  #include "exec.h"
+#include "helpers.h"
  
-/* If the host doesn't define C99 math intrinsics then use the normal
-   operators.  This may generate excess exceptions, but it's probably
-   near enough for most things.  */
-#ifndef isless
-#define isless(x, y) (x < y)
-#endif
-#ifndef isgreater
-#define isgreater(x, y) (x > y)
-#endif
-#ifndef isunordered
-#define isunordered(x, y) (!((x < y) || (x >= y)))
-#endif
+#define SIGNBIT (uint32_t)0x80000000
+#define SIGNBIT64 ((uint64_t)1 << 63)
  
  void raise_exception(int tt)
  {
@@ -43,7 +31,7 @@ void raise_exception(int tt)
  
  /* thread support */
  
-spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
  
  void cpu_lock(void)
  {
@@ -55,175 +43,514 @@ void cpu_unlock(void)
      spin_unlock(&global_cpu_lock);
  }
  
-/* VFP support.  */
+uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def,
+                          uint32_t rn, uint32_t maxindex)
+{   /* Byte-wise table lookup: each byte of ireg selects one table byte.  */
+    uint32_t val;
+    uint32_t tmp;
+    uint32_t index;     /* unsigned, so it compares cleanly with maxindex */
+    int shift;
+    uint64_t *table;
+    table = (uint64_t *)&env->vfp.regs[rn];     /* table starts at regs[rn] */
+    val = 0;
+    for (shift = 0; shift < 32; shift += 8) {   /* one result byte per pass */
+        index = (ireg >> shift) & 0xff;
+        if (index < maxindex) {
+            tmp = (table[index >> 3] >> ((index & 7) << 3)) & 0xff;
+            val |= tmp << shift;
+        } else {                /* index out of range: keep that byte of def */
+            val |= def & (0xffu << shift);      /* 0xffu: no signed overflow at 24 */
+        }
+    }
+    return val;
+}
+
+#if !defined(CONFIG_USER_ONLY)
+
+#define MMUSUFFIX _mmu
+
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+/* try to fill the TLB and return an exception if error. If retaddr is
+   NULL, it means that the function was called in C code (i.e. not
+   from generated code or from helper.c) */
+/* XXX: fix it to restore all registers */
+void tlb_fill (target_ulong addr, int is_write, int mmu_idx, void *retaddr)
+{   /* Resolve addr via cpu_arm_handle_mmu_fault(); raise the exception on failure.  */
+    TranslationBlock *tb;
+    CPUState *saved_env;
+    unsigned long pc;
+    int ret;
+
+    /* XXX: hack to restore env in all cases, even if not called from
+       generated code */
+    saved_env = env;
+    env = cpu_single_env;
+    ret = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx, 1);
+    if (unlikely(ret)) {        /* a non-zero return is treated as a fault */
+        if (retaddr) {
+            /* now we have a real cpu fault */
+            pc = (unsigned long)retaddr;
+            tb = tb_find_pc(pc);
+            if (tb) {
+                /* the PC is inside the translated code. It means that we have
+                   a virtual CPU fault */
+                cpu_restore_state(tb, env, pc, NULL);
+            }
+        }
+        raise_exception(env->exception_index);
+    }
+    env = saved_env;            /* put back the env pointer saved on entry */
+}
+#endif
  
-void do_vfp_abss(void)
+/* FIXME: Pass an explicit pointer to QF to CPUState, and move saturating
+   instructions into helper.c  */
+uint32_t HELPER(add_setq)(uint32_t a, uint32_t b)
  {
-  FT0s = fabsf(FT0s);
+    uint32_t res = a + b;       /* wrapping 32-bit sum */
+    if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT))  /* a, b same sign; res differs */
+        env->QF = 1;            /* flag only: res is returned unclamped */
+    return res;
  }
  
-void do_vfp_absd(void)
+uint32_t HELPER(add_saturate)(uint32_t a, uint32_t b)
  {
-  FT0d = fabs(FT0d);
+    uint32_t res = a + b;       /* wrapping 32-bit sum */
+    if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) {    /* a, b same sign; res differs */
+        env->QF = 1;
+        res = ~(((int32_t)a >> 31) ^ SIGNBIT);  /* 0x7fffffff if bit 31 of a is clear, else 0x80000000 (arithmetic >>) */
+    }
+    return res;
  }
  
-void do_vfp_sqrts(void)
+uint32_t HELPER(sub_saturate)(uint32_t a, uint32_t b)
  {
-  FT0s = sqrtf(FT0s);
+    uint32_t res = a - b;       /* wrapping 32-bit difference */
+    if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) {     /* a, b differ in sign; res differs from a */
+        env->QF = 1;
+        res = ~(((int32_t)a >> 31) ^ SIGNBIT);  /* 0x7fffffff if bit 31 of a is clear, else 0x80000000 (arithmetic >>) */
+    }
+    return res;
  }
  
-void do_vfp_sqrtd(void)
+uint32_t HELPER(double_saturate)(int32_t val)
  {
-  FT0d = sqrt(FT0d);
+    uint32_t res;               /* 2 * val, clamped to the int32_t range */
+    if (val >= 0x40000000) {    /* 2 * val would exceed 0x7fffffff */
+        res = ~SIGNBIT;
+        env->QF = 1;
+    } else if (val < (int32_t)0xc0000000) {     /* strictly below -2^30: 2 * val < -2^31 */
+        res = SIGNBIT;
+        env->QF = 1;
+    } else {                    /* includes -2^30, whose double is exactly -2^31 */
+        res = (uint32_t)val << 1;       /* shift as unsigned: no signed-shift UB */
+    }
+    return res;
  }
  
-/* We use an == operator first to generate teh correct floating point
-   exception.  Subsequent comparisons use the exception-safe macros.  */
-#define DO_VFP_cmp(p)                     \
-void do_vfp_cmp##p(void)                  \
-{                                         \
-    uint32_t flags;                       \
-    if (FT0##p == FT1##p)                 \
-        flags = 0xc;                      \
-    else if (isless (FT0##p, FT1##p))     \
-        flags = 0x8;                      \
-    else if (isgreater (FT0##p, FT1##p))  \
-        flags = 0x2;                      \
-    else /* unordered */                  \
-        flags = 0x3;                      \
-    env->vfp.fpscr = (flags << 28) | (env->vfp.fpscr & 0x0fffffff); \
-    FORCE_RET();                          \
+uint32_t HELPER(add_usaturate)(uint32_t a, uint32_t b)
+{
+    uint32_t res = a + b;
+    if (res < a) {              /* the unsigned sum wrapped around */
+        env->QF = 1;
+        res = ~0;               /* clamp to 0xffffffff */
+    }
+    return res;
  }
-DO_VFP_cmp(s)
-DO_VFP_cmp(d)
-#undef DO_VFP_cmp
  
-/* We use a > operator first to get FP exceptions right.  */
-#define DO_VFP_cmpe(p)                      \
-void do_vfp_cmpe##p(void)                   \
-{                                           \
-    uint32_t flags;                         \
-    if (FT0##p > FT1##p)                    \
-        flags = 0x2;                        \
-    else if (isless (FT0##p, FT1##p))       \
-        flags = 0x8;                        \
-    else if (isunordered (FT0##p, FT1##p))  \
-        flags = 0x3;                        \
-    else /* equal */                        \
-        flags = 0xc;                        \
-    env->vfp.fpscr = (flags << 28) | (env->vfp.fpscr & 0x0fffffff); \
-    FORCE_RET();                            \
+uint32_t HELPER(sub_usaturate)(uint32_t a, uint32_t b)
+{
+    uint32_t res = a - b;
+    if (res > a) {              /* the unsigned difference wrapped (b > a) */
+        env->QF = 1;
+        res = 0;                /* clamp to zero */
+    }
+    return res;
  }
-DO_VFP_cmpe(s)
-DO_VFP_cmpe(d)
-#undef DO_VFP_cmpe
  
-/* Convert host exception flags to vfp form.  */
-int vfp_exceptbits_from_host(int host_bits)
+/* Signed saturation: clamp val to [-(1 << shift), (1 << shift) - 1].  */
+static inline uint32_t do_ssat(int32_t val, int shift)
  {
-    int target_bits = 0;
+    int32_t top;        /* bits of val above position shift */
+    uint32_t mask;      /* low shift bits set */
  
-#ifdef FE_INVALID
-    if (host_bits & FE_INVALID)
-        target_bits |= 1;
-#endif
-#ifdef FE_DIVBYZERO
-    if (host_bits & FE_DIVBYZERO)
-        target_bits |= 2;
-#endif
-#ifdef FE_OVERFLOW
-    if (host_bits & FE_OVERFLOW)
-        target_bits |= 4;
-#endif
-#ifdef FE_UNDERFLOW
-    if (host_bits & FE_UNDERFLOW)
-        target_bits |= 8;
-#endif
-#ifdef FE_INEXACT
-    if (host_bits & FE_INEXACT)
-        target_bits |= 0x10;
-#endif
-    /* C doesn't define an inexact exception.  */
-    return target_bits;
+    top = val >> shift;         /* assumes >> of a negative value is arithmetic */
+    mask = (1u << shift) - 1;
+    if (top > 0) {              /* val >= (1 << shift): too large */
+        env->QF = 1;
+        return mask;
+    } else if (top < -1) {      /* val < -(1 << shift): too small */
+        env->QF = 1;
+        return ~mask;
+    }
+    return val;                 /* in range: returned unchanged, QF untouched */
  }
  
-/* Convert vfp exception flags to target form.  */
-int vfp_host_exceptbits_to_host(int target_bits)
+/* Unsigned saturation: clamp val to [0, (1 << shift) - 1].  */
+static inline uint32_t do_usat(int32_t val, int shift)
  {
-    int host_bits = 0;
+    uint32_t max;       /* largest value that is returned unclamped */
  
-#ifdef FE_INVALID
-    if (target_bits & 1)
-        host_bits |= FE_INVALID;
-#endif
-#ifdef FE_DIVBYZERO
-    if (target_bits & 2)
-        host_bits |= FE_DIVBYZERO;
-#endif
-#ifdef FE_OVERFLOW
-    if (target_bits & 4)
-        host_bits |= FE_OVERFLOW;
-#endif
-#ifdef FE_UNDERFLOW
-    if (target_bits & 8)
-        host_bits |= FE_UNDERFLOW;
-#endif
-#ifdef FE_INEXACT
-    if (target_bits & 0x10)
-        host_bits |= FE_INEXACT;
-#endif
-    return host_bits;
-}
-
-void do_vfp_set_fpscr(void)
-{
-    int i;
-    uint32_t changed;
-
-    changed = env->vfp.fpscr;
-    env->vfp.fpscr = (T0 & 0xffc8ffff);
-    env->vfp.vec_len = (T0 >> 16) & 7;
-    env->vfp.vec_stride = (T0 >> 20) & 3;
-
-    changed ^= T0;
-    if (changed & (3 << 22)) {
-        i = (T0 >> 22) & 3;
-        switch (i) {
-        case 0:
-            i = FE_TONEAREST;
-            break;
-        case 1:
-            i = FE_UPWARD;
-            break;
-        case 2:
-            i = FE_DOWNWARD;
-            break;
-        case 3:
-            i = FE_TOWARDZERO;
-            break;
-        }
-        fesetround (i);
+    max = (1u << shift) - 1;
+    if (val < 0) {              /* negative input clamps to zero */
+        env->QF = 1;
+        return 0;
+    } else if (val > max) {     /* val is non-negative here, so the unsigned compare is exact */
+        env->QF = 1;
+        return max;
      }
+    return val;                 /* in range: returned unchanged, QF untouched */
+}
+
+/* Signed saturate: do_ssat() on the whole word (x reinterpreted as int32_t).  */
+uint32_t HELPER(ssat)(uint32_t x, uint32_t shift)
+{
+    return do_ssat(x, shift);
+}
+
+/* Dual halfword signed saturate: do_ssat() on each 16-bit half of x.  */
+uint32_t HELPER(ssat16)(uint32_t x, uint32_t shift)
+{
+    uint32_t res;
+
+    res = (uint16_t)do_ssat((int16_t)x, shift);         /* low half, sign-extended first */
+    res |= do_ssat(((int32_t)x) >> 16, shift) << 16;    /* high half, moved back into place */
+    return res;
+}
+
+/* Unsigned saturate: do_usat() on the whole word (x reinterpreted as int32_t).  */
+uint32_t HELPER(usat)(uint32_t x, uint32_t shift)
+{
+    return do_usat(x, shift);
+}
+
+/* Dual halfword unsigned saturate: do_usat() on each 16-bit half of x.  */
+uint32_t HELPER(usat16)(uint32_t x, uint32_t shift)
+{
+    uint32_t res;
+
+    res = (uint16_t)do_usat((int16_t)x, shift);         /* low half, sign-extended first */
+    res |= do_usat(((int32_t)x) >> 16, shift) << 16;    /* high half, moved back into place */
+    return res;
+}
+
+void HELPER(wfi)(void)
+{
+    env->exception_index = EXCP_HLT;    /* recorded before calling cpu_loop_exit() */
+    env->halted = 1;                    /* mark this CPU as halted */
+    cpu_loop_exit();
+}
+
+void HELPER(exception)(uint32_t excp)
+{
+    env->exception_index = excp;        /* store the requested exception number */
+    cpu_loop_exit();
+}
  
-    /* Clear host exception flags.  */
-    feclearexcept(FE_ALL_EXCEPT);
+uint32_t HELPER(cpsr_read)(void)
+{
+    return cpsr_read(env) & ~CPSR_EXEC;     /* CPSR_EXEC bits are cleared in the result */
+}
  
-#ifdef feenableexcept
-    if (changed & 0x1f00) {
-        i = vfp_exceptbits_to_host((T0 >> 8) & 0x1f);
-        feenableexcept (i);
-        fedisableexcept (FE_ALL_EXCEPT & ~i);
+void HELPER(cpsr_write)(uint32_t val, uint32_t mask)
+{
+    cpsr_write(env, val, mask);     /* forwards val and mask unchanged for the current env */
+}
+
+/* Access to user mode registers from privileged modes.  */
+uint32_t HELPER(get_user_reg)(uint32_t regno)
+{
+    uint32_t val;
+
+    if (regno == 13) {
+        val = env->banked_r13[0];       /* r13 comes from bank slot 0 */
+    } else if (regno == 14) {
+        val = env->banked_r14[0];       /* r14 comes from bank slot 0 */
+    } else if (regno >= 8
+               && (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_FIQ) {
+        val = env->usr_regs[regno - 8]; /* in FIQ mode, registers from r8 up are read from usr_regs */
+    } else {
+        val = env->regs[regno];         /* otherwise the live register file is used */
      }
-#endif
-    /* XXX: FZ and DN are not implemented.  */
+    return val;
+}
+
+void HELPER(set_user_reg)(uint32_t regno, uint32_t val)
+{
+    if (regno == 13) {
+        env->banked_r13[0] = val;       /* r13 goes to bank slot 0 */
+    } else if (regno == 14) {
+        env->banked_r14[0] = val;       /* r14 goes to bank slot 0 */
+    } else if (regno >= 8
+               && (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_FIQ) {
+        env->usr_regs[regno - 8] = val; /* in FIQ mode, registers from r8 up are written to usr_regs */
+    } else {
+        env->regs[regno] = val;         /* otherwise the live register file is used */
+    }
+}
+
+/* ??? Flag setting arithmetic is awkward because we need to do comparisons.
+   The only way to do that in TCG is a conditional branch, which clobbers
+   all our temporaries.  For now implement these as helper functions.  */
+
+uint32_t HELPER (add_cc)(uint32_t a, uint32_t b)
+{   /* Returns a + b and updates env->NF, ZF, CF and VF.  */
+    uint32_t result;
+    result = a + b;
+    env->NF = env->ZF = result;         /* both fields receive the raw result */
+    env->CF = result < a;               /* 1 when the unsigned sum wrapped */
+    env->VF = (a ^ b ^ -1) & (a ^ result);  /* bit 31 set: a, b same sign, result differs */
+    return result;
+}
+
+uint32_t HELPER(adc_cc)(uint32_t a, uint32_t b)
+{   /* Returns a + b, plus one if env->CF is non-zero; updates NF, ZF, CF, VF.  */
+    uint32_t result;
+    if (!env->CF) {
+        result = a + b;
+        env->CF = result < a;           /* 1 when the unsigned sum wrapped */
+    } else {
+        result = a + b + 1;
+        env->CF = result <= a;          /* <= accounts for the extra 1 */
+    }
+    env->VF = (a ^ b ^ -1) & (a ^ result);  /* bit 31 set: a, b same sign, result differs */
+    env->NF = env->ZF = result;         /* both fields receive the raw result */
+    return result;
+}
+
+uint32_t HELPER(sub_cc)(uint32_t a, uint32_t b)
+{   /* Returns a - b and updates env->NF, ZF, CF and VF.  */
+    uint32_t result;
+    result = a - b;
+    env->NF = env->ZF = result;         /* both fields receive the raw result */
+    env->CF = a >= b;                   /* 1 when the subtraction did not borrow */
+    env->VF = (a ^ b) & (a ^ result);   /* bit 31 set: a, b differ in sign, result differs from a */
+    return result;
+}
+
+uint32_t HELPER(sbc_cc)(uint32_t a, uint32_t b)
+{   /* Returns a - b, minus one more if env->CF is zero; updates NF, ZF, CF, VF.  */
+    uint32_t result;
+    if (!env->CF) {
+        result = a - b - 1;
+        env->CF = a > b;                /* strict: the extra 1 borrows when a == b */
+    } else {
+        result = a - b;
+        env->CF = a >= b;               /* 1 when the subtraction did not borrow */
+    }
+    env->VF = (a ^ b) & (a ^ result);   /* bit 31 set: a, b differ in sign, result differs from a */
+    env->NF = env->ZF = result;         /* both fields receive the raw result */
+    return result;
+}
+
+/* Similarly for variable shift instructions.  */
+
+uint32_t HELPER(shl)(uint32_t x, uint32_t i)
+{
+    int shift = i & 0xff;       /* only the low byte of i is the shift count */
+    if (shift >= 32)            /* every bit is shifted out */
+        return 0;
+    return x << shift;
+}
+
+uint32_t HELPER(shr)(uint32_t x, uint32_t i)
+{
+    int shift = i & 0xff;       /* only the low byte of i is the shift count */
+    if (shift >= 32)            /* every bit is shifted out */
+        return 0;
+    return (uint32_t)x >> shift;        /* logical (zero-filling) shift */
+}
+
+uint32_t HELPER(sar)(uint32_t x, uint32_t i)
+{
+    int shift = i & 0xff;       /* only the low byte of i is the shift count */
+    if (shift >= 32)            /* clamp the count so the C shift stays below 32 */
+        shift = 31;
+    return (int32_t)x >> shift; /* shifts x as a signed value */
+}
+
+uint32_t HELPER(ror)(uint32_t x, uint32_t i)
+{   /* Rotate x right by i; the rotation amount is taken modulo 32.  */
+    int shift = i & 0x1f;       /* 0..31, so neither shift below can reach 32 (UB) */
+    if (shift == 0)             /* a multiple of 32 leaves x unchanged */
+        return x;
+    return (x >> shift) | (x << (32 - shift));
+}
+
+uint32_t HELPER(shl_cc)(uint32_t x, uint32_t i)
+{   /* Left shift by the low byte of i, recording the last bit shifted out in env->CF.  */
+    int shift = i & 0xff;
+    if (shift >= 32) {
+        if (shift == 32)
+            env->CF = x & 1;    /* bit 0 is the last bit shifted out */
+        else
+            env->CF = 0;        /* more than 32: carry is zero */
+        return 0;
+    } else if (shift != 0) {
+        env->CF = (x >> (32 - shift)) & 1;      /* last bit shifted out at the top */
+        return x << shift;
+    }
+    return x;                   /* shift of 0: x and env->CF are left unchanged */
+}
+
+uint32_t HELPER(shr_cc)(uint32_t x, uint32_t i)
+{   /* Logical right shift by the low byte of i, recording the last bit shifted out in env->CF.  */
+    int shift = i & 0xff;
+    if (shift >= 32) {
+        if (shift == 32)
+            env->CF = (x >> 31) & 1;    /* bit 31 is the last bit shifted out */
+        else
+            env->CF = 0;        /* more than 32: carry is zero */
+        return 0;
+    } else if (shift != 0) {
+        env->CF = (x >> (shift - 1)) & 1;       /* last bit shifted out at the bottom */
+        return x >> shift;
+    }
+    return x;                   /* shift of 0: x and env->CF are left unchanged */
+}
+
+uint32_t HELPER(sar_cc)(uint32_t x, uint32_t i)
+{   /* Signed right shift by the low byte of i, recording the last bit shifted out in env->CF.  */
+    int shift = i & 0xff;
+    if (shift >= 32) {
+        env->CF = (x >> 31) & 1;        /* carry is bit 31 of x */
+        return (int32_t)x >> 31;        /* shift the signed value by 31 */
+    } else if (shift != 0) {
+        env->CF = (x >> (shift - 1)) & 1;       /* last bit shifted out at the bottom */
+        return (int32_t)x >> shift;
+    }
+    return x;                   /* shift of 0: x and env->CF are left unchanged */
+}
+
+uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i)
+{   /* Rotate right by the low byte of i (modulo 32), updating env->CF.  */
+    int shift1, shift;
+    shift1 = i & 0xff;          /* requested count */
+    shift = shift1 & 0x1f;      /* effective rotation, 0..31 */
+    if (shift == 0) {
+        if (shift1 != 0)        /* non-zero multiple of 32: only the carry changes */
+            env->CF = (x >> 31) & 1;
+        return x;
+    } else {
+        env->CF = (x >> (shift - 1)) & 1;       /* last bit rotated out of the bottom */
+        return ((uint32_t)x >> shift) | (x << (32 - shift));
+    }
+}
+
+uint64_t HELPER(neon_add_saturate_s64)(uint64_t src1, uint64_t src2)
+{   /* 64-bit add that clamps and sets env->QF when the sign-bit test below fires.  */
+    uint64_t res;
+
+    res = src1 + src2;          /* wrapping 64-bit sum */
+    if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) {   /* same-sign inputs, result sign differs */
+        env->QF = 1;
+        res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;       /* 2^63 - 1 if bit 63 of src1 is clear, else 2^63 (arithmetic >>) */
+    }
+    return res;
+}
+
+uint64_t HELPER(neon_add_saturate_u64)(uint64_t src1, uint64_t src2)
+{   /* Unsigned 64-bit add that clamps to all-ones and sets env->QF on wrap.  */
+    uint64_t res;
+
+    res = src1 + src2;
+    if (res < src1) {           /* the unsigned sum wrapped around */
+        env->QF = 1;
+        res = ~(uint64_t)0;
+    }
+    return res;
+}
+
+uint64_t HELPER(neon_sub_saturate_s64)(uint64_t src1, uint64_t src2)
+{   /* 64-bit subtract that clamps and sets env->QF when the sign-bit test below fires.  */
+    uint64_t res;
+
+    res = src1 - src2;          /* wrapping 64-bit difference */
+    if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) {    /* inputs differ in sign, result differs from src1 */
+        env->QF = 1;
+        res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;       /* 2^63 - 1 if bit 63 of src1 is clear, else 2^63 (arithmetic >>) */
+    }
+    return res;
+}
+
+uint64_t HELPER(neon_sub_saturate_u64)(uint64_t src1, uint64_t src2)
+{   /* Unsigned 64-bit subtract that clamps to zero and sets env->QF on borrow.  */
+    uint64_t res;
+
+    if (src1 < src2) {          /* the difference would be negative */
+        env->QF = 1;
+        res = 0;
+    } else {
+        res = src1 - src2;
+    }
+    return res;
+}
+
+/* These need to return a pair of values, so still use T0/T1.  */
+/* Transpose.  Argument order is rather strange to avoid special casing
+   the translation code.
+   On input T0 = rm, T1 = rd.  On output T0 = rd, T1 = rm  */
+void HELPER(neon_trn_u8)(void)
+{   /* Byte transpose of the word pair in T0/T1; both are overwritten.  */
+    uint32_t rd;
+    uint32_t rm;
+    rd = ((T0 & 0x00ff00ff) << 8) | (T1 & 0x00ff00ff);  /* even bytes of T0 move up one; even bytes of T1 stay */
+    rm = ((T1 & 0xff00ff00) >> 8) | (T0 & 0xff00ff00);  /* odd bytes of T1 move down one; odd bytes of T0 stay */
+    T0 = rd;
+    T1 = rm;
+}
+
+void HELPER(neon_trn_u16)(void)
+{   /* Halfword transpose of the word pair in T0/T1; both are overwritten.  */
+    uint32_t rd;
+    uint32_t rm;
+    rd = (T0 << 16) | (T1 & 0xffff);        /* low half of T0 on top of low half of T1 */
+    rm = (T1 >> 16) | (T0 & 0xffff0000);    /* high half of T1 below high half of T0 */
+    T0 = rd;
+    T1 = rm;
+}
+
+/* Worker routines for zip and unzip.  */
+void HELPER(neon_unzip_u8)(void)
+{   /* De-interleave bytes: T0 gets the even bytes of T0 then T1, T1 gets the odd bytes.  */
+    uint32_t rd;
+    uint32_t rm;
+    rd = (T0 & 0xff) | ((T0 >> 8) & 0xff00)                     /* T0 bytes 0 and 2 */
+         | ((T1 << 16) & 0xff0000) | ((T1 << 8) & 0xff000000);  /* T1 bytes 0 and 2 */
+    rm = ((T0 >> 8) & 0xff) | ((T0 >> 16) & 0xff00)             /* T0 bytes 1 and 3 */
+         | ((T1 << 8) & 0xff0000) | (T1 & 0xff000000);          /* T1 bytes 1 and 3 */
+    T0 = rd;
+    T1 = rm;
+}
+
+void HELPER(neon_zip_u8)(void)
+{   /* Interleave bytes (a = T0, b = T1): T0 <- a0 b0 a1 b1, T1 <- a2 b2 a3 b3.  */
+    uint32_t rd;
+    uint32_t rm;
+    rd = (T0 & 0xff) | ((T1 << 8) & 0xff00)                     /* a0, b0 */
+         | ((T0 << 8) & 0xff0000) | ((T1 << 16) & 0xff000000);  /* a1, b1 (byte 1, not byte 0 again) */
+    rm = ((T0 >> 16) & 0xff) | ((T1 >> 8) & 0xff00)             /* a2, b2 */
+         | ((T0 >> 8) & 0xff0000) | (T1 & 0xff000000);          /* a3, b3 */
+    T0 = rd;
+    T1 = rm;
  }
  
-void do_vfp_get_fpscr(void)
+void HELPER(neon_zip_u16)(void)
  {
-    int i;
+    uint32_t tmp;       /* holds the new T0 until T1 has been computed from the old T0 */
  
-    T0 = (env->vfp.fpscr & 0xffc8ffff) | (env->vfp.vec_len << 16)
-          | (env->vfp.vec_stride << 20);
-    i = fetestexcept(FE_ALL_EXCEPT);
-    T0 |= vfp_exceptbits_from_host(i);
+    tmp = (T0 & 0xffff) | (T1 << 16);       /* low halves: T0's below T1's */
+    T1 = (T1 & 0xffff0000) | (T0 >> 16);    /* high halves: T0's below T1's */
+    T0 = tmp;
  }