[v2] fix ARMv7 data processing instructions

[qemu] / target-arm / op_helper.c
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c

index 6748b06..f71162b 100644 (file)
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -15,11 +15,14 @@
   *
   * You should have received a copy of the GNU Lesser General Public
   * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
   */
  #include "exec.h"
  #include "helpers.h"
  
+#define SIGNBIT (uint32_t)0x80000000
+#define SIGNBIT64 ((uint64_t)1 << 63)
+
  void raise_exception(int tt)
  {
      env->exception_index = tt;
@@ -28,7 +31,7 @@ void raise_exception(int tt)
  
  /* thread support */
  
-spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
+static spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
  
  void cpu_lock(void)
  {
@@ -40,37 +43,31 @@ void cpu_unlock(void)
      spin_unlock(&global_cpu_lock);
  }
  
-void helper_neon_tbl(int rn, int maxindex)
+uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def,
+                          uint32_t rn, uint32_t maxindex)
  {
      uint32_t val;
-    uint32_t mask;
      uint32_t tmp;
      int index;
      int shift;
      uint64_t *table;
      table = (uint64_t *)&env->vfp.regs[rn];
      val = 0;
-    mask = 0;
      for (shift = 0; shift < 32; shift += 8) {
-        index = (T1 >> shift) & 0xff;
-        if (index <= maxindex) {
-            tmp = (table[index >> 3] >> (index & 7)) & 0xff;
+        index = (ireg >> shift) & 0xff;
+        if (index < maxindex) {
+            tmp = (table[index >> 3] >> ((index & 7) << 3)) & 0xff;
              val |= tmp << shift;
          } else {
-            val |= T0 & (0xff << shift);
+            val |= def & (0xff << shift);
          }
      }
-    T0 = val;
+    return val;
  }
  
  #if !defined(CONFIG_USER_ONLY)
  
  #define MMUSUFFIX _mmu
-#ifdef __s390__
-# define GETPC() ((void*)((unsigned long)__builtin_return_address(0) & 0x7fffffffUL))
-#else
-# define GETPC() (__builtin_return_address(0))
-#endif
  
  #define SHIFT 0
  #include "softmmu_template.h"
@@ -100,7 +97,7 @@ void tlb_fill (target_ulong addr, int is_write, int mmu_idx, void *retaddr)
      saved_env = env;
      env = cpu_single_env;
      ret = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx, 1);
-    if (__builtin_expect(ret, 0)) {
+    if (unlikely(ret)) {
          if (retaddr) {
              /* now we have a real cpu fault */
              pc = (unsigned long)retaddr;
@@ -117,7 +114,8 @@ void tlb_fill (target_ulong addr, int is_write, int mmu_idx, void *retaddr)
  }
  #endif
  
-#define SIGNBIT (uint32_t)0x80000000
+/* FIXME: Pass an explicit pointer to QF to CPUState, and move saturating
+   instructions into helper.c  */
  uint32_t HELPER(add_setq)(uint32_t a, uint32_t b)
  {
      uint32_t res = a + b;
@@ -187,7 +185,6 @@ static inline uint32_t do_ssat(int32_t val, int shift)
      int32_t top;
      uint32_t mask;
  
-    shift = PARAM1;
      top = val >> shift;
      mask = (1u << shift) - 1;
      if (top > 0) {
@@ -205,7 +202,6 @@ static inline uint32_t do_usat(int32_t val, int shift)
  {
      uint32_t max;
  
-    shift = PARAM1;
      max = (1u << shift) - 1;
      if (val < 0) {
          env->QF = 1;
@@ -304,3 +300,257 @@ void HELPER(set_user_reg)(uint32_t regno, uint32_t val)
      }
  }
  
+/* ??? Flag setting arithmetic is awkward because we need to do comparisons.
+   The only way to do that in TCG is a conditional branch, which clobbers
+   all our temporaries.  For now implement these as helper functions.  */
+
+uint32_t HELPER (add_cc)(uint32_t a, uint32_t b)
+{
+    uint32_t result;
+    result = a + b;
+    env->NF = env->ZF = result;
+    env->CF = result < a;
+    env->VF = (a ^ b ^ -1) & (a ^ result);
+    return result;
+}
+
+uint32_t HELPER(adc_cc)(uint32_t a, uint32_t b)
+{
+    uint32_t result;
+    if (!env->CF) {
+        result = a + b;
+        env->CF = result < a;
+    } else {
+        result = a + b + 1;
+        env->CF = result <= a;
+    }
+    env->VF = (a ^ b ^ -1) & (a ^ result);
+    env->NF = env->ZF = result;
+    return result;
+}
+
+uint32_t HELPER(sub_cc)(uint32_t a, uint32_t b)
+{
+    uint32_t result;
+    result = a - b;
+    env->NF = env->ZF = result;
+    env->CF = a >= b;
+    env->VF = (a ^ b) & (a ^ result);
+    return result;
+}
+
+uint32_t HELPER(sbc_cc)(uint32_t a, uint32_t b)
+{
+    uint32_t result;
+    if (!env->CF) {
+        result = a - b - 1;
+        env->CF = a > b;
+    } else {
+        result = a - b;
+        env->CF = a >= b;
+    }
+    env->VF = (a ^ b) & (a ^ result);
+    env->NF = env->ZF = result;
+    return result;
+}
+
+/* Similarly for variable shift instructions.  */
+
+uint32_t HELPER(shl)(uint32_t x, uint32_t i)
+{
+    int shift = i & 0xff;
+    if (shift >= 32)
+        return 0;
+    return x << shift;
+}
+
+uint32_t HELPER(shr)(uint32_t x, uint32_t i)
+{
+    int shift = i & 0xff;
+    if (shift >= 32)
+        return 0;
+    return (uint32_t)x >> shift;
+}
+
+uint32_t HELPER(sar)(uint32_t x, uint32_t i)
+{
+    int shift = i & 0xff;
+    if (shift >= 32)
+        shift = 31;
+    return (int32_t)x >> shift;
+}
+
+uint32_t HELPER(ror)(uint32_t x, uint32_t i)
+{
+    int shift = i & 0x1f; /* rotate is modulo 32; a C shift by >= 32 is UB */
+    if (shift == 0)
+        return x;
+    return (x >> shift) | (x << (32 - shift));
+}
+
+uint32_t HELPER(shl_cc)(uint32_t x, uint32_t i)
+{
+    int shift = i & 0xff;
+    if (shift >= 32) {
+        if (shift == 32)
+            env->CF = x & 1;
+        else
+            env->CF = 0;
+        return 0;
+    } else if (shift != 0) {
+        env->CF = (x >> (32 - shift)) & 1;
+        return x << shift;
+    }
+    return x;
+}
+
+uint32_t HELPER(shr_cc)(uint32_t x, uint32_t i)
+{
+    int shift = i & 0xff;
+    if (shift >= 32) {
+        if (shift == 32)
+            env->CF = (x >> 31) & 1;
+        else
+            env->CF = 0;
+        return 0;
+    } else if (shift != 0) {
+        env->CF = (x >> (shift - 1)) & 1;
+        return x >> shift;
+    }
+    return x;
+}
+
+uint32_t HELPER(sar_cc)(uint32_t x, uint32_t i)
+{
+    int shift = i & 0xff;
+    if (shift >= 32) {
+        env->CF = (x >> 31) & 1;
+        return (int32_t)x >> 31;
+    } else if (shift != 0) {
+        env->CF = (x >> (shift - 1)) & 1;
+        return (int32_t)x >> shift;
+    }
+    return x;
+}
+
+uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i)
+{
+    int shift1, shift;
+    shift1 = i & 0xff;
+    shift = shift1 & 0x1f;
+    if (shift == 0) {
+        if (shift1 != 0)
+            env->CF = (x >> 31) & 1;
+        return x;
+    } else {
+        env->CF = (x >> (shift - 1)) & 1;
+        return ((uint32_t)x >> shift) | (x << (32 - shift));
+    }
+}
+
+uint64_t HELPER(neon_add_saturate_s64)(uint64_t src1, uint64_t src2)
+{
+    uint64_t res;
+
+    res = src1 + src2;
+    if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) {
+        env->QF = 1;
+        res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
+    }
+    return res;
+}
+
+uint64_t HELPER(neon_add_saturate_u64)(uint64_t src1, uint64_t src2)
+{
+    uint64_t res;
+
+    res = src1 + src2;
+    if (res < src1) {
+        env->QF = 1;
+        res = ~(uint64_t)0;
+    }
+    return res;
+}
+
+uint64_t HELPER(neon_sub_saturate_s64)(uint64_t src1, uint64_t src2)
+{
+    uint64_t res;
+
+    res = src1 - src2;
+    if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) {
+        env->QF = 1;
+        res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64;
+    }
+    return res;
+}
+
+uint64_t HELPER(neon_sub_saturate_u64)(uint64_t src1, uint64_t src2)
+{
+    uint64_t res;
+
+    if (src1 < src2) {
+        env->QF = 1;
+        res = 0;
+    } else {
+        res = src1 - src2;
+    }
+    return res;
+}
+
+/* These need to return a pair of values, so still use T0/T1.  */
+/* Transpose.  Argument order is rather strange to avoid special casing
+   the translation code.
+   On input T0 = rm, T1 = rd.  On output T0 = rd, T1 = rm  */
+void HELPER(neon_trn_u8)(void)
+{
+    uint32_t rd;
+    uint32_t rm;
+    rd = ((T0 & 0x00ff00ff) << 8) | (T1 & 0x00ff00ff);
+    rm = ((T1 & 0xff00ff00) >> 8) | (T0 & 0xff00ff00);
+    T0 = rd;
+    T1 = rm;
+}
+
+void HELPER(neon_trn_u16)(void)
+{
+    uint32_t rd;
+    uint32_t rm;
+    rd = (T0 << 16) | (T1 & 0xffff);
+    rm = (T1 >> 16) | (T0 & 0xffff0000);
+    T0 = rd;
+    T1 = rm;
+}
+
+/* Worker routines for zip and unzip.  */
+void HELPER(neon_unzip_u8)(void)
+{
+    uint32_t rd;
+    uint32_t rm;
+    rd = (T0 & 0xff) | ((T0 >> 8) & 0xff00)
+         | ((T1 << 16) & 0xff0000) | ((T1 << 8) & 0xff000000);
+    rm = ((T0 >> 8) & 0xff) | ((T0 >> 16) & 0xff00)
+         | ((T1 << 8) & 0xff0000) | (T1 & 0xff000000);
+    T0 = rd;
+    T1 = rm;
+}
+
+void HELPER(neon_zip_u8)(void)
+{
+    uint32_t rd;
+    uint32_t rm;
+    rd = (T0 & 0xff) | ((T1 << 8) & 0xff00)
+         | ((T0 << 16) & 0xff0000) | ((T1 << 24) & 0xff000000);
+    rm = ((T0 >> 16) & 0xff) | ((T1 >> 8) & 0xff00)
+         | ((T0 >> 8) & 0xff0000) | (T1 & 0xff000000);
+    T0 = rd;
+    T1 = rm;
+}
+
+void HELPER(neon_zip_u16)(void)
+{
+    uint32_t tmp;
+
+    tmp = (T0 & 0xffff) | (T1 << 16);
+    T1 = (T1 & 0xffff0000) | (T0 >> 16);
+    T0 = tmp;
+}