+ case 0x138:
+     /* With a REPNZ prefix this opcode is handled by the crc32 code. */
+     if (s->prefix & PREFIX_REPNZ) {
+         goto crc32;
+     }
+     /* fall through */
+ case 0x038:
+     /*
+      * The byte fetched earlier into modrm is used as the index into
+      * sse_op_table6; the actual ModRM byte is fetched next.  b1 picks
+      * the table column and, below, MMX (0) versus XMM (non-zero)
+      * operands.
+      */
+     b = modrm;
+     modrm = ldub_code(s->pc++);
+     rm = modrm & 7;
+     reg = ((modrm >> 3) & 7) | rex_r;
+     mod = (modrm >> 6) & 3;
+
+     sse_op2 = sse_op_table6[b].op[b1];
+     if (!sse_op2) {
+         goto illegal_op;
+     }
+     /* Reject the opcode when none of its required CPUID bits is set. */
+     if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask)) {
+         goto illegal_op;
+     }
+
+     if (b1) {
+         /* XMM operands. */
+         op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+         if (mod == 3) {
+             op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
+         } else {
+             /*
+              * Memory source: it is staged in xmm_t0.  Several opcodes
+              * load only the low part of the staging register.
+              */
+             op2_offset = offsetof(CPUX86State,xmm_t0);
+             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+             switch (b) {
+             case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
+             case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
+             case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
+                 gen_ldq_env_A0(s->mem_index, op2_offset +
+                                offsetof(XMMReg, XMM_Q(0)));
+                 break;
+             case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */
+             case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */
+                 tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
+                                    (s->mem_index >> 2) - 1);
+                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+                 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env, op2_offset +
+                                offsetof(XMMReg, XMM_L(0)));
+                 break;
+             case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */
+                 tcg_gen_qemu_ld16u(cpu_tmp0, cpu_A0,
+                                    (s->mem_index >> 2) - 1);
+                 tcg_gen_st16_tl(cpu_tmp0, cpu_env, op2_offset +
+                                 offsetof(XMMReg, XMM_W(0)));
+                 break;
+             case 0x2a: /* movntdqa */
+                 /*
+                  * Loaded directly into the destination register; no
+                  * helper is called for this form.
+                  */
+                 gen_ldo_env_A0(s->mem_index, op1_offset);
+                 return;
+             default:
+                 gen_ldo_env_A0(s->mem_index, op2_offset);
+                 break;
+             }
+         }
+     } else {
+         /* MMX operands; a memory source is staged in mmx_t0. */
+         op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
+         if (mod == 3) {
+             op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+         } else {
+             op2_offset = offsetof(CPUX86State,mmx_t0);
+             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+             gen_ldq_env_A0(s->mem_index, op2_offset);
+         }
+     }
+     /*
+      * An SSE_SPECIAL entry is a marker, not a callable helper; any
+      * form that reaches this point with one is rejected.
+      */
+     if (sse_op2 == SSE_SPECIAL) {
+         goto illegal_op;
+     }
+
+     tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+     tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+     ((void (*)(TCGv_ptr, TCGv_ptr))sse_op2)(cpu_ptr0, cpu_ptr1);
+
+     /* Opcode 0x17 is recorded as leaving its result in EFLAGS. */
+     if (b == 0x17) {
+         s->cc_op = CC_OP_EFLAGS;
+     }
+     break;
+ case 0x338: /* crc32 */
+ crc32:
+ b = modrm;
+ modrm = ldub_code(s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+
+ if (b != 0xf0 && b != 0xf1)
+ goto illegal_op;
+ if (!(s->cpuid_ext_features & CPUID_EXT_SSE42))
+ goto illegal_op;
+
+ if (b == 0xf0)
+ ot = OT_BYTE;
+ else if (b == 0xf1 && s->dflag != 2)
+ if (s->prefix & PREFIX_DATA)
+ ot = OT_WORD;
+ else
+ ot = OT_LONG;
+ else
+ ot = OT_QUAD;
+
+ gen_op_mov_TN_reg(OT_LONG, 0, reg);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+ gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
+ cpu_T[0], tcg_const_i32(8 << ot));
+
+ ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+ gen_op_mov_reg_T0(ot, reg);
+ break;
+ case 0x03a:
+ case 0x13a:
+     /*
+      * The byte fetched earlier into modrm is used as the index into
+      * sse_op_table7; the actual ModRM byte is fetched next.  Every form
+      * handled here also reads one immediate byte (val) after the
+      * addressing bytes.
+      */
+     b = modrm;
+     modrm = ldub_code(s->pc++);
+     rm = modrm & 7;
+     reg = ((modrm >> 3) & 7) | rex_r;
+     mod = (modrm >> 6) & 3;
+
+     sse_op2 = sse_op_table7[b].op[b1];
+     if (!sse_op2) {
+         goto illegal_op;
+     }
+     /* Reject the opcode when none of its required CPUID bits is set. */
+     if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask)) {
+         goto illegal_op;
+     }
+
+     if (sse_op2 == SSE_SPECIAL) {
+         /*
+          * Element extract/insert forms are generated inline instead of
+          * calling a helper.  ot is the general-register operand size.
+          */
+         ot = (s->dflag == 2) ? OT_QUAD : OT_LONG;
+         rm = (modrm & 7) | REX_B(s);
+         if (mod != 3) {
+             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+         }
+         /* The immediate follows the ModRM/addressing bytes. */
+         val = ldub_code(s->pc++);
+         switch (b) {
+         case 0x14: /* pextrb */
+             tcg_gen_ld8u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+                                     xmm_regs[reg].XMM_B(val & 15)));
+             if (mod == 3) {
+                 gen_op_mov_reg_T0(ot, rm);
+             } else {
+                 tcg_gen_qemu_st8(cpu_T[0], cpu_A0,
+                                  (s->mem_index >> 2) - 1);
+             }
+             break;
+         case 0x15: /* pextrw */
+             tcg_gen_ld16u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+                                     xmm_regs[reg].XMM_W(val & 7)));
+             if (mod == 3) {
+                 gen_op_mov_reg_T0(ot, rm);
+             } else {
+                 tcg_gen_qemu_st16(cpu_T[0], cpu_A0,
+                                   (s->mem_index >> 2) - 1);
+             }
+             break;
+         case 0x16:
+             if (ot == OT_LONG) { /* pextrd */
+                 tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+                                offsetof(CPUX86State,
+                                         xmm_regs[reg].XMM_L(val & 3)));
+                 tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+                 if (mod == 3) {
+                     gen_op_mov_reg_v(ot, rm, cpu_T[0]);
+                 } else {
+                     tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
+                                       (s->mem_index >> 2) - 1);
+                 }
+             } else { /* pextrq */
+#ifdef TARGET_X86_64
+                 tcg_gen_ld_i64(cpu_tmp1_i64, cpu_env,
+                                offsetof(CPUX86State,
+                                         xmm_regs[reg].XMM_Q(val & 1)));
+                 if (mod == 3) {
+                     gen_op_mov_reg_v(ot, rm, cpu_tmp1_i64);
+                 } else {
+                     tcg_gen_qemu_st64(cpu_tmp1_i64, cpu_A0,
+                                       (s->mem_index >> 2) - 1);
+                 }
+#else
+                 goto illegal_op;
+#endif
+             }
+             break;
+         case 0x17: /* extractps */
+             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
+                                     xmm_regs[reg].XMM_L(val & 3)));
+             if (mod == 3) {
+                 gen_op_mov_reg_T0(ot, rm);
+             } else {
+                 tcg_gen_qemu_st32(cpu_T[0], cpu_A0,
+                                   (s->mem_index >> 2) - 1);
+             }
+             break;
+         case 0x20: /* pinsrb */
+             /*
+              * Both the register and the memory form must leave the
+              * source value in cpu_tmp0, because that is what the store
+              * below reads.
+              */
+             if (mod == 3) {
+                 gen_op_mov_v_reg(OT_LONG, cpu_tmp0, rm);
+             } else {
+                 tcg_gen_qemu_ld8u(cpu_tmp0, cpu_A0,
+                                   (s->mem_index >> 2) - 1);
+             }
+             tcg_gen_st8_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State,
+                                     xmm_regs[reg].XMM_B(val & 15)));
+             break;
+         case 0x21: /* insertps */
+             /*
+              * Register source: element chosen by val bits 7:6.
+              * Memory source: a single 32-bit load.
+              */
+             if (mod == 3) {
+                 tcg_gen_ld_i32(cpu_tmp2_i32, cpu_env,
+                                offsetof(CPUX86State,xmm_regs[rm]
+                                         .XMM_L((val >> 6) & 3)));
+             } else {
+                 tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
+                                    (s->mem_index >> 2) - 1);
+                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+             }
+             /* Destination element is chosen by val bits 5:4. */
+             tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+                            offsetof(CPUX86State,xmm_regs[reg]
+                                     .XMM_L((val >> 4) & 3)));
+             /* val bits 3:0 each zero one destination element. */
+             if ((val >> 0) & 1) {
+                 tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+                                cpu_env, offsetof(CPUX86State,
+                                        xmm_regs[reg].XMM_L(0)));
+             }
+             if ((val >> 1) & 1) {
+                 tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+                                cpu_env, offsetof(CPUX86State,
+                                        xmm_regs[reg].XMM_L(1)));
+             }
+             if ((val >> 2) & 1) {
+                 tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+                                cpu_env, offsetof(CPUX86State,
+                                        xmm_regs[reg].XMM_L(2)));
+             }
+             if ((val >> 3) & 1) {
+                 tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/),
+                                cpu_env, offsetof(CPUX86State,
+                                        xmm_regs[reg].XMM_L(3)));
+             }
+             break;
+         case 0x22:
+             if (ot == OT_LONG) { /* pinsrd */
+                 if (mod == 3) {
+                     gen_op_mov_v_reg(ot, cpu_tmp0, rm);
+                 } else {
+                     tcg_gen_qemu_ld32u(cpu_tmp0, cpu_A0,
+                                        (s->mem_index >> 2) - 1);
+                 }
+                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_tmp0);
+                 tcg_gen_st_i32(cpu_tmp2_i32, cpu_env,
+                                offsetof(CPUX86State,
+                                         xmm_regs[reg].XMM_L(val & 3)));
+             } else { /* pinsrq */
+#ifdef TARGET_X86_64
+                 if (mod == 3) {
+                     gen_op_mov_v_reg(ot, cpu_tmp1_i64, rm);
+                 } else {
+                     tcg_gen_qemu_ld64(cpu_tmp1_i64, cpu_A0,
+                                       (s->mem_index >> 2) - 1);
+                 }
+                 tcg_gen_st_i64(cpu_tmp1_i64, cpu_env,
+                                offsetof(CPUX86State,
+                                         xmm_regs[reg].XMM_Q(val & 1)));
+#else
+                 goto illegal_op;
+#endif
+             }
+             break;
+         default:
+             /* Any other opcode generates nothing here. */
+             break;
+         }
+         return;
+     }
+
+     /* Helper-based forms: set up both operand pointers, then the immediate. */
+     if (b1) {
+         /* XMM operands; a memory source is staged in xmm_t0. */
+         op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
+         if (mod == 3) {
+             op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
+         } else {
+             op2_offset = offsetof(CPUX86State,xmm_t0);
+             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+             gen_ldo_env_A0(s->mem_index, op2_offset);
+         }
+     } else {
+         /* MMX operands; a memory source is staged in mmx_t0. */
+         op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
+         if (mod == 3) {
+             op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
+         } else {
+             op2_offset = offsetof(CPUX86State,mmx_t0);
+             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+             gen_ldq_env_A0(s->mem_index, op2_offset);
+         }
+     }
+     val = ldub_code(s->pc++);
+
+     if ((b & 0xfc) == 0x60) { /* pcmpXstrX */
+         s->cc_op = CC_OP_EFLAGS;
+
+         if (s->dflag == 2) {
+             /* The helper must use entire 64-bit gp registers */
+             val |= 1 << 8;
+         }
+     }
+
+     tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
+     tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset);
+     ((void (*)(TCGv_ptr, TCGv_ptr, TCGv_i32))sse_op2)(cpu_ptr0, cpu_ptr1, tcg_const_i32(val));
+     break;