/* Operand width in bits for this instantiation: 8, 16 or 32 for SHIFT 0, 1, 2. */
2 #define DATA_BITS (1 << (3 + SHIFT))
/* Mask that reduces shift, rotate and bit-test counts to 0..DATA_BITS-1. */
3 #define SHIFT_MASK (DATA_BITS - 1)
/* Top (sign) bit of a DATA_BITS-wide value.
   NOTE(review): with DATA_BITS == 32 this is 1 << 31 evaluated on a signed int. */
4 #define SIGN_MASK (1 << (DATA_BITS - 1))
/* Three alternative type/mask groups follow (8-, 16- and 32-bit operands).
   NOTE(review): the preprocessor conditionals selecting one of them are not
   among the lines shown here - presumably keyed on SHIFT; confirm. */
/* 8-bit operands */
8 #define DATA_TYPE uint8_t
9 #define DATA_STYPE int8_t
10 #define DATA_MASK 0xff
/* 16-bit operands */
13 #define DATA_TYPE uint16_t
14 #define DATA_STYPE int16_t
15 #define DATA_MASK 0xffff
/* 32-bit operands */
18 #define DATA_TYPE uint32_t
19 #define DATA_STYPE int32_t
20 #define DATA_MASK 0xffffffff
/* any other operand size stops the build */
22 #error unhandled operand size
25 /* dynamic flags computation */
/* Returns the flag word for an addition at the current operand size: the OR
   of the carry, parity, adjust, zero, sign and overflow bits derived from
   CC_SRC, CC_DST and src1.
   NOTE(review): src1 is assigned on a line not shown here (presumably
   CC_SRC - confirm). */
27 static int glue(compute_all_add, SUFFIX)(void)
29 int cf, pf, af, zf, sf, of;
/* second operand = CC_DST - CC_SRC */
32 src2 = CC_DST - CC_SRC;
/* carry: the truncated CC_DST is below src1 (unsigned compare) */
33 cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
/* parity is looked up from the low byte of CC_DST only */
34 pf = parity_table[(uint8_t)CC_DST];
/* adjust flag: bit 4 of CC_DST ^ src1 ^ src2 */
35 af = (CC_DST ^ src1 ^ src2) & 0x10;
/* zero flag lands in bit 6 */
36 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag: operand bit DATA_BITS-1 moved to bit 7 */
37 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow: src1 and src2 agree in the sign bit while CC_DST differs from
   src1; that bit is moved to the CC_O position */
38 of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
39 return cf | pf | af | zf | sf | of;
/* Carry-only helper for additions: the visible test is the unsigned
   "CC_DST < src1" comparison at the current operand size. */
42 static int glue(compute_c_add, SUFFIX)(void)
46 cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
/* Returns the flag word for the add-with-carry case: same layout as the plain
   add helper, but the second operand is recovered with an extra -1 and the
   carry test uses <= instead of <.
   NOTE(review): presumably used only when the incoming carry was 1 - confirm
   against the CC_OP selection in op_adc. */
50 static int glue(compute_all_adc, SUFFIX)(void)
52 int cf, pf, af, zf, sf, of;
/* second operand = CC_DST - CC_SRC - 1 */
55 src2 = CC_DST - CC_SRC - 1;
/* carry: the truncated CC_DST is below or equal to src1 (unsigned compare) */
56 cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
/* parity is looked up from the low byte of CC_DST only */
57 pf = parity_table[(uint8_t)CC_DST];
/* adjust flag: bit 4 of CC_DST ^ src1 ^ src2 */
58 af = (CC_DST ^ src1 ^ src2) & 0x10;
/* zero flag lands in bit 6 */
59 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag: operand bit DATA_BITS-1 moved to bit 7 */
60 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow: src1 and src2 agree in the sign bit while CC_DST differs from src1 */
61 of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
62 return cf | pf | af | zf | sf | of;
/* Carry-only helper for the add-with-carry case: the visible test is the
   unsigned "CC_DST <= src1" comparison at the current operand size. */
65 static int glue(compute_c_adc, SUFFIX)(void)
69 cf = (DATA_TYPE)CC_DST <= (DATA_TYPE)src1;
/* Returns the flag word for a subtraction at the current operand size: the OR
   of the carry (borrow), parity, adjust, zero, sign and overflow bits.
   NOTE(review): src1 is assigned on a line not shown here (presumably
   CC_SRC - confirm). */
73 static int glue(compute_all_sub, SUFFIX)(void)
75 int cf, pf, af, zf, sf, of;
/* second operand = CC_SRC - CC_DST */
78 src2 = CC_SRC - CC_DST;
/* borrow: src1 is below src2 (unsigned compare) */
79 cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
/* parity is looked up from the low byte of CC_DST only */
80 pf = parity_table[(uint8_t)CC_DST];
/* adjust flag: bit 4 of CC_DST ^ src1 ^ src2 */
81 af = (CC_DST ^ src1 ^ src2) & 0x10;
/* zero flag lands in bit 6 */
82 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag: operand bit DATA_BITS-1 moved to bit 7 */
83 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow: src1 and src2 differ in the sign bit and CC_DST differs from src1 */
84 of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
85 return cf | pf | af | zf | sf | of;
/* Carry-only helper for subtractions: rebuilds the second operand from
   CC_SRC and CC_DST and tests unsigned src1 < src2. */
88 static int glue(compute_c_sub, SUFFIX)(void)
92 src2 = CC_SRC - CC_DST;
93 cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
/* Returns the flag word for the subtract-with-borrow case: same layout as the
   plain subtract helper, but the second operand is recovered with an extra -1
   and the borrow test uses <= instead of <.
   NOTE(review): presumably used only when the incoming carry was 1 - confirm
   against the CC_OP selection in op_sbb. */
97 static int glue(compute_all_sbb, SUFFIX)(void)
99 int cf, pf, af, zf, sf, of;
/* second operand = CC_SRC - CC_DST - 1 */
102 src2 = CC_SRC - CC_DST - 1;
/* borrow: src1 is below or equal to src2 (unsigned compare) */
103 cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
/* parity is looked up from the low byte of CC_DST only */
104 pf = parity_table[(uint8_t)CC_DST];
/* adjust flag: bit 4 of CC_DST ^ src1 ^ src2 */
105 af = (CC_DST ^ src1 ^ src2) & 0x10;
/* zero flag lands in bit 6 */
106 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag: operand bit DATA_BITS-1 moved to bit 7 */
107 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow: src1 and src2 differ in the sign bit and CC_DST differs from src1 */
108 of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
109 return cf | pf | af | zf | sf | of;
/* Carry-only helper for the subtract-with-borrow case: rebuilds the second
   operand (with the extra -1) and tests unsigned src1 <= src2. */
112 static int glue(compute_c_sbb, SUFFIX)(void)
116 src2 = CC_SRC - CC_DST - 1;
117 cf = (DATA_TYPE)src1 <= (DATA_TYPE)src2;
/* Returns the flag word for logical operations. The visible lines derive
   parity, zero and sign from CC_DST alone.
   NOTE(review): the assignments to cf, af and of are on lines not shown here. */
121 static int glue(compute_all_logic, SUFFIX)(void)
123 int cf, pf, af, zf, sf, of;
/* parity is looked up from the low byte of CC_DST only */
125 pf = parity_table[(uint8_t)CC_DST];
/* zero flag lands in bit 6 */
127 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag: operand bit DATA_BITS-1 moved to bit 7 */
128 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
130 return cf | pf | af | zf | sf | of;
/* NOTE(review): only the signature is shown here; by its name this is the
   carry-only counterpart of compute_all_logic - confirm against the body. */
133 static int glue(compute_c_logic, SUFFIX)(void)
/* Returns the flag word for an increment at the current operand size.
   NOTE(review): cf, src1 and src2 are assigned on lines not shown here. */
138 static int glue(compute_all_inc, SUFFIX)(void)
140 int cf, pf, af, zf, sf, of;
/* parity is looked up from the low byte of CC_DST only */
145 pf = parity_table[(uint8_t)CC_DST];
/* adjust flag: bit 4 of CC_DST ^ src1 ^ src2 */
146 af = (CC_DST ^ src1 ^ src2) & 0x10;
/* zero flag lands in bit 6 */
147 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag: operand bit DATA_BITS-1 moved to bit 7 */
148 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow (bit 11) exactly when the masked result equals SIGN_MASK, i.e. only
   the top bit of the operand is set */
149 of = ((CC_DST & DATA_MASK) == SIGN_MASK) << 11;
150 return cf | pf | af | zf | sf | of;
/* NOTE(review): only the signature is shown here; by its name this is the
   carry-only helper shared by the increment/decrement flag modes - confirm
   against the body. */
154 static int glue(compute_c_inc, SUFFIX)(void)
/* Returns the flag word for a decrement at the current operand size.
   NOTE(review): cf, src1 and src2 are assigned on lines not shown here. */
160 static int glue(compute_all_dec, SUFFIX)(void)
162 int cf, pf, af, zf, sf, of;
/* parity is looked up from the low byte of CC_DST only */
167 pf = parity_table[(uint8_t)CC_DST];
/* adjust flag: bit 4 of CC_DST ^ src1 ^ src2 */
168 af = (CC_DST ^ src1 ^ src2) & 0x10;
/* zero flag lands in bit 6 */
169 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag: operand bit DATA_BITS-1 moved to bit 7 */
170 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
/* overflow (bit 11) exactly when the masked result equals SIGN_MASK - 1, i.e.
   every operand bit below the top bit is set; the cast keeps the subtraction
   unsigned */
171 of = ((CC_DST & DATA_MASK) == ((uint32_t)SIGN_MASK - 1)) << 11;
172 return cf | pf | af | zf | sf | of;
/* Returns the flag word for a left shift. CC_DST is the result; CC_SRC is
   the value op_shl stored, i.e. the operand shifted by count - 1. */
175 static int glue(compute_all_shl, SUFFIX)(void)
177 int cf, pf, af, zf, sf, of;
/* carry: the top operand bit of CC_SRC */
178 cf = (CC_SRC >> (DATA_BITS - 1)) & CC_C;
/* parity is looked up from the low byte of CC_DST only */
179 pf = parity_table[(uint8_t)CC_DST];
180 af = 0; /* undefined */
/* zero flag lands in bit 6 */
181 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag: operand bit DATA_BITS-1 moved to bit 7 */
182 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
183 /* of is defined if shift count == 1 */
/* overflow: the top operand bit of CC_SRC ^ CC_DST, moved to the CC_O position */
184 of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
185 return cf | pf | af | zf | sf | of;
/* NOTE(review): only the signature is shown here; by its name this is the
   carry-only counterpart of compute_all_shl - confirm against the body. */
189 static int glue(compute_c_shl, SUFFIX)(void)
/* Returns the flag word for right shifts (op_shr and op_sar both select this
   flag mode). CC_DST is the result.
   NOTE(review): the cf assignment is on a line not shown here. */
195 static int glue(compute_all_sar, SUFFIX)(void)
197 int cf, pf, af, zf, sf, of;
/* parity is looked up from the low byte of CC_DST only */
199 pf = parity_table[(uint8_t)CC_DST];
200 af = 0; /* undefined */
/* zero flag lands in bit 6 */
201 zf = ((DATA_TYPE)CC_DST == 0) << 6;
/* sign flag: operand bit DATA_BITS-1 moved to bit 7 */
202 sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
203 /* of is defined if shift count == 1 */
/* overflow: the top operand bit of CC_SRC ^ CC_DST, moved to the CC_O position */
204 of = lshift(CC_SRC ^ CC_DST, 12 - DATA_BITS) & CC_O;
205 return cf | pf | af | zf | sf | of;
208 /* various optimized jumps cases */
/* Jump op specialised for flags left by a subtraction: the condition is the
   unsigned comparison src1 < src2, with src2 rebuilt as CC_SRC - CC_DST. */
210 void OPPROTO glue(op_jb_sub, SUFFIX)(void)
214 src2 = CC_SRC - CC_DST;
216 if ((DATA_TYPE)src1 < (DATA_TYPE)src2)
/* Jump op specialised for flags left by a subtraction: the condition is that
   CC_DST, truncated to the operand size, is zero. */
223 void OPPROTO glue(op_jz_sub, SUFFIX)(void)
225 if ((DATA_TYPE)CC_DST == 0)
/* Jump op specialised for flags left by a subtraction: the condition is the
   unsigned comparison src1 <= src2, with src2 rebuilt as CC_SRC - CC_DST. */
232 void OPPROTO glue(op_jbe_sub, SUFFIX)(void)
236 src2 = CC_SRC - CC_DST;
238 if ((DATA_TYPE)src1 <= (DATA_TYPE)src2)
/* Jump op specialised for flags left by a subtraction: the condition is that
   the top operand bit (SIGN_MASK) of CC_DST is set. */
245 void OPPROTO glue(op_js_sub, SUFFIX)(void)
247 if (CC_DST & SIGN_MASK)
/* Jump op specialised for flags left by a subtraction: the condition is the
   signed comparison src1 < src2, with src2 rebuilt as CC_SRC - CC_DST. */
254 void OPPROTO glue(op_jl_sub, SUFFIX)(void)
258 src2 = CC_SRC - CC_DST;
260 if ((DATA_STYPE)src1 < (DATA_STYPE)src2)
/* Jump op specialised for flags left by a subtraction: the condition is the
   signed comparison src1 <= src2, with src2 rebuilt as CC_SRC - CC_DST. */
267 void OPPROTO glue(op_jle_sub, SUFFIX)(void)
271 src2 = CC_SRC - CC_DST;
273 if ((DATA_STYPE)src1 <= (DATA_STYPE)src2)
/* Loop op: decrements the low DATA_BITS of ECX and tests "counter non-zero
   and zero flag clear". */
284 void OPPROTO glue(op_loopnz, SUFFIX)(void)
/* materialise the current flags through the per-CC_OP table */
288 eflags = cc_table[CC_OP].compute_all();
/* decrement only the operand-sized part of ECX; the upper bits are preserved */
289 tmp = (ECX - 1) & DATA_MASK;
290 ECX = (ECX & ~DATA_MASK) | tmp;
291 if (tmp != 0 && !(eflags & CC_Z))
/* Loop op: decrements the low DATA_BITS of ECX and tests "counter non-zero
   and zero flag set". */
298 void OPPROTO glue(op_loopz, SUFFIX)(void)
/* materialise the current flags through the per-CC_OP table */
302 eflags = cc_table[CC_OP].compute_all();
/* decrement only the operand-sized part of ECX; the upper bits are preserved */
303 tmp = (ECX - 1) & DATA_MASK;
304 ECX = (ECX & ~DATA_MASK) | tmp;
305 if (tmp != 0 && (eflags & CC_Z))
/* Loop op: decrements the low DATA_BITS of ECX, leaving the upper bits of ECX
   unchanged. The test on tmp is on lines not shown here. */
312 void OPPROTO glue(op_loop, SUFFIX)(void)
315 tmp = (ECX - 1) & DATA_MASK;
316 ECX = (ECX & ~DATA_MASK) | tmp;
/* Jump op whose condition is that ECX, truncated to the operand size, is zero. */
324 void OPPROTO glue(op_jecxz, SUFFIX)(void)
326 if ((DATA_TYPE)ECX == 0)
335 /* various optimized set cases */
/* Sets T0 to 1 or 0 from the unsigned comparison src1 < src2 after a
   subtraction, with src2 rebuilt as CC_SRC - CC_DST. */
337 void OPPROTO glue(op_setb_T0_sub, SUFFIX)(void)
341 src2 = CC_SRC - CC_DST;
343 T0 = ((DATA_TYPE)src1 < (DATA_TYPE)src2);
/* Sets T0 to 1 when CC_DST, truncated to the operand size, is zero; else 0. */
346 void OPPROTO glue(op_setz_T0_sub, SUFFIX)(void)
348 T0 = ((DATA_TYPE)CC_DST == 0);
/* Sets T0 to 1 or 0 from the unsigned comparison src1 <= src2 after a
   subtraction, with src2 rebuilt as CC_SRC - CC_DST. */
351 void OPPROTO glue(op_setbe_T0_sub, SUFFIX)(void)
355 src2 = CC_SRC - CC_DST;
357 T0 = ((DATA_TYPE)src1 <= (DATA_TYPE)src2);
/* Sets T0 to the sign bit of CC_DST at the current operand size.
   NOTE(review): relies on lshift() treating a negative count as a right
   shift - confirm against its definition. */
360 void OPPROTO glue(op_sets_T0_sub, SUFFIX)(void)
362 T0 = lshift(CC_DST, -(DATA_BITS - 1)) & 1;
/* Sets T0 to 1 or 0 from the signed comparison src1 < src2 after a
   subtraction, with src2 rebuilt as CC_SRC - CC_DST. */
365 void OPPROTO glue(op_setl_T0_sub, SUFFIX)(void)
369 src2 = CC_SRC - CC_DST;
371 T0 = ((DATA_STYPE)src1 < (DATA_STYPE)src2);
/* Sets T0 to 1 or 0 from the signed comparison src1 <= src2 after a
   subtraction, with src2 rebuilt as CC_SRC - CC_DST. */
374 void OPPROTO glue(op_setle_T0_sub, SUFFIX)(void)
378 src2 = CC_SRC - CC_DST;
380 T0 = ((DATA_STYPE)src1 <= (DATA_STYPE)src2);
/* Rotate T0 left by T1 (count masked with SHIFT_MASK) and update the flags:
   all flags except CC_O and CC_C are carried over from the current state, then
   the two are recomputed and the flag mode is switched to CC_OP_EFLAGS.
   NOTE(review): the guard on count, the saving of src and the carry term of
   the OR expression are on lines not shown here. */
385 void OPPROTO glue(glue(op_rol, SUFFIX), _T0_T1_cc)(void)
388 count = T1 & SHIFT_MASK;
/* snapshot the current flags minus overflow and carry */
390 CC_SRC = cc_table[CC_OP].compute_all() & ~(CC_O | CC_C);
393 T0 = (T0 << count) | (T0 >> (DATA_BITS - count));
/* overflow: top operand bit of (old value ^ new value), moved to bit 11 */
394 CC_SRC |= (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) |
396 CC_OP = CC_OP_EFLAGS;
/* Rotate T0 right by T1 (count masked with SHIFT_MASK) and update the flags:
   all flags except CC_O and CC_C are carried over from the current state, then
   the two are recomputed and the flag mode is switched to CC_OP_EFLAGS.
   NOTE(review): the guard on count and the saving of src are on lines not
   shown here. */
401 void OPPROTO glue(glue(op_ror, SUFFIX), _T0_T1_cc)(void)
404 count = T1 & SHIFT_MASK;
/* snapshot the current flags minus overflow and carry */
406 CC_SRC = cc_table[CC_OP].compute_all() & ~(CC_O | CC_C);
409 T0 = (T0 >> count) | (T0 << (DATA_BITS - count));
/* overflow: top operand bit of (old value ^ new value), moved to bit 11;
   carry: the top operand bit of the rotated value */
410 CC_SRC |= (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) |
411 ((T0 >> (DATA_BITS - 1)) & CC_C);
412 CC_OP = CC_OP_EFLAGS;
/* Rotate T0 left through the carry flag and rebuild CC_O/CC_C, leaving the
   other flags as they were; the flag mode becomes CC_OP_EFLAGS.
   NOTE(review): the initial count computation, the conditionals choosing
   between the two table lookups, the guards, and the copies between T0, src
   and res are on lines not shown here. */
417 void OPPROTO glue(glue(op_rcl, SUFFIX), _T0_T1_cc)(void)
419 int count, res, eflags;
/* alternative count reductions through lookup tables (word / byte variants,
   judging by the table names - confirm) */
424 count = rclw_table[count];
426 count = rclb_table[count];
429 eflags = cc_table[CC_OP].compute_all();
/* the incoming carry is inserted at bit count - 1 */
432 res = (T0 << count) | ((eflags & CC_C) << (count - 1));
/* bits rotated out at the top re-enter below the inserted carry */
434 res |= T0 >> (DATA_BITS + 1 - count);
436 CC_SRC = (eflags & ~(CC_C | CC_O)) |
437 (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) |
438 ((src >> (DATA_BITS - count)) & CC_C);
439 CC_OP = CC_OP_EFLAGS;
/* Rotate T0 right through the carry flag and rebuild CC_O/CC_C, leaving the
   other flags as they were; the flag mode becomes CC_OP_EFLAGS.
   NOTE(review): the initial count computation, the conditionals choosing
   between the two table lookups, the guards, and the copies between T0, src
   and res are on lines not shown here. */
444 void OPPROTO glue(glue(op_rcr, SUFFIX), _T0_T1_cc)(void)
446 int count, res, eflags;
/* alternative count reductions through lookup tables (word / byte variants,
   judging by the table names - confirm) */
451 count = rclw_table[count];
453 count = rclb_table[count];
456 eflags = cc_table[CC_OP].compute_all();
/* the incoming carry is inserted at bit DATA_BITS - count */
459 res = (T0 >> count) | ((eflags & CC_C) << (DATA_BITS - count));
/* bits rotated out at the bottom re-enter above the inserted carry */
461 res |= T0 << (DATA_BITS + 1 - count);
463 CC_SRC = (eflags & ~(CC_C | CC_O)) |
464 (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) |
465 ((src >> (count - 1)) & CC_C);
466 CC_OP = CC_OP_EFLAGS;
/* Left shift with flag update: CC_SRC receives the operand shifted by
   count - 1 and the flag mode becomes the SHL mode for this operand size.
   The count computation and the update of T0/CC_DST are on lines not shown. */
471 void OPPROTO glue(glue(op_shl, SUFFIX), _T0_T1_cc)(void)
476 CC_SRC = (DATA_TYPE)T0 << (count - 1);
479 CC_OP = CC_OP_SHLB + SHIFT;
/* Logical right shift with flag update: CC_SRC receives T0 shifted right by
   count - 1 and the flag mode becomes the SAR mode for this operand size
   (the same mode op_sar selects).
   The count computation and the update of T0/CC_DST are on lines not shown. */
484 void OPPROTO glue(glue(op_shr, SUFFIX), _T0_T1_cc)(void)
490 CC_SRC = T0 >> (count - 1);
493 CC_OP = CC_OP_SARB + SHIFT;
/* Arithmetic right shift with flag update: the operand is first converted to
   the signed operand type, CC_SRC receives it shifted right by count - 1, and
   the flag mode becomes the SAR mode for this operand size.
   The count computation and the update of T0/CC_DST are on lines not shown. */
498 void OPPROTO glue(glue(op_sar, SUFFIX), _T0_T1_cc)(void)
/* the cast to the signed type sign-extends the operand into src */
503 src = (DATA_STYPE)T0;
504 CC_SRC = src >> (count - 1);
507 CC_OP = CC_OP_SARB + SHIFT;
513 /* XXX: overflow flag might be incorrect in some cases in shldw */
/* Double-precision left shift, immediate count, built on a 32-bit value with
   T0 in bits 16..31 and T1 below it.
   NOTE(review): presumably the 16-bit variant selected by a preprocessor
   conditional not shown here; the count source, guards and write-back of T0
   are also not shown. */
514 void OPPROTO glue(glue(op_shld, SUFFIX), _T0_T1_im_cc)(void)
520 res = T1 | (T0 << 16);
521 CC_SRC = res >> (32 - count);
524 res |= T1 << (count - 16);
/* Double-precision left shift, ECX-count form, built on a 32-bit value with
   T0 in bits 16..31 and T1 below it; selects the SAR flag mode for this
   operand size.
   NOTE(review): presumably the 16-bit variant selected by a preprocessor
   conditional not shown here; the count computation, guards and write-back of
   T0 are also not shown. */
529 void OPPROTO glue(glue(op_shld, SUFFIX), _T0_T1_ECX_cc)(void)
536 res = T1 | (T0 << 16);
537 CC_SRC = res >> (32 - count);
540 res |= T1 << (count - 16);
543 CC_OP = CC_OP_SARB + SHIFT;
/* Double-precision right shift, immediate count, built on a 32-bit value with
   the low 16 bits of T0 at the bottom and T1 in bits 16..31.
   NOTE(review): presumably the 16-bit variant selected by a preprocessor
   conditional not shown here; the count source, guards and write-back of T0
   are also not shown. */
547 void OPPROTO glue(glue(op_shrd, SUFFIX), _T0_T1_im_cc)(void)
553 res = (T0 & 0xffff) | (T1 << 16);
554 CC_SRC = res >> (count - 1);
557 res |= T1 << (32 - count);
/* Double-precision right shift, ECX-count form, built on a 32-bit value with
   the low 16 bits of T0 at the bottom and T1 in bits 16..31; selects the SAR
   flag mode for this operand size.
   NOTE(review): presumably the 16-bit variant selected by a preprocessor
   conditional not shown here; the count computation, guards and write-back of
   T0 are also not shown. */
563 void OPPROTO glue(glue(op_shrd, SUFFIX), _T0_T1_ECX_cc)(void)
570 res = (T0 & 0xffff) | (T1 << 16);
571 CC_SRC = res >> (count - 1);
574 res |= T1 << (32 - count);
577 CC_OP = CC_OP_SARB + SHIFT;
/* Double-precision left shift, immediate count: T0 is shifted left by count
   and the vacated low bits are filled from the top of T1. CC_SRC keeps T0
   shifted by count - 1.
   NOTE(review): second definition of this name in the file - presumably the
   alternative branch of a preprocessor conditional not shown here. */
583 void OPPROTO glue(glue(op_shld, SUFFIX), _T0_T1_im_cc)(void)
589 CC_SRC = T0 << (count - 1);
590 T0 = (T0 << count) | (T1 >> (DATA_BITS - count));
/* Double-precision left shift, ECX-count form: T0 is shifted left by count
   and the vacated low bits are filled from the top of T1. CC_SRC keeps T0
   shifted by count - 1 and the SHL flag mode for this operand size is selected.
   NOTE(review): second definition of this name in the file - presumably the
   alternative branch of a preprocessor conditional not shown here. */
594 void OPPROTO glue(glue(op_shld, SUFFIX), _T0_T1_ECX_cc)(void)
601 CC_SRC = T0 << (count - 1);
602 T0 = (T0 << count) | (T1 >> (DATA_BITS - count));
604 CC_OP = CC_OP_SHLB + SHIFT;
/* Double-precision right shift, immediate count: T0 is shifted right by count
   and the vacated high bits are filled from the bottom of T1. CC_SRC keeps T0
   shifted by count - 1.
   NOTE(review): second definition of this name in the file - presumably the
   alternative branch of a preprocessor conditional not shown here. */
608 void OPPROTO glue(glue(op_shrd, SUFFIX), _T0_T1_im_cc)(void)
614 CC_SRC = T0 >> (count - 1);
615 T0 = (T0 >> count) | (T1 << (DATA_BITS - count));
/* Double-precision right shift, ECX-count form: T0 is shifted right by count
   and the vacated high bits are filled from the bottom of T1. CC_SRC keeps T0
   shifted by count - 1 and the SAR flag mode for this operand size is selected.
   NOTE(review): second definition of this name in the file - presumably the
   alternative branch of a preprocessor conditional not shown here. */
620 void OPPROTO glue(glue(op_shrd, SUFFIX), _T0_T1_ECX_cc)(void)
627 CC_SRC = T0 >> (count - 1);
628 T0 = (T0 >> count) | (T1 << (DATA_BITS - count));
630 CC_OP = CC_OP_SARB + SHIFT;
635 /* carry add/sub (we only need to set CC_OP differently) */
/* Add with carry: reads the current carry through the per-CC_OP table and
   selects the flag mode from it (the arithmetic itself is on lines not shown).
   NOTE(review): "cf * 3" assumes the with-carry modes sit exactly three
   entries after the plain ADD modes in the CC_OP enumeration - confirm. */
637 void OPPROTO glue(glue(op_adc, SUFFIX), _T0_T1_cc)(void)
640 cf = cc_table[CC_OP].compute_c();
644 CC_OP = CC_OP_ADDB + SHIFT + cf * 3;
/* Subtract with borrow: reads the current carry through the per-CC_OP table
   and selects the flag mode from it (the arithmetic itself is on lines not
   shown).
   NOTE(review): "cf * 3" assumes the with-borrow modes sit exactly three
   entries after the plain SUB modes in the CC_OP enumeration - confirm. */
647 void OPPROTO glue(glue(op_sbb, SUFFIX), _T0_T1_cc)(void)
650 cf = cc_table[CC_OP].compute_c();
654 CC_OP = CC_OP_SUBB + SHIFT + cf * 3;
/* Compare-and-exchange against EAX. The visible lines test whether CC_DST,
   truncated to the operand size, is zero, and write the low DATA_BITS of T0
   into EAX while preserving EAX's upper bits.
   NOTE(review): the CC_SRC/CC_DST setup and the branch structure tying the
   EAX write to the test are on lines not shown here. */
657 void OPPROTO glue(glue(op_cmpxchg, SUFFIX), _T0_T1_EAX_cc)(void)
661 if ((DATA_TYPE)CC_DST == 0) {
664 EAX = (EAX & ~DATA_MASK) | (T0 & DATA_MASK);
/* Bit test: the bit index is T1 masked with SHIFT_MASK; the selected bit of
   T0 is moved to bit 0 of CC_SRC. */
672 void OPPROTO glue(glue(op_bt, SUFFIX), _T0_T1_cc)(void)
675 count = T1 & SHIFT_MASK;
676 CC_SRC = T0 >> count;
/* Bit test and set: the bit index is T1 masked with SHIFT_MASK; the selected
   bit of T0 is moved to bit 0 of CC_SRC. The modification of T0 is on a line
   not shown here. */
679 void OPPROTO glue(glue(op_bts, SUFFIX), _T0_T1_cc)(void)
682 count = T1 & SHIFT_MASK;
683 CC_SRC = T0 >> count;
/* Bit test and reset: the bit index is T1 masked with SHIFT_MASK; the
   selected bit of T0 is moved to bit 0 of CC_SRC. The modification of T0 is
   on a line not shown here. */
687 void OPPROTO glue(glue(op_btr, SUFFIX), _T0_T1_cc)(void)
690 count = T1 & SHIFT_MASK;
691 CC_SRC = T0 >> count;
/* Bit test and complement: the bit index is T1 masked with SHIFT_MASK; the
   selected bit of T0 is moved to bit 0 of CC_SRC. The modification of T0 is
   on a line not shown here. */
695 void OPPROTO glue(glue(op_btc, SUFFIX), _T0_T1_cc)(void)
698 count = T1 & SHIFT_MASK;
699 CC_SRC = T0 >> count;
/* Bit scan forward over the low DATA_BITS of T0: walks up from bit 0 until a
   set bit is found. CC_DST is then set so that the zero flag, which the flag
   helpers compute as "CC_DST == 0", records whether any bit was found.
   NOTE(review): the zero-input guard, the loop body and the write of the
   result are on lines not shown here. */
703 void OPPROTO glue(glue(op_bsf, SUFFIX), _T0_cc)(void)
706 res = T0 & DATA_MASK;
709 while ((res & 1) == 0) {
714 CC_DST = 1; /* ZF = 0 */
716 CC_DST = 0; /* ZF = 1 */
/* Bit scan reverse over the low DATA_BITS of T0: starts at the top operand
   bit and loops while that bit of res is clear. CC_DST is then set so that
   the zero flag, which the flag helpers compute as "CC_DST == 0", records
   whether any bit was found.
   NOTE(review): the zero-input guard, the loop body and the write of the
   result are on lines not shown here. */
721 void OPPROTO glue(glue(op_bsr, SUFFIX), _T0_cc)(void)
724 res = T0 & DATA_MASK;
726 count = DATA_BITS - 1;
727 while ((res & SIGN_MASK) == 0) {
732 CC_DST = 1; /* ZF = 0 */
734 CC_DST = 0; /* ZF = 1 */
741 /* string operations */
742 /* XXX: maybe use lower level instructions to ease exception handling */
/* String move: loads one operand-sized element from the address in ESI,
   stores it at the address in EDI, then advances both by DF << SHIFT.
   NOTE(review): DF presumably holds +1 or -1 so the step is plus or minus
   the element size - confirm. */
744 void OPPROTO glue(op_movs, SUFFIX)(void)
747 v = glue(ldu, SUFFIX)((void *)ESI);
748 glue(st, SUFFIX)((void *)EDI, v);
749 ESI += (DF << SHIFT);
750 EDI += (DF << SHIFT);
/* Repeated string move: the visible lines copy one operand-sized element from
   the address in ESI to the address in EDI.
   NOTE(review): the loop, the counter handling and the pointer updates are on
   lines not shown here. */
753 void OPPROTO glue(op_rep_movs, SUFFIX)(void)
758 v = glue(ldu, SUFFIX)((void *)ESI);
759 glue(st, SUFFIX)((void *)EDI, v);
/* String store: writes EAX through the operand-sized store helper at the
   address in EDI, then advances EDI by DF << SHIFT.
   NOTE(review): DF presumably holds +1 or -1 - confirm. */
766 void OPPROTO glue(op_stos, SUFFIX)(void)
768 glue(st, SUFFIX)((void *)EDI, EAX);
769 EDI += (DF << SHIFT);
/* Repeated string store: the visible line writes EAX through the
   operand-sized store helper at the address in EDI.
   NOTE(review): the loop, the counter handling and the pointer update are on
   lines not shown here. */
772 void OPPROTO glue(op_rep_stos, SUFFIX)(void)
777 glue(st, SUFFIX)((void *)EDI, EAX);
/* String load: reads one operand-sized element from the address in ESI into
   the low part of EAX, then advances ESI by DF << SHIFT.
   NOTE(review): the two EAX merges are alternatives for 8- and 16-bit
   operands; the conditionals selecting them, and the remaining case, are on
   lines not shown here. */
783 void OPPROTO glue(op_lods, SUFFIX)(void)
786 v = glue(ldu, SUFFIX)((void *)ESI);
/* replace only the low byte of EAX */
788 EAX = (EAX & ~0xff) | v;
/* replace only the low 16 bits of EAX */
790 EAX = (EAX & ~0xffff) | v;
794 ESI += (DF << SHIFT);
797 /* don't know if it is used */
/* Repeated string load: the visible lines read one operand-sized element from
   the address in ESI and merge it into the low byte or low 16 bits of EAX.
   NOTE(review): the loop, the counter handling, the size conditionals and the
   pointer update are on lines not shown here. */
798 void OPPROTO glue(op_rep_lods, SUFFIX)(void)
803 v = glue(ldu, SUFFIX)((void *)ESI);
/* replace only the low byte of EAX */
805 EAX = (EAX & ~0xff) | v;
/* replace only the low 16 bits of EAX */
807 EAX = (EAX & ~0xffff) | v;
/* String scan: loads one operand-sized element from the address in EDI and
   advances EDI by DF << SHIFT. The comparison setup is on lines not shown. */
816 void OPPROTO glue(op_scas, SUFFIX)(void)
820 v = glue(ldu, SUFFIX)((void *)EDI);
821 EDI += (DF << SHIFT);
/* Repeated string scan (repeat-while-equal form, by its name): compares the
   operand-sized part of EAX with elements loaded from the address in EDI and
   leaves the flags in the SUB mode for this operand size.
   NOTE(review): the loop, its exit condition and the CC_SRC/CC_DST writes are
   on lines not shown here. */
826 void OPPROTO glue(op_repz_scas, SUFFIX)(void)
831 /* NOTE: the flags are not modified if ECX == 0 */
832 v1 = EAX & DATA_MASK;
835 v2 = glue(ldu, SUFFIX)((void *)EDI);
843 CC_OP = CC_OP_SUBB + SHIFT;
/* Repeated string scan (repeat-while-not-equal form, by its name): compares
   the operand-sized part of EAX with elements loaded from the address in EDI
   and leaves the flags in the SUB mode for this operand size.
   NOTE(review): the loop, its exit condition and the CC_SRC/CC_DST writes are
   on lines not shown here. */
847 void OPPROTO glue(op_repnz_scas, SUFFIX)(void)
852 /* NOTE: the flags are not modified if ECX == 0 */
853 v1 = EAX & DATA_MASK;
856 v2 = glue(ldu, SUFFIX)((void *)EDI);
864 CC_OP = CC_OP_SUBB + SHIFT;
/* String compare: loads one operand-sized element from the address in ESI and
   one from the address in EDI, then advances both pointers by DF << SHIFT.
   The comparison setup is on lines not shown here. */
868 void OPPROTO glue(op_cmps, SUFFIX)(void)
871 v1 = glue(ldu, SUFFIX)((void *)ESI);
872 v2 = glue(ldu, SUFFIX)((void *)EDI);
873 ESI += (DF << SHIFT);
874 EDI += (DF << SHIFT);
/* Repeated string compare (repeat-while-equal form, by its name): loads
   elements from the addresses in ESI and EDI and leaves the flags in the SUB
   mode for this operand size.
   NOTE(review): the loop, its exit condition, the pointer updates and the
   CC_SRC/CC_DST writes are on lines not shown here. */
879 void OPPROTO glue(op_repz_cmps, SUFFIX)(void)
885 v1 = glue(ldu, SUFFIX)((void *)ESI);
886 v2 = glue(ldu, SUFFIX)((void *)EDI);
895 CC_OP = CC_OP_SUBB + SHIFT;
/* Repeated string compare (repeat-while-not-equal form, by its name): loads
   elements from the addresses in ESI and EDI and leaves the flags in the SUB
   mode for this operand size.
   NOTE(review): the loop, its exit condition, the pointer updates and the
   CC_SRC/CC_DST writes are on lines not shown here. */
899 void OPPROTO glue(op_repnz_cmps, SUFFIX)(void)
905 v1 = glue(ldu, SUFFIX)((void *)ESI);
906 v2 = glue(ldu, SUFFIX)((void *)EDI);
915 CC_OP = CC_OP_SUBB + SHIFT;
/* String output: loads one operand-sized element from the address in ESI,
   passes it to the port-output helper with port dx, then advances ESI by
   DF << SHIFT. The assignment of dx is on a line not shown here. */
921 void OPPROTO glue(op_outs, SUFFIX)(void)
925 v = glue(ldu, SUFFIX)((void *)ESI);
926 glue(cpu_x86_out, SUFFIX)(dx, v);
927 ESI += (DF << SHIFT);
/* Repeated string output: the visible lines load one operand-sized element
   from the address in ESI and pass it to the port-output helper with port dx.
   NOTE(review): the loop, the counter handling, the dx assignment and the
   pointer update are on lines not shown here. */
930 void OPPROTO glue(op_rep_outs, SUFFIX)(void)
936 v = glue(ldu, SUFFIX)((void *)ESI);
937 glue(cpu_x86_out, SUFFIX)(dx, v);
/* String input: reads one operand-sized value from port dx through the
   port-input helper, stores it at the address in EDI, then advances EDI by
   DF << SHIFT. The assignment of dx is on a line not shown here. */
943 void OPPROTO glue(op_ins, SUFFIX)(void)
947 v = glue(cpu_x86_in, SUFFIX)(dx);
948 glue(st, SUFFIX)((void *)EDI, v);
949 EDI += (DF << SHIFT);
/* Repeated string input: the visible lines read one operand-sized value from
   port dx, store it at the address in EDI and advance EDI by DF << SHIFT.
   NOTE(review): the loop, the counter handling and the dx assignment are on
   lines not shown here. */
952 void OPPROTO glue(op_rep_ins, SUFFIX)(void)
958 v = glue(cpu_x86_in, SUFFIX)(dx);
959 glue(st, SUFFIX)((void *)EDI, v);
960 EDI += (DF << SHIFT);
/* Port output: the port number is the low 16 bits of T0 and the value is T1
   truncated to the operand size. */
965 void OPPROTO glue(glue(op_out, SUFFIX), _T0_T1)(void)
967 glue(cpu_x86_out, SUFFIX)(T0 & 0xffff, T1 & DATA_MASK);
/* Port input: reads from the port given by the low 16 bits of T0 through the
   operand-sized port-input helper and places the result in T1. */
970 void OPPROTO glue(glue(op_in, SUFFIX), _T0_T1)(void)
972 T1 = glue(cpu_x86_in, SUFFIX)(T0 & 0xffff);