VirtualBox

Browse Source

Changeset 94162 in vbox for trunk/src/VBox/VMM/VMMAll

Timestamp:

Mar 10, 2022 10:29:05 PM (3 years ago)

Author:

vboxsync

Message:

VMM/IEM: Try to deal with the basic Intel/AMD EFLAGS differences for double shifts (Intel side tests). bugref:9898

Location:

trunk/src/VBox/VMM/VMMAll

Files:

4 edited

IEMAll.cpp (modified) (2 diffs)
IEMAllAImpl.asm (modified) (3 diffs)
IEMAllAImplC.cpp (modified) (5 diffs)
IEMAllInstructionsTwoByte0f.cpp.h (modified) (4 diffs)

Legend:

(no marker): Unmodified
+: Added
-: Removed

trunk/src/VBox/VMM/VMMAll/IEMAll.cpp

-              r94156
+              r94162
 /** Function table for the SHLD instruction */
+/** Function table for the SHLD instruction. */
 IEM_STATIC const IEMOPSHIFTDBLSIZES g_iemAImpl_shld =
+{
 …
 };
+/** Function table for the SHRD instruction */
+/** Function table for the SHLD instruction, AMD EFLAGS variation.
+ *  Lists the iemAImpl_shld_u16_amd, iemAImpl_shld_u32_amd and
+ *  iemAImpl_shld_u64_amd workers, in that order. */
+IEM_STATIC const IEMOPSHIFTDBLSIZES g_iemAImpl_shld_amd =
+{
+    iemAImpl_shld_u16_amd,
+    iemAImpl_shld_u32_amd,
+    iemAImpl_shld_u64_amd
+};
+/** Function table for the SHLD instruction, Intel EFLAGS variation.
+ *  Lists the iemAImpl_shld_u16_intel, iemAImpl_shld_u32_intel and
+ *  iemAImpl_shld_u64_intel workers, in that order. */
+IEM_STATIC const IEMOPSHIFTDBLSIZES g_iemAImpl_shld_intel =
+{
+    iemAImpl_shld_u16_intel,
+    iemAImpl_shld_u32_intel,
+    iemAImpl_shld_u64_intel
+};
+/** EFLAGS variation selection table for the SHLD instruction.
+ *  Four slots, in order: the generic table, the Intel variation, the AMD
+ *  variation, and the generic table again.  The SHLD opcode decoders hand
+ *  this array to IEMTARGETCPU_EFL_BEHAVIOR_SELECT; what each index stands
+ *  for is defined by that macro, which is not visible here - TODO confirm
+ *  against its definition before reordering entries. */
+IEM_STATIC const IEMOPSHIFTDBLSIZES * const g_iemAImpl_shld_eflags[] =
+{
+    &g_iemAImpl_shld,
+    &g_iemAImpl_shld_intel,
+    &g_iemAImpl_shld_amd,
+    &g_iemAImpl_shld
+};
+/** Function table for the SHRD instruction. */
 IEM_STATIC const IEMOPSHIFTDBLSIZES g_iemAImpl_shrd =
+{
     iemAImpl_shrd_u16,
     iemAImpl_shrd_u32,
+    iemAImpl_shrd_u64,
+    iemAImpl_shrd_u64
+};
+/** Function table for the SHRD instruction, AMD EFLAGS variation.
+ *  Lists the iemAImpl_shrd_u16_amd, iemAImpl_shrd_u32_amd and
+ *  iemAImpl_shrd_u64_amd workers, in that order. */
+IEM_STATIC const IEMOPSHIFTDBLSIZES g_iemAImpl_shrd_amd =
+{
+    iemAImpl_shrd_u16_amd,
+    iemAImpl_shrd_u32_amd,
+    iemAImpl_shrd_u64_amd
+};
+/** Function table for the SHRD instruction, Intel EFLAGS variation.
+ *  Lists the iemAImpl_shrd_u16_intel, iemAImpl_shrd_u32_intel and
+ *  iemAImpl_shrd_u64_intel workers, in that order. */
+IEM_STATIC const IEMOPSHIFTDBLSIZES g_iemAImpl_shrd_intel =
+{
+    iemAImpl_shrd_u16_intel,
+    iemAImpl_shrd_u32_intel,
+    iemAImpl_shrd_u64_intel
+};
+/** EFLAGS variation selection table for the SHRD instruction.
+ *  Four slots, in order: the generic table, the Intel variation, the AMD
+ *  variation, and the generic table again.  The SHRD opcode decoders hand
+ *  this array to IEMTARGETCPU_EFL_BEHAVIOR_SELECT; what each index stands
+ *  for is defined by that macro, which is not visible here - TODO confirm
+ *  against its definition before reordering entries. */
+IEM_STATIC const IEMOPSHIFTDBLSIZES * const g_iemAImpl_shrd_eflags[] =
+{
+    &g_iemAImpl_shrd,
+    &g_iemAImpl_shrd_intel,
+    &g_iemAImpl_shrd_amd,
+    &g_iemAImpl_shrd
 };

trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm

-              r94156
+              r94162
 %macro IEMIMPL_SHIFT_DBL_OP 3
 BEGINCODE
+BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_intel, 16
+BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_amd, 16
 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
         PROLOGUE_4_ARGS
 …
 ENDPROC iemAImpl_ %+ %1 %+ _u16
+BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_intel, 16
+BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_amd, 16
 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
         PROLOGUE_4_ARGS
 …
  %ifdef RT_ARCH_AMD64
+BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_intel, 20
+BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_amd, 20
 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
         PROLOGUE_4_ARGS

trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp

-              r94156
+              r94162
 /*
  * SHLD
+ *
+ *  - CF is the last bit shifted out of puDst.
+ *  - AF is always cleared by Intel 10980XE.
+ *  - AF is always set by AMD 3990X.
+ *  - OF is set according to the first shift on Intel 10980XE, it seems.
+ *  - OF is set according to the last sub-shift on AMD 3990X.
+ *  - ZF, SF and PF are calculated according to the result by both vendors.
  */
 #define EMIT_SHLD(a_cBitsWidth) \
 …
                                                          uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
 { \
+    /** @todo this ain't right for 16-bit. Apparently it should use 0x1f instead \
+     *        of 0xf for masking and use uSrc in repetitive fashion...  */ \
     cShift &= a_cBitsWidth - 1; \
     if (cShift) \
     { \
         uint ## a_cBitsWidth ## _t const uDst = *puDst; \
+        uint ## a_cBitsWidth ## _t const uDst    = *puDst; \
         uint ## a_cBitsWidth ## _t       uResult = uDst << cShift; \
         uResult |= uSrc >> (a_cBitsWidth - cShift); \
         *puDst = uResult; \
+        \
-        /* Calc EFLAGS.  CF is the last bit shifted out of puDst. The OF flag \
-           indicates a sign change for a single shift, whereas intel documents \
-           setting it to zero for higher shift counts and AMD just says it's \
-           undefined, however AMD x3990 sets it according to the last sub-shift. \
-           On AMD x3990 the AF flag is always set. */ \
         uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
+        if (true /*AMD*/) \
+        { \
+            fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uDst << (cShift - 1)) ^ uResult); /* Set according to last shift. */ \
+            fEfl |= X86_EFL_AF; \
+        } \
+        else \
+        { \
+            if (cShift == 1) \
+                fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uDst ^ uResult)); \
+            fEfl |= X86_EFL_AF; /* ? */ \
+        } \
+        AssertCompile(X86_EFL_CF_BIT == 0); \
+        fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); /* Set according to the first shift. */ \
+        fEfl |= (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; /* CF = last bit shifted out */ \
+        fEfl |= g_afParity[uResult & 0xff]; \
+        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
+        fEfl |= X86_EFL_CALC_ZF(uResult); \
+        *pfEFlags = fEfl; \
+    } \
+}\
+\
+IEM_DECL_IMPL_DEF(void, iemAImpl_shld_u ## a_cBitsWidth ## _intel,(uint ## a_cBitsWidth ## _t *puDst, \
+                                                                   uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, \
+                                                                   uint32_t *pfEFlags)) \
+{ \
+    iemAImpl_shld_u ## a_cBitsWidth(puDst, uSrc, cShift, pfEFlags); \
+} \
+\
+IEM_DECL_IMPL_DEF(void, iemAImpl_shld_u ## a_cBitsWidth ## _amd,(uint ## a_cBitsWidth ## _t *puDst, \
+                                                                 uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, \
+                                                                 uint32_t *pfEFlags)) \
+{ \
+    cShift &= a_cBitsWidth - 1; \
+    if (cShift) \
+    { \
+        uint ## a_cBitsWidth ## _t const uDst    = *puDst; \
+        uint ## a_cBitsWidth ## _t       uResult = uDst << cShift; \
+        uResult |= uSrc >> (a_cBitsWidth - cShift); \
+        *puDst = uResult; \
+        \
+        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
+        fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uDst << (cShift - 1)) ^ uResult); /* Set according to last shift. */ \
+        fEfl |= X86_EFL_AF; \
         AssertCompile(X86_EFL_CF_BIT == 0); \
         fEfl |= (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; /* CF = last bit shifted out */ \
 …
     } \
+}
 EMIT_SHLD(64)
 # if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
 …
 /*
  * SHRD
+ *
+ * EFLAGS behaviour seems to be the same as with SHLD:
+ *  - CF is the last bit shifted out of puDst.
+ *  - AF is always cleared by Intel 10980XE.
+ *  - AF is always set by AMD 3990X.
+ *  - OF is set according to the first shift on Intel 10980XE, it seems.
+ *  - OF is set according to the last sub-shift on AMD 3990X.
+ *  - ZF, SF and PF are calculated according to the result by both vendors.
  */
 #define EMIT_SHRD(a_cBitsWidth) \
 IEM_DECL_IMPL_DEF(void, iemAImpl_shrd_u ## a_cBitsWidth,(uint ## a_cBitsWidth ## _t *puDst, \
                                                          uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
+{ \
+    /** @todo this is wrong for 16-bit, where it should be 0x1f not 0xf and \
+     *        source used twice or something like that. */ \
+    cShift &= a_cBitsWidth - 1; \
+    if (cShift) \
+    { \
+        uint ## a_cBitsWidth ## _t const uDst    = *puDst; \
+        uint ## a_cBitsWidth ## _t       uResult = uDst >> cShift; \
+        uResult |= uSrc << (a_cBitsWidth - cShift); \
+        *puDst = uResult; \
+        \
+        uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
+        fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uSrc << (a_cBitsWidth - 1))); \
+        AssertCompile(X86_EFL_CF_BIT == 0); \
+        fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
+        fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
+        fEfl |= X86_EFL_CALC_ZF(uResult); \
+        fEfl |= g_afParity[uResult & 0xff]; \
+        *pfEFlags = fEfl; \
+    } \
+} \
+\
+IEM_DECL_IMPL_DEF(void, iemAImpl_shrd_u ## a_cBitsWidth ## _intel,(uint ## a_cBitsWidth ## _t *puDst, \
+                                                                   uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, \
+                                                                   uint32_t *pfEFlags)) \
+{ \
+    iemAImpl_shrd_u ## a_cBitsWidth(puDst, uSrc, cShift, pfEFlags); \
+} \
+\
+IEM_DECL_IMPL_DEF(void, iemAImpl_shrd_u ## a_cBitsWidth ## _amd,(uint ## a_cBitsWidth ## _t *puDst, \
+                                                                 uint ## a_cBitsWidth ## _t uSrc, uint8_t cShift, \
+                                                                 uint32_t *pfEFlags)) \
 { \
     cShift &= a_cBitsWidth - 1; \
 …
         *puDst = uResult; \
+        \
-        /* Calc EFLAGS.  CF is the last bit shifted out of puDst. The OF flag \
-           indicates a sign change for a single shift, whereas intel documents \
-           setting it to zero for higher shift counts and AMD just says it's \
-           undefined, however AMD x3990 sets it according to the last sub-shift. \
-           On AMD x3990 the AF flag is always set. */ \
         uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
+        if (true /*AMD*/) \
+        { \
+            if (cShift > 1) /* Set according to last shift. */ \
+                fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uSrc << (a_cBitsWidth - cShift + 1)) ^ uResult); \
+            else \
+                fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ uResult); \
+            fEfl |= X86_EFL_AF; \
+        } \
+        if (cShift > 1) /* Set according to last shift. */ \
+            fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uSrc << (a_cBitsWidth - cShift + 1)) ^ uResult); \
         else \
+        { \
+            if (cShift == 1) \
+                fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uDst >> (a_cBitsWidth - 1)) ^ (uint32_t)(uResult >> (a_cBitsWidth - 1))); \
+            fEfl |= X86_EFL_AF; /* ? */ \
+        } \
+            fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ uResult); \
+        fEfl |= X86_EFL_AF; \
         AssertCompile(X86_EFL_CF_BIT == 0); \
         fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \

trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsTwoByte0f.cpp.h

-              r94156
+              r94162
     IEMOP_MNEMONIC(shld_Ev_Gv_Ib, "shld Ev,Gv,Ib");
     IEMOP_HLP_MIN_386();
     return FNIEMOP_CALL_1(iemOpCommonShldShrd_Ib, &g_iemAImpl_shld);
+    return FNIEMOP_CALL_1(iemOpCommonShldShrd_Ib, IEMTARGETCPU_EFL_BEHAVIOR_SELECT(g_iemAImpl_shld_eflags));
+}
 …
     IEMOP_MNEMONIC(shld_Ev_Gv_CL, "shld Ev,Gv,CL");
     IEMOP_HLP_MIN_386();
     return FNIEMOP_CALL_1(iemOpCommonShldShrd_CL, &g_iemAImpl_shld);
+    return FNIEMOP_CALL_1(iemOpCommonShldShrd_CL, IEMTARGETCPU_EFL_BEHAVIOR_SELECT(g_iemAImpl_shld_eflags));
+}
 …
     IEMOP_MNEMONIC(shrd_Ev_Gv_Ib, "shrd Ev,Gv,Ib");
     IEMOP_HLP_MIN_386();
     return FNIEMOP_CALL_1(iemOpCommonShldShrd_Ib, &g_iemAImpl_shrd);
+    return FNIEMOP_CALL_1(iemOpCommonShldShrd_Ib, IEMTARGETCPU_EFL_BEHAVIOR_SELECT(g_iemAImpl_shrd_eflags));
+}
 …
     IEMOP_MNEMONIC(shrd_Ev_Gv_CL, "shrd Ev,Gv,CL");
     IEMOP_HLP_MIN_386();
     return FNIEMOP_CALL_1(iemOpCommonShldShrd_CL, &g_iemAImpl_shrd);
+    return FNIEMOP_CALL_1(iemOpCommonShldShrd_CL, IEMTARGETCPU_EFL_BEHAVIOR_SELECT(g_iemAImpl_shrd_eflags));
+}

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats:

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette