- Timestamp: Mar 11, 2022 1:43:59 PM (3 years ago)
- svn:sync-xref-src-repo-rev: 150426
- Location: trunk/src/VBox/VMM
- Files: 2 edited
trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp
(r94169 → r94170)

The "undefined" EFLAGS of MUL, IMUL, DIV and IDIV are now modelled per CPU vendor. Each helper family is generated from a single *_INNER macro that takes a function-name suffix and an a_fIntelFlags selector, replacing the old pattern of a base function plus duplicated _intel/_amd bodies.

/*
 * MUL
 */
# define EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_mul_u,a_cBitsWidth,a_Suffix), a_Args) \
{ \
    RTUINT ## a_cBitsWidth2x ## U Result; \
    a_fnMul(Result, a_fnLoadF1(), uFactor, a_cBitsWidth2x); \
    a_fnStore(Result); \
    \
    /* Calc EFLAGS: */ \
    uint32_t fEfl = *pfEFlags; \
    if (a_fIntelFlags) \
    { /* Intel: 6700K and 10980XE behavior */ \
        fEfl &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF); \
        if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
            fEfl |= X86_EFL_SF; \
        fEfl |= g_afParity[Result.s.Lo & 0xff]; \
        if (Result.s.Hi != 0) \
            fEfl |= X86_EFL_CF | X86_EFL_OF; \
    } \
    else \
    { /* AMD: 3990X */ \
        if (Result.s.Hi != 0) \
            fEfl |= X86_EFL_CF | X86_EFL_OF; \
        else \
            fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
    } \
    *pfEFlags = fEfl; \
    return 0; \
} \

# define EMIT_MUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul) \
    EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, RT_NOTHING, 1) \
    EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, _intel, 1) \
    EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, _amd, 0) \

EMIT_MUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
[...]

Removed here: the old base body and the iemAImpl_mul_uXX_amd copy, both of which computed the flags "according to Skylake (similar to IMUL)", plus the iemAImpl_mul_uXX_intel wrapper that simply called the base function.
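The two flavours selected by a_fIntelFlags are easiest to see at a fixed width. A minimal standalone sketch for 8-bit MUL (hypothetical helper, EFLAGS bit values inlined; not VBox code):

#include <stdint.h>
#include <stdbool.h>

#define EFL_CF 0x0001
#define EFL_PF 0x0004
#define EFL_AF 0x0010
#define EFL_ZF 0x0040
#define EFL_SF 0x0080
#define EFL_OF 0x0800

/* x86 PF is set when the low byte has an even number of set bits. */
static uint32_t ParityFlag(uint8_t b)
{
    return __builtin_parity(b) ? 0 : EFL_PF;
}

static uint32_t MulU8Flags(uint8_t uFactor1, uint8_t uFactor2, uint32_t fEflIn, bool fIntel)
{
    uint16_t const uResult = (uint16_t)uFactor1 * uFactor2;
    uint8_t  const uLo     = (uint8_t)uResult;
    uint8_t  const uHi     = (uint8_t)(uResult >> 8);
    if (fIntel)
    {
        /* Intel (6700K/10980XE): SF and PF recomputed from the low half,
           AF and ZF cleared, CF+OF set when the high half is non-zero. */
        uint32_t fEfl = fEflIn & ~(EFL_SF | EFL_CF | EFL_OF | EFL_AF | EFL_ZF | EFL_PF);
        if (uLo & 0x80)
            fEfl |= EFL_SF;
        fEfl |= ParityFlag(uLo);
        if (uHi != 0)
            fEfl |= EFL_CF | EFL_OF;
        return fEfl;
    }
    /* AMD (3990X): only CF and OF are touched; SF/ZF/AF/PF pass through. */
    if (uHi != 0)
        return fEflIn | EFL_CF | EFL_OF;
    return fEflIn & ~(EFL_CF | EFL_OF);
}

For example, 0x10 * 0x10 = 0x0100: on the Intel path the zero low half yields PF set and SF clear, and the non-zero high half sets CF and OF; on the AMD path only CF and OF change.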
/*
 * IMUL
 *
 * The SF, ZF, AF and PF flags are "undefined". AMD (3990x) leaves these
 * flags as is - at least for the two op version. Whereas Intel skylake
 * always clear AF and ZF and calculates SF and PF as per the lower half
 * of the result.
 */
# define EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, \
                         a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_imul_u,a_cBitsWidth,a_Suffix),a_Args) \
{ \
    RTUINT ## a_cBitsWidth2x ## U Result; \
[...] \
    a_fnStore(Result); \
    \
    if (a_fIntelFlags) \
    { \
        fEfl &= ~(X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_PF); \
        if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
            fEfl |= X86_EFL_SF; \
        fEfl |= g_afParity[Result.s.Lo & 0xff]; \
    } \
    *pfEFlags = fEfl; \
    return 0; \
}
# define EMIT_IMUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul) \
    EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, RT_NOTHING, 1) \
    EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, _intel, 1) \
    EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, _amd, 0)

EMIT_IMUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
          MUL_LOAD_F1, MUL_STORE, MULDIV_NEG_U128, MULDIV_MUL_U128)
[...]

Removed here: the _intel wrapper and the standalone iemAImpl_imul_uXX_amd body. The latter repeated the shared sign-case multiply - take the magnitude of each negative factor, multiply unsigned, set CF and OF when Result.s.Hi is non-zero or the low half exceeds RT_BIT_64(a_cBitsWidth - 1) (>= for a positive product, > for a negative one, since -2^(width-1) still fits), and negate the result when the factor signs differ - but left SF, ZF, AF and PF untouched, which the a_fIntelFlags == 0 path now expresses directly.
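That sign-case analysis is easier to follow at a fixed width. A sketch of the same technique for 8-bit IMUL (hypothetical helper, not VBox code), where *pfOverflow mirrors the CF/OF condition:

#include <stdint.h>
#include <stdbool.h>

static uint16_t ImulU8(uint8_t uFactor1, uint8_t uFactor2, bool *pfOverflow)
{
    bool const    fNeg1 = (uFactor1 & 0x80) != 0;
    bool const    fNeg2 = (uFactor2 & 0x80) != 0;
    uint8_t const uAbs1 = fNeg1 ? (uint8_t)(0 - uFactor1) : uFactor1;
    uint8_t const uAbs2 = fNeg2 ? (uint8_t)(0 - uFactor2) : uFactor2;
    uint16_t      uResult = (uint16_t)uAbs1 * uAbs2;  /* unsigned multiply of magnitudes */
    if (fNeg1 == fNeg2)
        /* Positive product: anything past 0x7f no longer fits in int8_t. */
        *pfOverflow = uResult >= 0x80;   /* Hi != 0 || Lo >= RT_BIT(7) */
    else
    {
        /* Negative product: magnitude 0x80 still fits, as -128. */
        *pfOverflow = uResult > 0x80;    /* Hi != 0 || Lo >  RT_BIT(7) */
        uResult = (uint16_t)(0 - uResult);
    }
    return uResult;
}

E.g. ImulU8(0x80, 0x01, &f) (-128 * 1) yields 0xFF80 with no overflow, while ImulU8(0x80, 0x80, &f) (-128 * -128 = 16384) overflows, since 16384 is far above INT8_MAX.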
/*
 * DIV
 */
# define EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, \
                        a_Suffix, a_fIntelFlags) \
IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_div_u,a_cBitsWidth,a_Suffix),a_Args) \
{ \
    RTUINT ## a_cBitsWidth2x ## U Dividend; \
    a_fnLoad(Dividend); \
    if (   uDivisor != 0 \
        && Dividend.s.Hi < uDivisor) \
    { \
        RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
        a_fnDivRem(Quotient, Remainder, Dividend, uDivisor); \
        a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
        \
        /* Calc EFLAGS: Intel 6700K and 10980XE leaves them alone. AMD 3990X sets AF and clears PF, ZF and SF. */ \
        if (!a_fIntelFlags) \
            *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
        return 0; \
    } \
    /* #DE */ \
    return -1; \
}
# define EMIT_DIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem) \
    EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, RT_NOTHING, 1) \
    EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, _intel, 1) \
    EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, _amd, 0)

EMIT_DIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
         DIV_LOAD, DIV_STORE, MULDIV_MODDIV_U128)
[...]

Removed here: the old base body with its "Note! Skylake leaves all flags alone" RT_NOREF_PV(pfEFlags) preamble and /** @todo research the undefined DIV flags. */ marker, plus the _intel wrapper and the duplicated _amd body.
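The guard `uDivisor != 0 && Dividend.s.Hi < uDivisor` is what keeps the quotient representable: with the high half strictly below the divisor, the dividend is below uDivisor << a_cBitsWidth, so the quotient fits the destination register. A sketch at 8-bit width (hypothetical helper, flag bit values inlined; not VBox code):

#include <stdint.h>
#include <stdbool.h>

/* AX / divisor as the u8 DIV helper frames it; returns false on #DE. */
static bool DivU8(uint16_t *puAX, uint8_t uDivisor, bool fIntel, uint32_t *pfEfl)
{
    uint8_t const uHi = (uint8_t)(*puAX >> 8);       /* AH = high half of the dividend */
    if (uDivisor != 0 && uHi < uDivisor)             /* guarantees quotient < 0x100 */
    {
        uint8_t const uQuotient  = (uint8_t)(*puAX / uDivisor);
        uint8_t const uRemainder = (uint8_t)(*puAX % uDivisor);
        *puAX = (uint16_t)(((uint16_t)uRemainder << 8) | uQuotient);  /* AH=remainder, AL=quotient */
        if (!fIntel)  /* AMD 3990X: set AF, clear PF/ZF/SF; Intel leaves EFLAGS alone. */
            *pfEfl = (*pfEfl & ~(0x0004u /*PF*/ | 0x0040u /*ZF*/ | 0x0080u /*SF*/)) | 0x0010u /*AF*/;
        return true;
    }
    return false;  /* #DE */
}

E.g. 0x1234 / 0x56 succeeds (AH = 0x12 < 0x56), while 0x1234 / 0x12 raises #DE because the quotient (0x102) would not fit in AL.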
1836 * 1837 */ 1838 # define EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, \ 1839 a_Suffix, a_fIntelFlags) \ 1840 IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_idiv_u,a_cBitsWidth,a_Suffix),a_Args) \ 1906 1841 { \ 1907 1842 /* Note! Skylake leaves all flags alone. */ \ 1908 RT_NOREF_PV(pfEFlags); \1909 1843 \ 1910 1844 /** @todo overflow checks */ \ … … 1940 1874 { \ 1941 1875 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \ 1876 if (!a_fIntelFlags) \ 1877 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \ 1942 1878 return 0; \ 1943 1879 } \ … … 1949 1885 { \ 1950 1886 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \ 1887 if (!a_fIntelFlags) \ 1888 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \ 1951 1889 return 0; \ 1952 1890 } \ … … 1961 1899 { \ 1962 1900 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, Remainder.s.Lo); \ 1901 if (!a_fIntelFlags) \ 1902 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \ 1963 1903 return 0; \ 1964 1904 } \ … … 1970 1910 { \ 1971 1911 a_fnStore(Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \ 1912 if (!a_fIntelFlags) \ 1913 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \ 1972 1914 return 0; \ 1973 1915 } \ … … 1977 1919 /* #DE */ \ 1978 1920 return -1; \ 1979 } \ 1980 \ 1981 IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u ## a_cBitsWidth ## _intel,a_Args) \ 1982 { \ 1983 return iemAImpl_idiv_u ## a_cBitsWidth a_CallArgs; \ 1984 } \ 1985 \ 1986 IEM_DECL_IMPL_DEF(int, iemAImpl_idiv_u ## a_cBitsWidth ## _amd,a_Args) \ 1987 { \ 1988 /* Note! Skylake leaves all flags alone. */ \ 1989 RT_NOREF_PV(pfEFlags); \ 1990 \ 1991 /** @todo overflow checks */ \ 1992 if (uDivisor != 0) \ 1993 { \ 1994 /* \ 1995 * Convert to unsigned division. \ 1996 */ \ 1997 RTUINT ## a_cBitsWidth2x ## U Dividend; \ 1998 a_fnLoad(Dividend); \ 1999 bool const fSignedDividend = RT_BOOL(Dividend.s.Hi & RT_BIT_64(a_cBitsWidth - 1)); \ 2000 if (fSignedDividend) \ 2001 a_fnNeg(Dividend, a_cBitsWidth2x); \ 2002 \ 2003 uint ## a_cBitsWidth ## _t uDivisorPositive; \ 2004 if (!(uDivisor & RT_BIT_64(a_cBitsWidth - 1))) \ 2005 uDivisorPositive = uDivisor; \ 2006 else \ 2007 uDivisorPositive = UINT ## a_cBitsWidth ## _C(0) - uDivisor; \ 2008 \ 2009 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \ 2010 a_fnDivRem(Quotient, Remainder, Dividend, uDivisorPositive); \ 2011 \ 2012 /* \ 2013 * Setup the result, checking for overflows. \ 2014 */ \ 2015 if (!(uDivisor & RT_BIT_64(a_cBitsWidth - 1))) \ 2016 { \ 2017 if (!fSignedDividend) \ 2018 { \ 2019 /* Positive divisor, positive dividend => result positive. */ \ 2020 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \ 2021 { \ 2022 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \ 2023 return 0; \ 2024 } \ 2025 } \ 2026 else \ 2027 { \ 2028 /* Positive divisor, negative dividend => result negative. */ \ 2029 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \ 2030 { \ 2031 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \ 2032 return 0; \ 2033 } \ 2034 } \ 2035 } \ 2036 else \ 2037 { \ 2038 if (!fSignedDividend) \ 2039 { \ 2040 /* Negative divisor, positive dividend => negative quotient, positive remainder. 
*/ \ 2041 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \ 2042 { \ 2043 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, Remainder.s.Lo); \ 2044 return 0; \ 2045 } \ 2046 } \ 2047 else \ 2048 { \ 2049 /* Negative divisor, negative dividend => positive quotient, negative remainder. */ \ 2050 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \ 2051 { \ 2052 a_fnStore(Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \ 2053 return 0; \ 2054 } \ 2055 } \ 2056 } \ 2057 } \ 2058 /* #DE */ \ 2059 return -1; \ 2060 } 1921 } 1922 # define EMIT_IDIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem) \ 1923 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, RT_NOTHING, 1) \ 1924 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, _intel, 1) \ 1925 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, _amd, 0) 2061 1926 2062 1927 EMIT_IDIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags), -
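The asymmetric quotient checks come from two's complement: a negative quotient may reach magnitude 2^(width-1) while a positive one stops at 2^(width-1) - 1. A sketch at 8-bit width (hypothetical helper; not VBox code):

#include <stdint.h>
#include <stdbool.h>

/* Signed 16-by-8 division the way the helper's range checks frame it;
   returns false for #DE (zero divisor or unrepresentable quotient). */
static bool IdivU8(int16_t iDividend, int8_t iDivisor, int8_t *piQuot, int8_t *piRem)
{
    if (iDivisor == 0)
        return false;                                     /* #DE */
    /* Convert to unsigned division on magnitudes, like a_fnNeg does. */
    uint16_t const uDividend = (uint16_t)(iDividend < 0 ? -iDividend : iDividend);
    uint8_t  const uDivisor  = (uint8_t)(iDivisor  < 0 ? -iDivisor  : iDivisor);
    uint16_t const uQuot     = uDividend / uDivisor;
    uint16_t const uRem      = uDividend % uDivisor;
    bool const fNegQuot = (iDividend < 0) != (iDivisor < 0);
    if (uQuot > (fNegQuot ? 0x80u : 0x7fu))               /* the asymmetric bound */
        return false;                                     /* #DE: quotient overflow */
    *piQuot = (int8_t)(fNegQuot      ? 0 - uQuot : uQuot);
    *piRem  = (int8_t)(iDividend < 0 ? 0 - uRem  : uRem); /* remainder takes the dividend's sign */
    return true;
}

So IdivU8(-128, 1, ...) succeeds (negative quotient of magnitude 0x80), IdivU8(128, 1, ...) faults because +128 exceeds INT8_MAX, and IdivU8(-32768, -1, ...) faults because the positive quotient 32768 exceeds even the 0x80 bound.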
trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp
(r94169 → r94170)

In the g_aMulDivU8 table the AMD entries stop masking out the previously "undefined" flags: the second ENTRY_AMD_EX argument (flags to ignore when comparing) drops from the full flag set to 0, matching the Intel entries:

[...]
                                 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
        ENTRY_INTEL_EX(imul_u8,  X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
        ENTRY_AMD_EX(div_u8,     X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
        ENTRY_INTEL_EX(div_u8,   X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
        ENTRY_AMD_EX(idiv_u8,    X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
        ENTRY_INTEL_EX(idiv_u8,  X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    };

The u8 test driver no longer skips workers whose EFLAGS flavour differs from the host CPU. It now runs every worker and, when the flavour matches and a native implementation exists, makes a second variant pass (the pfn local is advanced to pfnNative at the end of each pass, though the calls still go through g_aMulDivU8[iFn].pfn):

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
    {
        RTTestSub(g_hTest, g_aMulDivU8[iFn].pszName);
        MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
        uint32_t const                cTests  = g_aMulDivU8[iFn].cTests;
        uint32_t const                fEflIgn = g_aMulDivU8[iFn].uExtra;
        PFNIEMAIMPLMULDIVU8           pfn     = g_aMulDivU8[iFn].pfn;
        uint32_t const cVars = 1 + (g_aMulDivU8[iFn].idxCpuEflFlavour == g_idxCpuEflFlavour && g_aMulDivU8[iFn].pfnNative);
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                uint32_t fEfl = paTests[iTest].fEflIn;
                uint16_t uDst = paTests[iTest].uDstIn;
                int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
                if (   uDst != paTests[iTest].uDstOut
                    || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
                    || rc != paTests[iTest].rc)
                    RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
                                          "  %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
                                          "%sexpected %#08x     %#06RX16 %d%s\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn,
                                 paTests[iTest].uSrcIn,
                                 iVar ? "  " : "", fEfl, uDst, rc,
                                 iVar ? "  " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
                                 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
                else
                {
                    *g_pu16  = paTests[iTest].uDstIn;
                    *g_pfEfl = paTests[iTest].fEflIn;
                    rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
                    RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
                    RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
                    RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
                }
            }
            pfn = g_aMulDivU8[iFn].pfnNative;
        }
    }

The wider-width (a_cBits) sub-test table gets the same treatment: every ENTRY_AMD_EX second argument becomes 0 (dropping an /** @todo check out AMD flags */ note on the mul entry along the way):

    ENTRY_AMD_EX(mul_u ## a_cBits,    X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_INTEL_EX(mul_u ## a_cBits,  X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_AMD_EX(imul_u ## a_cBits,   X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_AMD_EX(div_u ## a_cBits,    X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_INTEL_EX(div_u ## a_cBits,  X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_AMD_EX(idiv_u ## a_cBits,   X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    }; \