VirtualBox

Changeset 104420 in vbox


Timestamp: Apr 24, 2024 2:34:11 PM
Author: vboxsync
svn:sync-xref-src-repo-rev: 162919
Message:

VMM/IEM: Add native emitters for the IEM_MC_REL_CALL_S16_AND_FINISH/IEM_MC_REL_CALL_S32_AND_FINISH/IEM_MC_REL_CALL_S64_AND_FINISH, IEM_MC_IND_CALL_U16_AND_FINISH/IEM_MC_IND_CALL_U32_AND_FINISH/IEM_MC_IND_CALL_U64_AND_FINISH and IEM_MC_RETN_AND_FINISH IEM MC statements, bugref:10376

Location: trunk/src/VBox/VMM/VMMAll
Files: 3 edited
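
For orientation before the diffs: the guest-visible behaviour behind these MC statements is ordinary near CALL / RETN semantics, which the new native emitters reproduce, including the stack write of the return address (hence the dependency on the stack push/pop emitters). Below is a minimal C model of that behaviour, purely illustrative and not VBox code; the struct and function names are made up here, and the CS-limit/canonical checks and the 16-/32-bit stack-pointer handling that the real emitters perform are left out. The indirect-call variants (IEM_MC_IND_CALL_*) differ only in taking the new RIP from an operand instead of RIP plus a displacement.

#include <stdint.h>

typedef struct GUESTCPU { uint64_t rip; uint64_t rsp; } GUESTCPU;

/* Near relative CALL: push the return address (operand-size wide) and update RIP/EIP/IP. */
static void guestNearCallRel(GUESTCPU *pCpu, uint8_t cbInstr, int64_t offDisp, uint8_t cbOp)
{
    uint64_t const uRetAddr = pCpu->rip + cbInstr;            /* return address = next instruction */
    pCpu->rsp -= cbOp;                                        /* reserve cbOp bytes on the stack */
    /* ...guest memory write of uRetAddr (cbOp bytes) at pCpu->rsp goes here... */
    pCpu->rip = uRetAddr + (uint64_t)offDisp;
    if (cbOp == 2)      pCpu->rip &= UINT64_C(0xffff);        /* 16-bit operand size: IP */
    else if (cbOp == 4) pCpu->rip &= UINT64_C(0xffffffff);    /* 32-bit operand size: EIP */
}

/* Near RETN imm16: pop the return address and discard cbPop extra bytes of stack. */
static void guestNearRetn(GUESTCPU *pCpu, uint16_t cbPop, uint8_t cbOp)
{
    uint64_t uRetAddr = 0;
    /* ...guest memory read of cbOp bytes at pCpu->rsp into uRetAddr goes here... */
    pCpu->rsp += cbOp + cbPop;
    pCpu->rip  = cbOp == 2 ? uRetAddr & UINT64_C(0xffff)
               : cbOp == 4 ? uRetAddr & UINT64_C(0xffffffff) : uRetAddr;
}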

  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veLiveness.cpp

    r104270 r104420  
    338338#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u32NewEIP)                                            IEM_LIVENESS_PC64_JMP_NO_FLAGS()
    339339#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP)                                 IEM_LIVENESS_PC64_JMP_WITH_FLAGS()
     340
     341#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr)                      do { IEM_LIVENESS_PC16_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     342#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr)                      do { IEM_LIVENESS_PC32_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     343#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr)                      do { IEM_LIVENESS_PC64_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     344#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr)           do { IEM_LIVENESS_PC16_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     345#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr)           do { IEM_LIVENESS_PC32_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     346#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr)           do { IEM_LIVENESS_PC64_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     347#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr)                      do { IEM_LIVENESS_PC16_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     348#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr)                      do { IEM_LIVENESS_PC32_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     349#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr)                      do { IEM_LIVENESS_PC64_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     350#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr)           do { IEM_LIVENESS_PC16_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     351#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr)           do { IEM_LIVENESS_PC32_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     352#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr)           do { IEM_LIVENESS_PC64_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     353#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC32(a_i64, a_cbInstr)                      do { IEM_LIVENESS_PC32_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     354#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr)                      do { IEM_LIVENESS_PC64_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     355#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i64, a_cbInstr)           do { IEM_LIVENESS_PC32_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     356#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr)           do { IEM_LIVENESS_PC64_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     357#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr)                 do { IEM_LIVENESS_PC16_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     358#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr)                 do { IEM_LIVENESS_PC32_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     359#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC64(a_u16NewIP, a_cbInstr)                 do { IEM_LIVENESS_PC64_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     360#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr)      do { IEM_LIVENESS_PC16_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     361#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr)      do { IEM_LIVENESS_PC32_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     362#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP, a_cbInstr)      do { IEM_LIVENESS_PC64_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     363#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr)                do { IEM_LIVENESS_PC32_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     364#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP, a_cbInstr)                do { IEM_LIVENESS_PC64_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     365#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr)     do { IEM_LIVENESS_PC32_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     366#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP, a_cbInstr)     do { IEM_LIVENESS_PC64_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     367#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u32NewRIP, a_cbInstr)                do { IEM_LIVENESS_PC64_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     368#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewRIP, a_cbInstr)     do { IEM_LIVENESS_PC64_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     369
     370#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr)                              do { IEM_LIVENESS_PC16_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     371#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_enmEffOpSize)              do { IEM_LIVENESS_PC32_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     372#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_enmEffOpSize)              do { IEM_LIVENESS_PC64_JMP_NO_FLAGS();   IEM_LIVENESS_STACK(); } while (0)
     373#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr)                   do { IEM_LIVENESS_PC16_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     374#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_enmEffOpSize)   do { IEM_LIVENESS_PC32_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
     375#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_enmEffOpSize)   do { IEM_LIVENESS_PC64_JMP_WITH_FLAGS(); IEM_LIVENESS_STACK(); } while (0)
    340376
    341377/* Effective address stuff is rather complicated... */
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8vePython.py

    r104419 r104420  
    8181    'IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS':            (None, True,  True,  True,  ),
    8282
    83     'IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16':                      (None, True,  True,  False, ),
    84     'IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32':                      (None, True,  True,  False, ),
    85     'IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64':                      (None, True,  True,  False, ),
    86     'IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32':                      (None, True,  True,  False, ),
     83    'IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16':                      (None, True,  True,  True, ),
     84    'IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32':                      (None, True,  True,  True, ),
     85    'IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64':                      (None, True,  True,  True, ),
     86    'IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32':                      (None, True,  True,  True, ),
    8787    'IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC64':                      (None, True,  True,  False, ), # @todo These should never be called - can't encode this
    8888    'IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC32':                      (None, True,  True,  False, ), # @todo These should never be called - can't encode this
    89     'IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64':                      (None, True,  True,  False, ),
    90 
    91     'IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS':           (None, True,  True,  False, ),
    92     'IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS':           (None, True,  True,  False, ),
    93     'IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS':           (None, True,  True,  False, ),
    94     'IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS':           (None, True,  True,  False, ),
     89    'IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64':                      (None, True,  True,  True, ),
     90
     91    'IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS':           (None, True,  True,  True, ),
     92    'IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS':           (None, True,  True,  True, ),
     93    'IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS':           (None, True,  True,  True, ),
     94    'IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS':           (None, True,  True,  True, ),
    9595    'IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS':           (None, True,  True,  False, ), # @todo These should never be called - can't encode this
    9696    'IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC32_WITH_FLAGS':           (None, True,  True,  False, ), # @todo These should never be called - can't encode this
    97     'IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS':           (None, True,  True,  False, ),
     97    'IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS':           (None, True,  True,  True, ),
    9898
    9999    'IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16':                       (None, True,  True,  True,  ),
     
    115115    'IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS':            (None, True,  True,  True,  ),
    116116
    117     'IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16':                      (None, True,  True,  False, ),
    118     'IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32':                      (None, True,  True,  False, ),
     117    'IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16':                      (None, True,  True,  True, ),
     118    'IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32':                      (None, True,  True,  True, ),
    119119    'IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC64':                      (None, True,  True,  False, ), # @todo These should never be called - can be called on AMD but not on Intel, 'call ax' in 64-bit code is valid and should push a 16-bit IP IIRC.
    120     'IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC16':                      (None, True,  True,  False, ),
    121     'IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32':                      (None, True,  True,  False, ),
     120    'IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC16':                      (None, True,  True,  True, ),
     121    'IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32':                      (None, True,  True,  True, ),
    122122    'IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC64':                      (None, True,  True,  False, ), # @todo These should never be called - can't encode this.
    123     'IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64':                      (None, True,  True,  False, ),
    124 
    125     'IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS':           (None, True,  True,  False, ),
    126     'IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS':           (None, True,  True,  False, ),
     123    'IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64':                      (None, True,  True,  True, ),
     124
     125    'IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS':           (None, True,  True,  True, ),
     126    'IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS':           (None, True,  True,  True, ),
    127127    'IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS':           (None, True,  True,  False, ), # @todo These should never be called - this is valid, see above.
    128     'IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC16_WITH_FLAGS':           (None, True,  True,  False, ),
    129     'IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS':           (None, True,  True,  False, ),
     128    'IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC16_WITH_FLAGS':           (None, True,  True,  True, ),
     129    'IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS':           (None, True,  True,  True, ),
    130130    'IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS':           (None, True,  True,  False, ), # @todo These should never be called - can't encode this.
    131     'IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS':           (None, True,  True,  False, ),
    132 
    133     'IEM_MC_RETN_AND_FINISH_THREADED_PC16':                              (None, True,  True,  False, ),
    134     'IEM_MC_RETN_AND_FINISH_THREADED_PC32':                              (None, True,  True,  False, ),
    135     'IEM_MC_RETN_AND_FINISH_THREADED_PC64':                              (None, True,  True,  False, ),
    136 
    137     'IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS':                   (None, True,  True,  False, ),
    138     'IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS':                   (None, True,  True,  False, ),
    139     'IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS':                   (None, True,  True,  False, ),
     131    'IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS':           (None, True,  True,  True, ),
     132
     133    'IEM_MC_RETN_AND_FINISH_THREADED_PC16':                              (None, True,  True,  True, ),
     134    'IEM_MC_RETN_AND_FINISH_THREADED_PC32':                              (None, True,  True,  True, ),
     135    'IEM_MC_RETN_AND_FINISH_THREADED_PC64':                              (None, True,  True,  True, ),
     136
     137    'IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS':                   (None, True,  True,  True, ),
     138    'IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS':                   (None, True,  True,  True, ),
     139    'IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS':                   (None, True,  True,  True, ),
    140140
    141141    'IEM_MC_CALC_RM_EFF_ADDR_THREADED_16':                               (None, False, False, True,  ),
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h

    r104357 r104420  
    844844}
    845845
     846
     847
     848/*********************************************************************************************************************************
     849*   Emitters for changing PC/RIP/EIP/IP with an indirect call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters).        *
     850*********************************************************************************************************************************/
     851
     852/** @todo These helpers belong to the stack push API naturally but we already need them up here (we could of course move
     853 *        this below the stack emitters but then this is not close to the rest of the PC/RIP handling...). */
     854DECL_FORCE_INLINE_THROW(uint32_t)
     855iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
     856{
     857    /* Use16BitSp: */
     858#ifdef RT_ARCH_AMD64
     859    off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
     860    off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
     861#else
     862    /* sub regeff, regrsp, #cbMem */
     863    pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
     864    /* and regeff, regeff, #0xffff */
     865    Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
     866    pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0,  false /*f64Bit*/);
     867    /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
     868    pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
     869#endif
     870    return off;
     871}
     872
     873
     874DECL_FORCE_INLINE(uint32_t)
     875iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
     876{
     877    /* Use32BitSp: */
     878    off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
     879    off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
     880    return off;
     881}
     882
     883
     884DECL_INLINE_THROW(uint32_t)
     885iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
     886                          uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
     887{
     888    /*
     889     * Assert sanity.
     890     */
     891#ifdef VBOX_STRICT
     892    if (RT_BYTE2(cBitsVarAndFlat) != 0)
     893    {
     894        Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
     895               || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
     896               || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
     897        Assert(   pfnFunction
     898               == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
     899                   : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
     900                   : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
     901                   : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
     902                   : UINT64_C(0xc000b000a0009000) ));
     903    }
     904    else
     905        Assert(   pfnFunction
     906               == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
     907                   : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
     908                   : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
     909                   : UINT64_C(0xc000b000a0009000) ));
     910#endif
     911
     912#ifdef VBOX_STRICT
     913    /*
     914     * Check that the fExec flags we've got make sense.
     915     */
     916    off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
     917#endif
     918
     919    /*
     920     * To keep things simple we have to commit any pending writes first as we
     921     * may end up making calls.
     922     */
     923    /** @todo we could postpone this till we make the call and reload the
     924     * registers after returning from the call. Not sure if that's sensible or
     925     * not, though. */
     926    off = iemNativeRegFlushPendingWrites(pReNative, off);
     927
     928    /*
     929     * First we calculate the new RSP and the effective stack pointer value.
     930     * For 64-bit mode and flat 32-bit these two are the same.
     931     * (Code structure is very similar to that of PUSH)
     932     */
     933    uint8_t const cbMem       = RT_BYTE1(cBitsVarAndFlat) / 8;
     934    bool const    fIsSegReg   = RT_BYTE3(cBitsVarAndFlat) != 0;
     935    bool const    fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
     936    uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
     937                              ? cbMem : sizeof(uint16_t);
     938    uint8_t const cBitsFlat   = RT_BYTE2(cBitsVarAndFlat);      RT_NOREF(cBitsFlat);
     939    uint8_t const idxRegRsp   = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
     940                                                                kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
     941    uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
     942    uint32_t      offFixupJumpToUseOtherBitSp = UINT32_MAX;
     943    if (cBitsFlat != 0)
     944    {
     945        Assert(idxRegEffSp == idxRegRsp);
     946        Assert(cBitsFlat == 32 || cBitsFlat == 64);
     947        Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
     948        if (cBitsFlat == 64)
     949            off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
     950        else
     951            off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
     952    }
     953    else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
     954    {
     955        Assert(idxRegEffSp != idxRegRsp);
     956        uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
     957                                                                     kIemNativeGstRegUse_ReadOnly);
     958#ifdef RT_ARCH_AMD64
     959        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
     960#else
     961        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
     962#endif
     963        off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
     964        iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
     965        offFixupJumpToUseOtherBitSp = off;
     966        if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
     967        {
     968            off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
     969            off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
     970        }
     971        else
     972        {
     973            off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
     974            off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
     975        }
     976        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     977    }
     978    /* SpUpdateEnd: */
     979    uint32_t const offLabelSpUpdateEnd = off;
     980
     981    /*
     982     * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
     983     * we're skipping lookup).
     984     */
     985    uint8_t const  iSegReg           = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
     986    IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
     987    uint16_t const uTlbSeqNo         = pReNative->uTlbSeqNo++;
     988    uint32_t const idxLabelTlbMiss   = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
     989    uint32_t const idxLabelTlbLookup = !TlbState.fSkip
     990                                     ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
     991                                     : UINT32_MAX;
     992    uint8_t const  idxRegMemResult   = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
     993
     994
     995    if (!TlbState.fSkip)
     996        off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
     997    else
     998        off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
     999
     1000    /*
     1001     * Use16BitSp:
     1002     */
     1003    if (cBitsFlat == 0)
     1004    {
     1005#ifdef RT_ARCH_AMD64
     1006        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
     1007#else
     1008        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
     1009#endif
     1010        iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
     1011        if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
     1012            off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
     1013        else
     1014            off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
     1015        off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
     1016        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     1017    }
     1018
     1019    /*
     1020     * TlbMiss:
     1021     *
     1022     * Call helper to do the pushing.
     1023     */
     1024    iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
     1025
     1026#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
     1027    off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
     1028#else
     1029    RT_NOREF(idxInstr);
     1030#endif
     1031
     1032    /* Save variables in volatile registers. */
     1033    uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
     1034                                     | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
     1035                                     | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
     1036                                     | (RT_BIT_32(idxRegPc));
     1037    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
     1038
     1039    if (   idxRegPc == IEMNATIVE_CALL_ARG1_GREG
     1040        && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
     1041    {
     1042        /* Swap them using ARG0 as temp register: */
     1043        off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
     1044        off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
     1045        off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
     1046    }
     1047    else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
     1048    {
     1049        /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
     1050        off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
     1051
     1052        /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
     1053        if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
     1054            off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
     1055    }
     1056    else
     1057    {
     1058        /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
     1059        off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
     1060
     1061        /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
     1062        off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
     1063    }
     1064
     1065    /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
     1066    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
     1067
     1068    /* Done setting up parameters, make the call. */
     1069    off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
     1070
     1071    /* Restore variables and guest shadow registers to volatile registers. */
     1072    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
     1073    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
     1074
     1075#ifdef IEMNATIVE_WITH_TLB_LOOKUP
     1076    if (!TlbState.fSkip)
     1077    {
     1078        /* end of TlbMiss - Jump to the done label. */
     1079        uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
     1080        off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
     1081
     1082        /*
     1083         * TlbLookup:
     1084         */
     1085        off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
     1086                                           IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
     1087
     1088        /*
     1089         * Emit code to do the actual storing / fetching.
     1090         */
     1091        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
     1092# ifdef VBOX_WITH_STATISTICS
     1093        off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
     1094                                                  RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
     1095# endif
     1096        switch (cbMemAccess)
     1097        {
     1098            case 2:
     1099                off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
     1100                break;
     1101            case 4:
     1102                if (!fIsIntelSeg)
     1103                    off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
     1104                else
     1105                {
     1106                    /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
     1107                       PUSH FS in real mode, so we have to try to emulate that here.
     1108                       We borrow the now unused idxReg1 from the TLB lookup code here. */
     1109                    uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
     1110                                                                                        kIemNativeGstReg_EFlags);
     1111                    if (idxRegEfl != UINT8_MAX)
     1112                    {
     1113#ifdef ARCH_AMD64
     1114                        off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
     1115                        off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
     1116                                                         UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
     1117#else
     1118                        off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
     1119                                                              off, TlbState.idxReg1, idxRegEfl,
     1120                                                              UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
     1121#endif
     1122                        iemNativeRegFreeTmp(pReNative, idxRegEfl);
     1123                    }
     1124                    else
     1125                    {
     1126                        off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
     1127                                                              RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
     1128                        off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
     1129                                                         UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
     1130                    }
     1131                    /* ASSUMES the upper half of idxRegPc is ZERO. */
     1132                    off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
     1133                    off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
     1134                }
     1135                break;
     1136            case 8:
     1137                off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
     1138                break;
     1139            default:
     1140                AssertFailed();
     1141        }
     1142
     1143        iemNativeRegFreeTmp(pReNative, idxRegMemResult);
     1144        TlbState.freeRegsAndReleaseVars(pReNative);
     1145
     1146        /*
     1147         * TlbDone:
     1148         *
     1149         * Commit the new RSP value.
     1150         */
     1151        iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
     1152    }
     1153#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
     1154
     1155#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
     1156    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
     1157#endif
     1158    iemNativeRegFreeTmp(pReNative, idxRegRsp);
     1159    if (idxRegEffSp != idxRegRsp)
     1160        iemNativeRegFreeTmp(pReNative, idxRegEffSp);
     1161
     1162    return off;
     1163}
     1164
     1165
     1166/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
     1167#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
     1168    off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
     1169
     1170/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
     1171 *  clears flags. */
     1172#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
     1173    IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
     1174    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
     1175
     1176/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
     1177#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
     1178    off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
     1179
     1180/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
     1181 *  clears flags. */
     1182#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
     1183    IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
     1184    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
     1185
     1186#undef  IEM_MC_IND_CALL_U16_AND_FINISH
     1187
     1188
     1189/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
     1190#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
     1191    off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
     1192
     1193/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
     1194 *  clears flags. */
     1195#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
     1196    IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
     1197    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
     1198
     1199#undef  IEM_MC_IND_CALL_U32_AND_FINISH
     1200
     1201
     1202/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
     1203 *  an extra parameter, for use in 64-bit code. */
     1204#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
     1205    off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
     1206
     1207
     1208/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
     1209 *  an extra parameter, for use in 64-bit code and we need to check and clear
     1210 *  flags. */
     1211#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
     1212    IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
     1213    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
     1214
     1215#undef  IEM_MC_IND_CALL_U64_AND_FINISH
     1216
     1217/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
     1218 *  iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
     1219DECL_INLINE_THROW(uint32_t)
     1220iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
     1221                                    uint8_t idxInstr, uint8_t cbVar)
     1222{
     1223    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
     1224    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
     1225
     1226    /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
     1227    off = iemNativeRegFlushPendingWrites(pReNative, off);
     1228
     1229#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
     1230    Assert(pReNative->Core.offPc == 0);
     1231
     1232    STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
     1233#endif
     1234
     1235    /* Get a register with the new PC loaded from idxVarPc.
     1236       Note! This ASSUMES that the high bits of the GPR are zeroed. */
     1237    uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
     1238
     1239    /* Check limit (may #GP(0) + exit TB). */
     1240    if (!f64Bit)
     1241/** @todo we can skip this test in FLAT 32-bit mode. */
     1242        off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
     1243    /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
     1244    else if (cbVar > sizeof(uint32_t))
     1245        off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
     1246
     1247#if 1
     1248    /* Allocate a temporary PC register, we don't want it shadowed. */
     1249    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
     1250                                                             kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
     1251#else
     1252    /* Allocate a temporary PC register. */
     1253    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
     1254                                                             true /*fNoVolatileRegs*/);
     1255#endif
     1256
     1257    /* Perform the addition and push the variable to the guest stack. */
     1258    /** @todo Flat variants for PC32 variants. */
     1259    switch (cbVar)
     1260    {
     1261        case sizeof(uint16_t):
     1262            off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
     1263            /* Truncate the result to 16-bit IP. */
     1264            off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
     1265            off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16,  0, 0, 0),
     1266                                            (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
     1267            break;
     1268        case sizeof(uint32_t):
     1269            off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
     1270            /** @todo In FLAT mode we can use the flat variant. */
     1271            off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32,  0, 0, 0),
     1272                                            (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
     1273            break;
     1274        case sizeof(uint64_t):
     1275            off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
     1276            off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64,  64, 0, 0),
     1277                                            (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
     1278            break;
     1279        default:
     1280            AssertFailed();
     1281    }
     1282
     1283    /* RSP got changed, so do this again. */
     1284    off = iemNativeRegFlushPendingWrites(pReNative, off);
     1285
     1286    /* Store the result. */
     1287    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
     1288
     1289#if 1
     1290    /* Need to transfer the shadow information to the new RIP register. */
     1291    iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
     1292#else
     1293    /* Sync the new PC. */
     1294    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
     1295#endif
     1296    iemNativeVarRegisterRelease(pReNative, idxVarPc);
     1297    iemNativeRegFreeTmp(pReNative, idxPcReg);
     1298    /** @todo implicitly free the variable? */
     1299
     1300    return off;
     1301}
     1302
     1303
     1304/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
     1305 *  an extra parameter, for use in 16-bit code on a pre-386 CPU. */
     1306#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
     1307    off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
     1308
     1309/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
     1310 *  an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
     1311 *  flags. */
     1312#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
     1313    IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
     1314    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
     1315
     1316/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
     1317 *  an extra parameter, for use in 16-bit and 32-bit code on 386+. */
     1318#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
     1319    off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
     1320
     1321/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
     1322 *  an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
     1323 *  flags. */
     1324#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
     1325    IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
     1326    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
     1327
     1328/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
     1329 *  an extra parameter, for use in 64-bit code. */
     1330#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
     1331    off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
     1332
     1333/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
     1334 *  an extra parameter, for use in 64-bit code and we need to check and clear
     1335 *  flags. */
     1336#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
     1337    IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
     1338    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
     1339
     1340#undef  IEM_MC_REL_CALL_S16_AND_FINISH
     1341
     1342/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
     1343 *  iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
     1344DECL_INLINE_THROW(uint32_t)
     1345iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
     1346                                       uint8_t idxInstr)
     1347{
     1348    /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
     1349    off = iemNativeRegFlushPendingWrites(pReNative, off);
     1350
     1351#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
     1352    Assert(pReNative->Core.offPc == 0);
     1353
     1354    STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
     1355#endif
     1356
     1357    /* Allocate a temporary PC register. */
     1358    uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
     1359                                                                kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
     1360    uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
     1361
     1362    /* Calculate the new RIP. */
     1363    off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
     1364    /* Truncate the result to 16-bit IP. */
     1365    off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
     1366    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
     1367    off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
     1368
     1369    /* Truncate the result to 16-bit IP. */
     1370    off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
     1371
     1372    /* Check limit (may #GP(0) + exit TB). */
     1373    off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
     1374
     1375    /* Push the return address to the guest stack. */
     1376    off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16,  0, 0, 0),
     1377                                    (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
     1378
     1379    /* RSP got changed, so flush again. */
     1380    off = iemNativeRegFlushPendingWrites(pReNative, off);
     1381
     1382    /* Store the result. */
     1383    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
     1384
     1385    /* Need to transfer the shadow information to the new RIP register. */
     1386    iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
     1387    iemNativeRegFreeTmp(pReNative, idxPcRegOld);
     1388    iemNativeRegFreeTmp(pReNative, idxPcRegNew);
     1389
     1390    return off;
     1391}
     1392
     1393
     1394/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
     1395 *  an extra parameter, for use in 16-bit and 32-bit code on 386+. */
     1396#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
     1397    off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
     1398
     1399/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
     1400 *  an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
     1401 *  flags. */
     1402#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
     1403    IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
     1404    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
     1405
     1406#undef  IEM_MC_REL_CALL_S32_AND_FINISH
     1407
     1408/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
     1409 *  iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
     1410DECL_INLINE_THROW(uint32_t)
     1411iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
     1412                                      uint8_t idxInstr)
     1413{
     1414    /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
     1415    off = iemNativeRegFlushPendingWrites(pReNative, off);
     1416
     1417#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
     1418    Assert(pReNative->Core.offPc == 0);
     1419
     1420    STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
     1421#endif
     1422
     1423    /* Allocate a temporary PC register. */
     1424    uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
     1425                                                                kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
     1426    uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
     1427
     1428    /* Update the EIP to get the return address. */
     1429    off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
     1430
     1431    /* Load address, add the displacement and check it against the CS segment limit, raising #GP(0) + exit TB if it is beyond the limit. */
     1432    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
     1433    off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
     1434    /** @todo we can skip this test in FLAT 32-bit mode. */
     1435    off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
     1436
     1437    /* Push the return address to the guest stack. */
     1438    /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
     1439    off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32,  0, 0, 0),
     1440                                    (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
     1441
     1442    /* RSP got changed, so do this again. */
     1443    off = iemNativeRegFlushPendingWrites(pReNative, off);
     1444
     1445    /* Store the result. */
     1446    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
     1447
     1448    /* Need to transfer the shadow information to the new RIP register. */
     1449    iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
     1450    iemNativeRegFreeTmp(pReNative, idxPcRegNew);
     1451    iemNativeRegFreeTmp(pReNative, idxPcRegOld);
     1452
     1453    return off;
     1454}
     1455
     1456
     1457/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
     1458 *  an extra parameter, for use in 64-bit code. */
     1459#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
     1460    off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
     1461
     1462/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
     1463 *  an extra parameter, for use in 64-bit code and we need to check and clear
     1464 *  flags. */
     1465#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
     1466    IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
     1467    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
     1468
     1469#undef  IEM_MC_REL_CALL_S64_AND_FINISH
     1470
     1471/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
     1472 *  iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
     1473DECL_INLINE_THROW(uint32_t)
     1474iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
     1475                                      uint8_t idxInstr)
     1476{
     1477    /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
     1478    off = iemNativeRegFlushPendingWrites(pReNative, off);
     1479
     1480#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
     1481    Assert(pReNative->Core.offPc == 0);
     1482
     1483    STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
     1484#endif
     1485
     1486    /* Allocate a temporary PC register. */
     1487    uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
     1488                                                                kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
     1489    uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
     1490
     1491    /* Update the RIP to get the return address. */
     1492    off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
     1493
     1494    /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
     1495    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
     1496    off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
     1497    off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
     1498
     1499    /* Push the return address to the guest stack. */
     1500    off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64,  64, 0, 0),
     1501                                    (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
     1502
     1503    /* RSP got changed, so do this again. */
     1504    off = iemNativeRegFlushPendingWrites(pReNative, off);
     1505
     1506    /* Store the result. */
     1507    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
     1508
     1509    /* Need to transfer the shadow information to the new RIP register. */
     1510    iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
     1511    iemNativeRegFreeTmp(pReNative, idxPcRegNew);
     1512    iemNativeRegFreeTmp(pReNative, idxPcRegOld);
     1513
     1514    return off;
     1515}
     1516
     1517
     1518/*********************************************************************************************************************************
     1519*   Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters).    *
     1520*********************************************************************************************************************************/
     1521
     1522DECL_FORCE_INLINE_THROW(uint32_t)
     1523iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
     1524                                    uint16_t cbPopAdd, uint8_t idxRegTmp)
     1525{
     1526    /* Use16BitSp: */
     1527#ifdef RT_ARCH_AMD64
     1528    off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
     1529    off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
     1530    off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
     1531    RT_NOREF(idxRegTmp);
     1532#elif defined(RT_ARCH_ARM64)
     1533    /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
     1534    pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
     1535    /* add tmp, regrsp, #cbMem */
     1536    uint16_t const cbCombined = cbMem + cbPopAdd;
     1537    pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
     1538    if (cbCombined >= RT_BIT_32(12))
     1539        pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
     1540                                                   false /*f64Bit*/, false /*fSetFlags*/,  true /*fShift12*/);
     1541    /* and tmp, tmp, #0xffff */
     1542    Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
     1543    pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
     1544    /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
     1545    pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
     1546#else
     1547# error "Port me"
     1548#endif
     1549    return off;
     1550}
     1551
     1552
     1553DECL_FORCE_INLINE_THROW(uint32_t)
     1554iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
     1555                                    uint16_t cbPopAdd)
     1556{
     1557    /* Use32BitSp: */
     1558    off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
     1559    off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
     1560    return off;
     1561}
     1562
     1563
     1564/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
     1565#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
     1566    off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
     1567
     1568/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
     1569#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
     1570    off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
     1571
     1572/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
     1573#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
     1574    off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
     1575
     1576/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
     1577 *  clears flags. */
     1578#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
     1579    IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
     1580    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
     1581
     1582/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
     1583 *  clears flags. */
     1584#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
     1585    IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
     1586    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
     1587
     1588/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
     1589 *  clears flags. */
     1590#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
     1591    IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
     1592    off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
     1593
     1594/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
     1595DECL_INLINE_THROW(uint32_t)
     1596iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
     1597                  IEMMODE enmEffOpSize, uint8_t idxInstr)
     1598{
     1599    RT_NOREF(cbInstr);
     1600
     1601#ifdef VBOX_STRICT
     1602    /*
     1603     * Check that the fExec flags we've got make sense.
     1604     */
     1605    off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
     1606#endif
     1607
     1608    /*
     1609     * To keep things simple we have to commit any pending writes first as we
     1610     * may end up making calls.
     1611     */
     1612    off = iemNativeRegFlushPendingWrites(pReNative, off);
     1613
     1614    /*
     1615     * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
     1616     * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
     1617     * directly as the effective stack pointer.
     1618     * (Code structure is very similar to that of PUSH)
     1619     *
     1620     * Note! As a simplification, we treat opsize overridden returns (o16 ret)
     1621     *       in FLAT 32-bit mode as if we weren't in FLAT mode since these
     1622     *       aren't commonly used (or useful) and thus not in need of optimizing.
     1623     *
      1624     * Note! For non-flat modes the guest RSP is not allocated for update but rather for calculation
     1625     *       as the shadowed register would remain modified even if the return address throws a \#GP(0)
     1626     *       due to being outside the CS limit causing a wrong stack pointer value in the guest (see
     1627     *       the near return testcase in bs3-cpu-basic-2). If no exception is thrown the shadowing is transfered
     1628     *       to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
     1629     */
     1630    uint8_t   const cbMem           =   enmEffOpSize == IEMMODE_64BIT
     1631                                      ? sizeof(uint64_t)
     1632                                      : enmEffOpSize == IEMMODE_32BIT
     1633                                      ? sizeof(uint32_t)
     1634                                      : sizeof(uint16_t);
     1635    bool      const fFlat           = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
     1636    uintptr_t const pfnFunction     = fFlat
     1637                                      ?   enmEffOpSize == IEMMODE_64BIT
     1638                                        ? (uintptr_t)iemNativeHlpStackFlatFetchU64
     1639                                        : (uintptr_t)iemNativeHlpStackFlatFetchU32
     1640                                      :   enmEffOpSize == IEMMODE_32BIT
     1641                                        ? (uintptr_t)iemNativeHlpStackFetchU32
     1642                                        : (uintptr_t)iemNativeHlpStackFetchU16;
     1643    uint8_t   const idxRegRsp       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
     1644                                                                      fFlat ? kIemNativeGstRegUse_ForUpdate : kIemNativeGstRegUse_Calculation,
     1645                                                                      true /*fNoVolatileRegs*/);
     1646    uint8_t   const idxRegEffSp     = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
     1647    /** @todo can do a better job picking the register here. For cbMem >= 4 this
     1648     *        will be the resulting register value. */
     1649    uint8_t   const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too.  */
     1650
     1651    uint32_t        offFixupJumpToUseOtherBitSp = UINT32_MAX;
     1652    if (fFlat)
     1653        Assert(idxRegEffSp == idxRegRsp);
     1654    else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
     1655    {
     1656        Assert(idxRegEffSp != idxRegRsp);
     1657        uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
     1658                                                                     kIemNativeGstRegUse_ReadOnly);
     1659#ifdef RT_ARCH_AMD64
     1660        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
     1661#else
     1662        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
     1663#endif
     1664        off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
     1665        iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
     1666        offFixupJumpToUseOtherBitSp = off;
     1667        if (enmEffOpSize == IEMMODE_32BIT)
     1668        {
     1669            off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
     1670            off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
     1671        }
     1672        else
     1673        {
     1674            Assert(enmEffOpSize == IEMMODE_16BIT);
     1675            off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
     1676            off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
     1677                                                      idxRegMemResult);
     1678        }
     1679        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     1680    }
     1681    /* SpUpdateEnd: */
     1682    uint32_t const offLabelSpUpdateEnd = off;
     1683
     1684    /*
     1685     * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
     1686     * we're skipping lookup).
     1687     */
     1688    uint8_t const  iSegReg           = fFlat ? UINT8_MAX : X86_SREG_SS;
     1689    IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
     1690    uint16_t const uTlbSeqNo         = pReNative->uTlbSeqNo++;
     1691    uint32_t const idxLabelTlbMiss   = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
     1692    uint32_t const idxLabelTlbLookup = !TlbState.fSkip
     1693                                     ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
     1694                                     : UINT32_MAX;
     1695
     1696    if (!TlbState.fSkip)
     1697        off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
     1698    else
     1699        off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
     1700
     1701    /*
      1703     * Use16BitSp / Use32BitSp (whichever width was not handled above):
     1703     */
     1704    if (!fFlat)
     1705    {
     1706#ifdef RT_ARCH_AMD64
     1707        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
     1708#else
     1709        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
     1710#endif
     1711        iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
     1712        if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
     1713            off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
     1714                                                      idxRegMemResult);
     1715        else
     1716            off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
     1717        off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
     1718        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     1719    }
     1720
     1721    /*
     1722     * TlbMiss:
     1723     *
      1724     * Call helper to do the popping.
     1725     */
     1726    iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
     1727
     1728#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
     1729    off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
     1730#else
     1731    RT_NOREF(idxInstr);
     1732#endif
     1733
     1734    uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
     1735                                     | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
     1736                                     | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
     1737    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
     1738
     1739
     1740    /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
     1741    if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
     1742        off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
     1743
     1744    /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
     1745    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
     1746
     1747    /* Done setting up parameters, make the call. */
     1748    off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
     1749
     1750    /* Move the return register content to idxRegMemResult. */
     1751    if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
     1752        off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
     1753
     1754    /* Restore variables and guest shadow registers to volatile registers. */
     1755    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
     1756    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
     1757
     1758#ifdef IEMNATIVE_WITH_TLB_LOOKUP
     1759    if (!TlbState.fSkip)
     1760    {
     1761        /* end of TlbMiss - Jump to the done label. */
     1762        uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
     1763        off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
     1764
     1765        /*
     1766         * TlbLookup:
     1767         */
     1768        off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
     1769                                           idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
     1770
     1771        /*
      1772         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
     1773         */
     1774        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
     1775# ifdef VBOX_WITH_STATISTICS
     1776        off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
     1777                                                  RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
     1778# endif
     1779        switch (cbMem)
     1780        {
     1781            case 2:
     1782                off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
     1783                break;
     1784            case 4:
     1785                off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
     1786                break;
     1787            case 8:
     1788                off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
     1789                break;
     1790            default:
     1791                AssertFailed();
     1792        }
     1793
     1794        TlbState.freeRegsAndReleaseVars(pReNative);
     1795
     1796        /*
     1797         * TlbDone:
     1798         *
      1799         * Set the new RSP value (FLAT accesses need to calculate it first) and
     1800         * commit the popped register value.
     1801         */
     1802        iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
     1803    }
     1804#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
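    /* Flow recap (comment added for illustration, not part of the changeset): with
       IEMNATIVE_WITH_TLB_LOOKUP the TlbLookup path above reads the return address
       directly on a TLB hit and only the TlbMiss path calls the fetch helper; without
       the define every return goes through the helper.  Either way idxRegMemResult
       now holds the popped return address. */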
     1805
     1806    /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
     1807    if (!f64Bit)
     1808/** @todo we can skip this test in FLAT 32-bit mode. */
     1809        off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
     1810    /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
     1811    else if (enmEffOpSize == IEMMODE_64BIT)
     1812        off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
     1813
     1814    /* Complete RSP calculation for FLAT mode. */
     1815    if (idxRegEffSp == idxRegRsp)
     1816    {
     1817        if (enmEffOpSize == IEMMODE_64BIT)
     1818            off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
     1819        else
     1820        {
     1821            Assert(enmEffOpSize == IEMMODE_32BIT);
     1822            off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
     1823        }
     1824    }
     1825
     1826    /* Commit the result and clear any current guest shadows for RIP. */
     1827    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
     1828    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
     1829    iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult,  kIemNativeGstReg_Pc, off);
     1830
     1831    /* Need to transfer the shadowing information to the host register containing the updated value now. */
     1832    if (!fFlat)
     1833        iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
     1834
     1835    iemNativeRegFreeTmp(pReNative, idxRegRsp);
     1836    if (idxRegEffSp != idxRegRsp)
     1837        iemNativeRegFreeTmp(pReNative, idxRegEffSp);
     1838    iemNativeRegFreeTmp(pReNative, idxRegMemResult);
     1839    return off;
     1840}
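To summarize the generated code in one place, here is a self-contained C sketch (illustrative only, not part of the changeset; names are hypothetical) of the 64-bit flat case iemNativeEmitRetn handles: pop the return address, verify it is canonical, release the RETN immediate bytes and resume at the caller.

#include <stdint.h>
#include <string.h>
#include <stdbool.h>

/* Hypothetical illustration of 'retn imm16' in 64-bit flat mode. */
static bool retn64FlatSketch(uint64_t *puRip, uint64_t *puRsp, uint8_t const *pbStack, uint16_t cbPop)
{
    uint64_t uRetRip;
    memcpy(&uRetRip, &pbStack[*puRsp], sizeof(uRetRip));      /* fetch the return address at [RSP] */
    if ((int64_t)uRetRip != (int64_t)(uRetRip << 16) >> 16)   /* canonical check, as in the emitter */
        return false;                                         /* the emitter raises #GP(0) and exits the TB */
    *puRsp += sizeof(uint64_t) + cbPop;                       /* discard the address plus the imm16 bytes */
    *puRip  = uRetRip;                                        /* resume execution at the caller */
    return true;
}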
    8461841
    8471842
     
    62857280
    62867281
    6287 DECL_FORCE_INLINE_THROW(uint32_t)
    6288 iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
    6289 {
    6290     /* Use16BitSp: */
    6291 #ifdef RT_ARCH_AMD64
    6292     off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
    6293     off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
    6294 #else
    6295     /* sub regeff, regrsp, #cbMem */
    6296     pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
    6297     /* and regeff, regeff, #0xffff */
    6298     Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
    6299     pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0,  false /*f64Bit*/);
    6300     /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
    6301     pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
    6302 #endif
    6303     return off;
    6304 }
    6305 
    6306 
    6307 DECL_FORCE_INLINE(uint32_t)
    6308 iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
    6309 {
    6310     /* Use32BitSp: */
    6311     off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
    6312     off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
    6313     return off;
    6314 }
    6315 
    6316 
    63177282/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
    63187283DECL_INLINE_THROW(uint32_t)