Changeset 105491 in vbox
- Timestamp: Jul 24, 2024, 2:51:20 PM
- Location: trunk/src/VBox/VMM
- Files: 3 edited
trunk/src/VBox/VMM/VMMAll/IEMAllInstTwoByte0f.cpp.h
r105488 → r105491

Added (new lines 751-810):

/**
 * A body preprocessor variant of iemOpCommonSseFp_FullFull_To_Full in order
 * to support native emitters for certain instructions.
 */
#define SSE_FP_BODY_FullFull_To_Full(a_Ins, a_pImplExpr, a_fRegNativeArchs, a_fMemNativeArchs) \
    PFNIEMAIMPLFPSSEF2U128 const pfnU128 = (a_pImplExpr); \
    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm); \
    if (IEM_IS_MODRM_REG_MODE(bRm)) \
    { \
        /* \
         * XMM, XMM. \
         */ \
        IEM_MC_BEGIN(IEM_MC_F_NOT_286_OR_OLDER, 0); \
        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX_EX(fSse); \
        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT(); \
        IEM_MC_PREPARE_SSE_USAGE(); \
        IEM_MC_NATIVE_IF(a_fRegNativeArchs) { \
            IEM_MC_NATIVE_EMIT_2_EX(RT_CONCAT3(iemNativeEmit_,a_Ins,_rr_u128), IEM_GET_MODRM_REG(pVCpu, bRm), IEM_GET_MODRM_RM(pVCpu, bRm)); \
        } IEM_MC_NATIVE_ELSE() { \
            IEM_MC_LOCAL(X86XMMREG, SseRes); \
            IEM_MC_ARG_LOCAL_REF(PX86XMMREG, pSseRes, SseRes, 0); \
            IEM_MC_ARG(PCX86XMMREG, pSrc1, 1); \
            IEM_MC_REF_XREG_XMM_CONST(pSrc1, IEM_GET_MODRM_REG(pVCpu, bRm)); \
            IEM_MC_ARG(PCX86XMMREG, pSrc2, 2); \
            IEM_MC_REF_XREG_XMM_CONST(pSrc2, IEM_GET_MODRM_RM(pVCpu, bRm)); \
            IEM_MC_CALL_SSE_AIMPL_3(pfnU128, pSseRes, pSrc1, pSrc2); \
            IEM_MC_STORE_XREG_XMM(IEM_GET_MODRM_REG(pVCpu, bRm), SseRes); \
        } IEM_MC_NATIVE_ENDIF(); \
        IEM_MC_ADVANCE_RIP_AND_FINISH(); \
        IEM_MC_END(); \
    } \
    else \
    { \
        /* \
         * XMM, [mem128]. \
         */ \
        IEM_MC_BEGIN(IEM_MC_F_NOT_286_OR_OLDER, 0); \
        IEM_MC_LOCAL(X86XMMREG, uSrc2); \
        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc); \
        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0); \
        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX_EX(fSse); \
        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT(); \
        IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(uSrc2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); \
        IEM_MC_PREPARE_SSE_USAGE(); \
        IEM_MC_NATIVE_IF(a_fRegNativeArchs) { \
            IEM_MC_NATIVE_EMIT_2_EX(RT_CONCAT3(iemNativeEmit_,a_Ins,_rv_u128), IEM_GET_MODRM_REG(pVCpu, bRm), uSrc2); \
        } IEM_MC_NATIVE_ELSE() { \
            IEM_MC_LOCAL(X86XMMREG, SseRes); \
            IEM_MC_ARG_LOCAL_REF(PX86XMMREG, pSseRes, SseRes, 0); \
            IEM_MC_ARG(PCX86XMMREG, pSrc1, 1); \
            IEM_MC_ARG_LOCAL_REF(PCX86XMMREG, pSrc2, uSrc2, 2); \
            IEM_MC_REF_XREG_XMM_CONST(pSrc1, IEM_GET_MODRM_REG(pVCpu, bRm)); \
            IEM_MC_CALL_SSE_AIMPL_3(pfnU128, pSseRes, pSrc1, pSrc2); \
            IEM_MC_STORE_XREG_XMM(IEM_GET_MODRM_REG(pVCpu, bRm), SseRes); \
        } IEM_MC_NATIVE_ENDIF(); \
        IEM_MC_ADVANCE_RIP_AND_FINISH(); \
        IEM_MC_END(); \
    } void(0)

Changed handlers, each switched from the common C worker call to the new body macro:

In the ADDPS handler (old line 5374 → new line 5434):
     IEMOP_MNEMONIC2(RM, ADDPS, addps, Vps, Wps, DISOPTYPE_HARMLESS, 0);
-    return FNIEMOP_CALL_1(iemOpCommonSseFp_FullFull_To_Full, iemAImpl_addps_u128);
+    SSE_FP_BODY_FullFull_To_Full(addps, iemAImpl_addps_u128, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64);

In the ADDPD handler (old line 5382 → new line 5442):
     IEMOP_MNEMONIC2(RM, ADDPD, addpd, Vpd, Wpd, DISOPTYPE_HARMLESS, 0);
-    return FNIEMOP_CALL_1(iemOpCommonSse2Fp_FullFull_To_Full, iemAImpl_addpd_u128);
+    SSE_FP_BODY_FullFull_To_Full(addpd, iemAImpl_addpd_u128, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64);

In the MULPS handler (old line 5406 → new line 5466):
     IEMOP_MNEMONIC2(RM, MULPS, mulps, Vps, Wps, DISOPTYPE_HARMLESS, 0);
-    return FNIEMOP_CALL_1(iemOpCommonSseFp_FullFull_To_Full, iemAImpl_mulps_u128);
+    SSE_FP_BODY_FullFull_To_Full(mulps, iemAImpl_mulps_u128, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64);

In the SUBPS handler (old line 5545 → new line 5605):
     IEMOP_MNEMONIC2(RM, SUBPS, subps, Vps, Wps, DISOPTYPE_HARMLESS, 0);
-    return FNIEMOP_CALL_1(iemOpCommonSseFp_FullFull_To_Full, iemAImpl_subps_u128);
+    SSE_FP_BODY_FullFull_To_Full(subps, iemAImpl_subps_u128, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64);
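All four converted handlers follow the same shape. As an illustration only (this changeset converts no other opcode), a further packed-single handler switched over the same way would look roughly like the sketch below; the divps names are hypothetical here and it assumes matching iemNativeEmit_divps_rr_u128/_rv_u128 emitters exist:

/* Illustration only - not part of r105491. Mirrors the addps/mulps/subps hunks above. */
FNIEMOP_DEF(iemOp_divps_Vps_Wps)
{
    IEMOP_MNEMONIC2(RM, DIVPS, divps, Vps, Wps, DISOPTYPE_HARMLESS, 0);
    SSE_FP_BODY_FullFull_To_Full(divps, iemAImpl_divps_u128,
                                 RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64,  /* a_fRegNativeArchs */
                                 RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64); /* a_fMemNativeArchs */
}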
trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h
r105484 → r105491

Context (line 2573):
IEMNATIVE_NATIVE_EMIT_PMOV_S_Z_U128(pmovsxdq, false, kArmv8InstrShiftSz_U32, 0x25);

Added (new lines 2575-2850):

/**
 * Updates the MXCSR exception flags, raising any unmasked exceptions.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitMxcsrUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxInstr,
                         uint8_t const idxSimdGstRegDst, uint8_t const idxSimdRegRes)
{
    uint8_t const idxRegMxCsr          = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ForUpdate);
    uint8_t const idxRegMxCsrXcptFlags = iemNativeRegAllocTmp(pReNative, &off);
    uint8_t const idxRegTmp            = iemNativeRegAllocTmp(pReNative, &off);

#ifdef RT_ARCH_AMD64
    PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);

    /* stmxcsr */
    if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xae;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
    pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
    pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
    pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
    pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));

    /* Load MXCSR, mask everything except status flags and or into guest MXCSR. */
    off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));

    /* Store the flags in the MXCSR xcpt flags register. */
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsrXcptFlags, idxRegTmp);
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegMxCsrXcptFlags, X86_MXCSR_XCPT_FLAGS);

    /* Clear the status flags in the temporary copy and write it back to MXCSR. */
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, ~X86_MXCSR_XCPT_FLAGS);
    off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));

    pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);

    /* ldmxcsr */
    if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xae;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
    pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
    pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
    pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
    pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));

#elif defined(RT_ARCH_ARM64)
    PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegMxCsrXcptFlags, ARMV8_AARCH64_SYSREG_FPSR);
    pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR); /* Clear FPSR for next instruction. */
    pu32CodeBuf[off++] = Armv8A64MkInstrUxtb(idxRegMxCsrXcptFlags, idxRegMxCsrXcptFlags);  /* Ensure there are only the exception flags set (clears QC, and any possible NZCV flags). */

    /*
     * The exception flags layout differs between MXCSR and FPSR of course:
     *
     *   FPSR bit        MXCSR bit
     *   0  IOC  ---->   0  IE
     *   1  DZC  ---->   2  ZE
     *   2  OFC  ---->   3  OE
     *   3  UFC  ---->   4  UE
     *   4  IXC  ---->   5  PE
     *   7  IDC  ---->   1  DE
     */
    pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegMxCsrXcptFlags, 1); /* Shift the block of flags starting at DZC to the least significant bits. */
    pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegMxCsrXcptFlags, idxRegTmp, 2, 4); /* Insert DZC, OFC, UFC and IXC into the MXCSR positions. */
    pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegMxCsrXcptFlags, 6); /* Shift IDC (now at 6) into the LSB. */
    pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegMxCsrXcptFlags, idxRegTmp, 1, 1); /* Insert IDC into the MXCSR positions. */
#else
# error "Port me"
#endif

    /*
     * If PE is set together with OE/UE and neither are masked
     * PE needs to be cleared, because on real hardware
     * an exception is generated with only OE/UE being set,
     * but because we mask all exceptions PE will get set as well.
     */
    /** @todo On ARM we can combine the load+and into one and instruction. */
    /** @todo r=aeichner Can this be done more optimal? */
    uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsrXcptFlags);
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_OE | X86_MXCSR_UE);
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp2, idxRegMxCsr);
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp2, X86_MXCSR_OM | X86_MXCSR_UM);
    off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp2, X86_MXCSR_XCPT_MASK_SHIFT);
    off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp2, idxRegTmp2, false /*f64Bit*/);
    off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp2, idxRegTmp);
    off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp2, X86_MXCSR_OE | X86_MXCSR_UE);

    uint32_t offFixup = off;
    off = iemNativeEmitJzToFixed(pReNative, off, off);
    off = iemNativeEmitBitClearInGpr32(pReNative, off, idxRegMxCsrXcptFlags, X86_MXCSR_PE_BIT);
    iemNativeFixupFixedJump(pReNative, offFixup, off);
    iemNativeRegFreeTmp(pReNative, idxRegTmp2);


    /* Set the MXCSR flags now. */
    off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegMxCsrXcptFlags);

    /*
     * Make sure we don't have any outstanding guest register writes as we may
     * raise an \#UD or \#XF and all guest register must be up to date in CPUMCTX.
     */
    off = iemNativeRegFlushPendingWrites(pReNative, off);

#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
#else
    RT_NOREF(idxInstr);
#endif

    /* Check whether an exception is pending and only update the guest SIMD register if it isn't. */
    /* mov tmp, varmxcsr */
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
    /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
    off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
    /* tmp = ~tmp */
    off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
    /* tmp &= mxcsr */
    off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegMxCsrXcptFlags, idxRegTmp);
    off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegMxCsrXcptFlags, X86_MXCSR_XCPT_FLAGS,
                                                         kIemNativeLabelType_RaiseSseAvxFpRelated);

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);

    /* Move result to guest SIMD register (at this point there is no exception being raised). */
    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegRes);

    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    iemNativeRegFreeTmp(pReNative, idxRegMxCsrXcptFlags);
    iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
    return off;
}


/**
 * Common emitter for packed floating point instructions with 3 operands - register, register variant.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitSimdFp3OpCommon_rr_u128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxInstr,
                                                                 uint8_t const idxSimdGstRegDst, uint8_t const idxSimdGstRegSrc,
#ifdef RT_ARCH_AMD64
                                                                 uint8_t const bPrefixX86, uint8_t const bOpcX86
#elif defined(RT_ARCH_ARM64)
                                                                 ARMV8INSTRVECFPOP const enmFpOp, ARMV8INSTRVECFPSZ const enmFpSz
#endif
                                                                 )
{
    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegSrc),
                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdRegDst);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    if (bPrefixX86 != 0)
        pCodeBuf[off++] = bPrefixX86;
    if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || idxSimdRegSrc >= 8)
        pCodeBuf[off++] = (idxSimdRegSrc >= 8 ? X86_OP_REX_B : 0)
                        | (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = bOpcX86;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, idxSimdRegSrc & 7);
#elif defined(RT_ARCH_ARM64)
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkVecInstrFp3Op(enmFpOp, enmFpSz, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdRegDst, idxSimdRegSrc);
#else
# error "Port me"
#endif
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return iemNativeEmitMxcsrUpdate(pReNative, off, idxInstr, idxSimdGstRegDst, IEMNATIVE_SIMD_REG_FIXED_TMP0);
}


/**
 * Common emitter for packed floating point instructions with 3 operands - register, local variable variant.
 */
DECL_INLINE_THROW(uint32_t) iemNativeEmitSimdFp3OpCommon_rv_u128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxInstr,
                                                                 uint8_t const idxSimdGstRegDst, uint8_t const idxVarSrc,
#ifdef RT_ARCH_AMD64
                                                                 uint8_t const bPrefixX86, uint8_t const bOpcX86
#elif defined(RT_ARCH_ARM64)
                                                                 ARMV8INSTRVECFPOP const enmFpOp, ARMV8INSTRVECFPSZ const enmFpSz
#endif
                                                                 )
{
    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
    uint8_t const idxSimdRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);

#ifdef RT_ARCH_AMD64
    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdRegDst);
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    if (bPrefixX86 != 0)
        pCodeBuf[off++] = bPrefixX86;
    if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || idxSimdRegSrc >= 8)
        pCodeBuf[off++] = (idxSimdRegSrc >= 8 ? X86_OP_REX_B : 0)
                        | (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
    pCodeBuf[off++] = 0x0f;
    pCodeBuf[off++] = bOpcX86;
    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, idxSimdRegSrc & 7);
#elif defined(RT_ARCH_ARM64)
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    pCodeBuf[off++] = Armv8A64MkVecInstrFp3Op(enmFpOp, enmFpSz, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdRegDst, idxSimdRegSrc);
#else
# error "Port me"
#endif
    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return iemNativeEmitMxcsrUpdate(pReNative, off, idxInstr, idxSimdGstRegDst, IEMNATIVE_SIMD_REG_FIXED_TMP0);
}


/**
 * Common emitter for packed floating point instructions with 3 operands.
 */
#ifdef RT_ARCH_AMD64
# define IEMNATIVE_NATIVE_EMIT_FP_3OP_U128(a_Instr, a_enmArmOp, a_ArmElemSz, a_bPrefixX86, a_bOpcX86) \
    DECL_FORCE_INLINE_THROW(uint32_t) \
    RT_CONCAT3(iemNativeEmit_,a_Instr,_rr_u128)(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxInstr, \
                                                uint8_t const idxSimdGstRegDst, uint8_t const idxSimdGstRegSrc) \
    { \
        return iemNativeEmitSimdFp3OpCommon_rr_u128(pReNative, off, idxInstr, idxSimdGstRegDst, idxSimdGstRegSrc, \
                                                    a_bPrefixX86, a_bOpcX86); \
    } \
    DECL_FORCE_INLINE_THROW(uint32_t) \
    RT_CONCAT3(iemNativeEmit_,a_Instr,_rv_u128)(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxInstr, \
                                                uint8_t const idxSimdGstRegDst, uint8_t const idxVarSrc) \
    { \
        return iemNativeEmitSimdFp3OpCommon_rv_u128(pReNative, off, idxInstr, idxSimdGstRegDst, idxVarSrc, \
                                                    a_bPrefixX86, a_bOpcX86); \
    } \
    typedef int ignore_semicolon
#elif defined(RT_ARCH_ARM64)
# define IEMNATIVE_NATIVE_EMIT_FP_3OP_U128(a_Instr, a_enmArmOp, a_ArmElemSz, a_bPrefixX86, a_bOpcX86) \
    DECL_FORCE_INLINE_THROW(uint32_t) \
    RT_CONCAT3(iemNativeEmit_,a_Instr,_rr_u128)(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxInstr, \
                                                uint8_t const idxSimdGstRegDst, uint8_t const idxSimdGstRegSrc) \
    { \
        return iemNativeEmitSimdFp3OpCommon_rr_u128(pReNative, off, idxInstr, idxSimdGstRegDst, idxSimdGstRegSrc, \
                                                    a_enmArmOp, a_ArmElemSz); \
    } \
    DECL_FORCE_INLINE_THROW(uint32_t) \
    RT_CONCAT3(iemNativeEmit_,a_Instr,_rv_u128)(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxInstr, \
                                                uint8_t const idxSimdGstRegDst, uint8_t const idxVarSrc) \
    { \
        return iemNativeEmitSimdFp3OpCommon_rv_u128(pReNative, off, idxInstr, idxSimdGstRegDst, idxVarSrc, \
                                                    a_enmArmOp, a_ArmElemSz); \
    } \
    typedef int ignore_semicolon
#else
# error "Port me"
#endif


IEMNATIVE_NATIVE_EMIT_FP_3OP_U128(mulps, kArmv8VecInstrFpOp_Mul, kArmv8VecInstrFpSz_4x_Single, 0,                  0x59);
IEMNATIVE_NATIVE_EMIT_FP_3OP_U128(addps, kArmv8VecInstrFpOp_Add, kArmv8VecInstrFpSz_4x_Single, 0,                  0x58);
IEMNATIVE_NATIVE_EMIT_FP_3OP_U128(addpd, kArmv8VecInstrFpOp_Add, kArmv8VecInstrFpSz_2x_Double, X86_OP_PRF_SIZE_OP, 0x58);
IEMNATIVE_NATIVE_EMIT_FP_3OP_U128(subps, kArmv8VecInstrFpOp_Sub, kArmv8VecInstrFpSz_4x_Single, 0,                  0x5c);

Context (old line 2575 → new line 2851):
#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
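On the ARM64 host path, iemNativeEmitMxcsrUpdate reads FPSR, clears it, and then moves the accrued exception bits into their MXCSR positions with the LSR/BFI pair. A plain-C sketch of that flag remapping, following the mapping table in the comment (the helper name and hex masks here are illustrative, not VirtualBox APIs):

#include <stdint.h>

/* Sketch of the FPSR -> MXCSR exception-flag remapping performed by the emitted code. */
static uint32_t fpsrXcptFlagsToMxcsrXcptFlags(uint32_t fFpsr)
{
    uint32_t fMxcsr = fFpsr & UINT32_C(0x01);    /* IOC (bit 0)              -> IE (bit 0) */
    fMxcsr |= (fFpsr & UINT32_C(0x1e)) << 1;     /* DZC/OFC/UFC/IXC (1..4)   -> ZE/OE/UE/PE (2..5) */
    fMxcsr |= (fFpsr & UINT32_C(0x80)) >> 6;     /* IDC (bit 7)              -> DE (bit 1) */
    return fMxcsr;
}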
trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h
r105318 → r105491

Added (new lines 5986-6020), a new "Bitfield manipulation" section inserted ahead of the existing "Compare and Testing" section:

/*********************************************************************************************************************************
*   Bitfield manipulation                                                                                                        *
*********************************************************************************************************************************/

/**
 * Emits code for clearing a bit in a 32-bit GPR.
 */
DECL_FORCE_INLINE(uint32_t)
iemNativeEmitBitClearInGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const iGpr, uint8_t iBit)
{
    Assert(iBit < 32);

#if defined(RT_ARCH_AMD64)
    /* btr r32, imm8 */
    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);

    if (iGpr >= 8)
        pbCodeBuf[off++] = X86_OP_REX_B;
    pbCodeBuf[off++] = 0x0f;
    pbCodeBuf[off++] = 0xba;
    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGpr & 7);
    pbCodeBuf[off++] = iBit;
#elif defined(RT_ARCH_ARM64)
    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);

    pu32CodeBuf[off++] = Armv8A64MkInstrBfc(iGpr, iBit /*offFirstBit*/, 1 /*cBits*/, true /*f64Bit*/);
#else
# error "Port me"
#endif

    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    return off;
}


Context (the existing "Compare and Testing" section header follows):
/*********************************************************************************************************************************
*   Compare and Testing                                                                                                          *
*********************************************************************************************************************************/
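The emitted instruction (btr r32, imm8 on AMD64 hosts, BFC on ARM64 hosts) simply clears one bit of the target register. A semantic sketch in plain C, with an illustrative helper name:

#include <stdint.h>

/* Effect of the code emitted by iemNativeEmitBitClearInGpr32 on the register value. */
static inline uint32_t bitClearU32(uint32_t uValue, uint8_t iBit)
{
    return uValue & ~(UINT32_C(1) << iBit);   /* iBit must be in [0..31] */
}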