Changeset 103819 in vbox for trunk/src/VBox/VMM/include
- Timestamp: Mar 13, 2024 9:31:09 AM (13 months ago)
- svn:sync-xref-src-repo-rev: 162188
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h
r103816 r103819 7407 7407 * vbroadcast needs a memory operand or another xmm register to work... */ 7408 7408 7409 /* pinsrd vec src, gpr, #0 (ASSUMES SSE4.1). */7409 /* pinsrd vecdst, gpr, #0 (ASSUMES SSE4.1). */ 7410 7410 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7411 7411 if (iVecRegDst >= 8 || iGprSrc >= 8) … … 7424 7424 pCodeBuf[off++] = X86_OP_VEX3; 7425 7425 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X 7426 | 0x02 /* opcode map. */ 7426 7427 | ( iVecRegDst >= 8 7427 7428 ? 0 … … 7433 7434 else 7434 7435 { 7435 /* pinsrd vecsrc, gpr, #1 (ASSUMES SSE4.1). */ 7436 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7437 if (iVecRegDst >= 8 || iGprSrc >= 8) 7438 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7439 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 7440 pCodeBuf[off++] = 0x0f; 7441 pCodeBuf[off++] = 0x3a; 7442 pCodeBuf[off++] = 0x22; 7443 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7444 pCodeBuf[off++] = 0x00; 7445 7446 /* pinsrd vecsrc, gpr, #2 (ASSUMES SSE4.1). */ 7447 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7448 if (iVecRegDst >= 8 || iGprSrc >= 8) 7449 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7450 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 7451 pCodeBuf[off++] = 0x0f; 7452 pCodeBuf[off++] = 0x3a; 7453 pCodeBuf[off++] = 0x22; 7454 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7455 pCodeBuf[off++] = 0x00; 7456 7457 /* pinsrd vecsrc, gpr, #3 (ASSUMES SSE4.1). */ 7458 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7459 if (iVecRegDst >= 8 || iGprSrc >= 8) 7460 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7461 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 7462 pCodeBuf[off++] = 0x0f; 7463 pCodeBuf[off++] = 0x3a; 7464 pCodeBuf[off++] = 0x22; 7465 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7466 pCodeBuf[off++] = 0x00; 7467 } 7468 #elif defined(RT_ARCH_ARM64) 7469 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. 
*/ 7470 Assert(!(iVecRegDst & 0x1) || !f256Bit); 7471 7472 /* dup vecsrc, gpr */ 7473 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32); 7474 if (f256Bit) 7475 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32); 7476 #else 7477 # error "port me" 7478 #endif 7479 return off; 7480 } 7481 7482 7483 /** 7484 * Emits a vecdst[x] = gprsrc broadcast, 32-bit. 7485 */ 7486 DECL_INLINE_THROW(uint32_t) 7487 iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false) 7488 { 7489 #ifdef RT_ARCH_AMD64 7490 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 12 : 28), off, iVecRegDst, iGprSrc, f256Bit); 7491 #elif defined(RT_ARCH_ARM64) 7492 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit); 7493 #else 7494 # error "port me" 7495 #endif 7496 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off); 7497 return off; 7498 } 7499 7500 7501 /** 7502 * Emits a vecdst = gprsrc broadcast, 64-bit. 7503 */ 7504 DECL_FORCE_INLINE(uint32_t) 7505 iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false) 7506 { 7507 #ifdef RT_ARCH_AMD64 7508 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears, 7509 * vbroadcast needs a memory operand or another xmm register to work... */ 7510 7511 /* pinsrq vecsrc, gpr, #0 (ASSUMES SSE4.1). */ 7512 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7513 if (iVecRegDst >= 8 || iGprSrc >= 8) 7514 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7515 | (iGprSrc < 8 ? 
0 : X86_OP_REX_B); 7516 pCodeBuf[off++] = 0x0f; 7517 pCodeBuf[off++] = 0x3a; 7518 pCodeBuf[off++] = 0x22; 7519 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7520 pCodeBuf[off++] = 0x00; 7521 7522 if (f256Bit) 7523 { 7524 /* When broadcasting the entire ymm register we can use vbroadcastsd now. */ 7525 /* vbroadcastsd ymm, xmm (ASSUMES AVX2). */ 7526 pCodeBuf[off++] = X86_OP_VEX3; 7527 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X 7528 | ( iVecRegDst >= 8 7529 ? 0 7530 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R); 7531 pCodeBuf[off++] = 0x7d; 7532 pCodeBuf[off++] = 0x19; 7533 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7); 7534 } 7535 else 7536 { 7537 /* pinsrq vecsrc, gpr, #1 (ASSUMES SSE4.1). */ 7436 /* pinsrd vecdst, gpr, #1 (ASSUMES SSE4.1). */ 7538 7437 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7539 7438 if (iVecRegDst >= 8 || iGprSrc >= 8) … … 7545 7444 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7546 7445 pCodeBuf[off++] = 0x01; 7446 7447 /* pinsrd vecdst, gpr, #2 (ASSUMES SSE4.1). */ 7448 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7449 if (iVecRegDst >= 8 || iGprSrc >= 8) 7450 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7451 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 7452 pCodeBuf[off++] = 0x0f; 7453 pCodeBuf[off++] = 0x3a; 7454 pCodeBuf[off++] = 0x22; 7455 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7456 pCodeBuf[off++] = 0x02; 7457 7458 /* pinsrd vecdst, gpr, #3 (ASSUMES SSE4.1). */ 7459 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7460 if (iVecRegDst >= 8 || iGprSrc >= 8) 7461 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7462 | (iGprSrc < 8 ? 
0 : X86_OP_REX_B); 7463 pCodeBuf[off++] = 0x0f; 7464 pCodeBuf[off++] = 0x3a; 7465 pCodeBuf[off++] = 0x22; 7466 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7467 pCodeBuf[off++] = 0x03; 7468 } 7469 #elif defined(RT_ARCH_ARM64) 7470 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */ 7471 Assert(!(iVecRegDst & 0x1) || !f256Bit); 7472 7473 /* dup vecsrc, gpr */ 7474 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32); 7475 if (f256Bit) 7476 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32); 7477 #else 7478 # error "port me" 7479 #endif 7480 return off; 7481 } 7482 7483 7484 /** 7485 * Emits a vecdst[x] = gprsrc broadcast, 32-bit. 7486 */ 7487 DECL_INLINE_THROW(uint32_t) 7488 iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false) 7489 { 7490 #ifdef RT_ARCH_AMD64 7491 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 12 : 28), off, iVecRegDst, iGprSrc, f256Bit); 7492 #elif defined(RT_ARCH_ARM64) 7493 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit); 7494 #else 7495 # error "port me" 7496 #endif 7497 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off); 7498 return off; 7499 } 7500 7501 7502 /** 7503 * Emits a vecdst = gprsrc broadcast, 64-bit. 7504 */ 7505 DECL_FORCE_INLINE(uint32_t) 7506 iemNativeEmitSimdBroadcastGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false) 7507 { 7508 #ifdef RT_ARCH_AMD64 7509 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears, 7510 * vbroadcast needs a memory operand or another xmm register to work... */ 7511 7512 /* pinsrq vecdst, gpr, #0 (ASSUMES SSE4.1). 
*/ 7513 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7514 pCodeBuf[off++] = X86_OP_REX_W 7515 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7516 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 7517 pCodeBuf[off++] = 0x0f; 7518 pCodeBuf[off++] = 0x3a; 7519 pCodeBuf[off++] = 0x22; 7520 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7521 pCodeBuf[off++] = 0x00; 7522 7523 if (f256Bit) 7524 { 7525 /* When broadcasting the entire ymm register we can use vbroadcastsd now. */ 7526 /* vbroadcastsd ymm, xmm (ASSUMES AVX2). */ 7527 pCodeBuf[off++] = X86_OP_VEX3; 7528 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X 7529 | 0x02 /* opcode map. */ 7530 | ( iVecRegDst >= 8 7531 ? 0 7532 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R); 7533 pCodeBuf[off++] = 0x7d; 7534 pCodeBuf[off++] = 0x19; 7535 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7); 7536 } 7537 else 7538 { 7539 /* pinsrq vecdst, gpr, #1 (ASSUMES SSE4.1). */ 7540 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7541 pCodeBuf[off++] = X86_OP_REX_W 7542 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7543 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 7544 pCodeBuf[off++] = 0x0f; 7545 pCodeBuf[off++] = 0x3a; 7546 pCodeBuf[off++] = 0x22; 7547 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7548 pCodeBuf[off++] = 0x01; 7547 7549 } 7548 7550 #elif defined(RT_ARCH_ARM64)
Note: See TracChangeset for help on using the changeset viewer.