Changeset 102623 in vbox for trunk/src/VBox/VMM/include
- Timestamp: Dec 16, 2023 12:00:51 AM
- File: trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h (1 edited)
Legend:
- Unmodified: no prefix
- Added: prefixed with "+"
- Removed: prefixed with "-"
trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h
r102593 → r102623

 /**
- * Emits loading a constant into a 64-bit GPR
- */
-DECL_INLINE_THROW(uint32_t)
-iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
-{
-    if (!uImm64)
-        return iemNativeEmitGprZero(pReNative, off, iGpr);
-
-#ifdef RT_ARCH_AMD64
-    if (uImm64 <= UINT32_MAX)
+ * Variant of iemNativeEmitLoadGprImm64 where the caller ensures sufficient
+ * buffer space.
+ *
+ * Max buffer consumption:
+ *      - AMD64: 10 instruction bytes.
+ *      - ARM64: 4 instruction words (16 bytes).
+ */
+DECLINLINE(uint32_t) iemNativeEmitLoadGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
+{
+#ifdef RT_ARCH_AMD64
+    if (uImm64 == 0)
+    {
+        /* xor gpr, gpr */
+        if (iGpr >= 8)
+            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
+        pCodeBuf[off++] = 0x33;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
+    }
+    else if (uImm64 <= UINT32_MAX)
     {
         /* mov gpr, imm32 */
-        uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
         if (iGpr >= 8)
-            pbCodeBuf[off++] = X86_OP_REX_B;
-        pbCodeBuf[off++] = 0xb8 + (iGpr & 7);
-        pbCodeBuf[off++] = RT_BYTE1(uImm64);
-        pbCodeBuf[off++] = RT_BYTE2(uImm64);
-        pbCodeBuf[off++] = RT_BYTE3(uImm64);
-        pbCodeBuf[off++] = RT_BYTE4(uImm64);
+            pCodeBuf[off++] = X86_OP_REX_B;
+        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
+        pCodeBuf[off++] = RT_BYTE1(uImm64);
+        pCodeBuf[off++] = RT_BYTE2(uImm64);
+        pCodeBuf[off++] = RT_BYTE3(uImm64);
+        pCodeBuf[off++] = RT_BYTE4(uImm64);
     }
     else
     {
         /* mov gpr, imm64 */
-        uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
         if (iGpr < 8)
-            pbCodeBuf[off++] = X86_OP_REX_W;
+            pCodeBuf[off++] = X86_OP_REX_W;
         else
-            pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
-        pbCodeBuf[off++] = 0xb8 + (iGpr & 7);
-        pbCodeBuf[off++] = RT_BYTE1(uImm64);
-        pbCodeBuf[off++] = RT_BYTE2(uImm64);
-        pbCodeBuf[off++] = RT_BYTE3(uImm64);
-        pbCodeBuf[off++] = RT_BYTE4(uImm64);
-        pbCodeBuf[off++] = RT_BYTE5(uImm64);
-        pbCodeBuf[off++] = RT_BYTE6(uImm64);
-        pbCodeBuf[off++] = RT_BYTE7(uImm64);
-        pbCodeBuf[off++] = RT_BYTE8(uImm64);
+            pCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
+        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
+        pCodeBuf[off++] = RT_BYTE1(uImm64);
+        pCodeBuf[off++] = RT_BYTE2(uImm64);
+        pCodeBuf[off++] = RT_BYTE3(uImm64);
+        pCodeBuf[off++] = RT_BYTE4(uImm64);
+        pCodeBuf[off++] = RT_BYTE5(uImm64);
+        pCodeBuf[off++] = RT_BYTE6(uImm64);
+        pCodeBuf[off++] = RT_BYTE7(uImm64);
+        pCodeBuf[off++] = RT_BYTE8(uImm64);
     }
 
 #elif defined(RT_ARCH_ARM64)
-    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
-
     /*
      * We need to start this sequence with a 'mov gpr, imm16, lsl #x' and
…
      * the remainder.
      */
-    uint32_t fMovK = 0;
-    /* mov gpr, imm16 */
-    uint32_t uImmPart = ((uint32_t)((uImm64 >>  0) & UINT32_C(0xffff)) << 5);
-    if (uImmPart)
-    {
-        pu32CodeBuf[off++] = UINT32_C(0xd2800000) | (UINT32_C(0) << 21) | uImmPart | iGpr;
-        fMovK |= RT_BIT_32(29);
-    }
-    /* mov[k] gpr, imm16, lsl #16 */
-    uImmPart = ((uint32_t)((uImm64 >> 16) & UINT32_C(0xffff)) << 5);
-    if (uImmPart)
-    {
-        pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(1) << 21) | uImmPart | iGpr;
-        fMovK |= RT_BIT_32(29);
-    }
-    /* mov[k] gpr, imm16, lsl #32 */
-    uImmPart = ((uint32_t)((uImm64 >> 32) & UINT32_C(0xffff)) << 5);
-    if (uImmPart)
-    {
-        pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(2) << 21) | uImmPart | iGpr;
-        fMovK |= RT_BIT_32(29);
-    }
-    /* mov[k] gpr, imm16, lsl #48 */
-    uImmPart = ((uint32_t)((uImm64 >> 48) & UINT32_C(0xffff)) << 5);
-    if (uImmPart)
-        pu32CodeBuf[off++] = UINT32_C(0xd2800000) | fMovK | (UINT32_C(3) << 21) | uImmPart | iGpr;
-
-    /** @todo there is an inverted mask variant we might want to explore if it
-     *        reduces the number of instructions... */
+    unsigned cZeroHalfWords = !( uImm64        & UINT16_MAX)
+                            + !((uImm64 >> 16) & UINT16_MAX)
+                            + !((uImm64 >> 32) & UINT16_MAX)
+                            + !((uImm64 >> 48) & UINT16_MAX);
+    unsigned cFfffHalfWords = cZeroHalfWords >= 2 ? 0 /* skip */
+                            : ( (uImm64        & UINT16_MAX) == UINT16_MAX)
+                            + (((uImm64 >> 16) & UINT16_MAX) == UINT16_MAX)
+                            + (((uImm64 >> 32) & UINT16_MAX) == UINT16_MAX)
+                            + (((uImm64 >> 48) & UINT16_MAX) == UINT16_MAX);
+    if (cFfffHalfWords <= cZeroHalfWords)
+    {
+        uint32_t fMovBase = UINT32_C(0xd2800000) | iGpr;
+
+        /* movz gpr, imm16 */
+        uint32_t uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
+        if (uImmPart || cZeroHalfWords == 4)
+        {
+            pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
+            fMovBase |= RT_BIT_32(29);
+        }
+        /* mov[z/k] gpr, imm16, lsl #16 */
+        uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
+        if (uImmPart)
+        {
+            pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
+            fMovBase |= RT_BIT_32(29);
+        }
+        /* mov[z/k] gpr, imm16, lsl #32 */
+        uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
+        if (uImmPart)
+        {
+            pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
+            fMovBase |= RT_BIT_32(29);
+        }
+        /* mov[z/k] gpr, imm16, lsl #48 */
+        uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
+        if (uImmPart)
+            pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
+    }
+    else
+    {
+        uint32_t fMovBase = UINT32_C(0x92800000) | iGpr;
+
+        /* find the first half-word that isn't UINT16_MAX. */
+        uint32_t const iHwNotFfff = (uImm64        & UINT16_MAX) != UINT16_MAX ? 0
+                                  : ((uImm64 >> 16) & UINT16_MAX) != UINT16_MAX ? 1
+                                  : ((uImm64 >> 32) & UINT16_MAX) != UINT16_MAX ? 2 : 3;
+
+        /* movn gpr, imm16, lsl #iHwNotFfff*16 */
+        uint32_t uImmPart = (uint32_t)(~(uImm64 >> (iHwNotFfff * 16)) & UINT32_C(0xffff)) << 5;
+        pCodeBuf[off++] = fMovBase | (iHwNotFfff << 21) | uImmPart;
+        fMovBase |= RT_BIT_32(30) | RT_BIT_32(29); /* -> movk */
+        /* movk gpr, imm16 */
+        if (iHwNotFfff != 0)
+        {
+            uImmPart = (uint32_t)((uImm64 >>  0) & UINT32_C(0xffff));
+            if (uImmPart != UINT32_C(0xffff))
+                pCodeBuf[off++] = fMovBase | (UINT32_C(0) << 21) | (uImmPart << 5);
+        }
+        /* movk gpr, imm16, lsl #16 */
+        if (iHwNotFfff != 1)
+        {
+            uImmPart = (uint32_t)((uImm64 >> 16) & UINT32_C(0xffff));
+            if (uImmPart != UINT32_C(0xffff))
+                pCodeBuf[off++] = fMovBase | (UINT32_C(1) << 21) | (uImmPart << 5);
+        }
+        /* movk gpr, imm16, lsl #32 */
+        if (iHwNotFfff != 2)
+        {
+            uImmPart = (uint32_t)((uImm64 >> 32) & UINT32_C(0xffff));
+            if (uImmPart != UINT32_C(0xffff))
+                pCodeBuf[off++] = fMovBase | (UINT32_C(2) << 21) | (uImmPart << 5);
+        }
+        /* movk gpr, imm16, lsl #48 */
+        if (iHwNotFfff != 3)
+        {
+            uImmPart = (uint32_t)((uImm64 >> 48) & UINT32_C(0xffff));
+            if (uImmPart != UINT32_C(0xffff))
+                pCodeBuf[off++] = fMovBase | (UINT32_C(3) << 21) | (uImmPart << 5);
+        }
+    }
+
     /** @todo load into 'w' register instead of 'x' when imm64 <= UINT32_MAX?
      *        clang 12.x does that, only to use the 'x' version for the
      *        addressing in the following ldr). */
 
+#else
+# error "port me"
+#endif
+    return off;
+}
+
+
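The interesting part of the new iemNativeEmitLoadGprImmEx is the ARM64 branch selection: it counts all-zero and all-ones half-words and emits a movz+movk sequence when zeros win (or tie), and a movn+movk sequence otherwise, which settles the old inverted-mask @todo. A minimal standalone sketch of the same instruction-count heuristic, with a hypothetical helper name (the real emitter writes the instruction words as it decides):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper: how many MOVZ/MOVN/MOVK instructions would the
 * heuristic above emit for a given 64-bit immediate? */
static unsigned cInstrForImm64(uint64_t uImm64)
{
    unsigned cZero = 0, cFfff = 0;
    for (unsigned iHw = 0; iHw < 4; iHw++)
    {
        uint16_t const uHw = (uint16_t)(uImm64 >> (iHw * 16));
        cZero += uHw == 0;
        cFfff += uHw == UINT16_MAX;
    }
    /* Counting both kinds unconditionally gives the same decision as the
     * changeset's 'cZeroHalfWords >= 2' short-circuit: with only four
     * half-words, cFfff <= 2 <= cZero holds whenever cZero >= 2. */
    if (cFfff <= cZero)
        return uImm64 ? 4 - cZero : 1;  /* movz + one movk per non-zero half-word */
    return 4 - cFfff;                   /* movn + one movk per non-0xffff half-word */
}

int main(void)
{
    printf("%u\n", cInstrForImm64(UINT64_C(0x0000000000001234))); /* 1: movz */
    printf("%u\n", cInstrForImm64(UINT64_C(0xffffffffffff1234))); /* 1: movn */
    printf("%u\n", cInstrForImm64(UINT64_C(0xffffffff12345678))); /* 2: movn + movk */
    return 0;
}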
+/**
+ * Emits loading a constant into a 64-bit GPR
+ */
+DECL_INLINE_THROW(uint32_t)
+iemNativeEmitLoadGprImm64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint64_t uImm64)
+{
+#ifdef RT_ARCH_AMD64
+    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 10), off, iGpr, uImm64);
+#elif defined(RT_ARCH_ARM64)
+    off = iemNativeEmitLoadGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGpr, uImm64);
+#else
+# error "port me"
+#endif
+    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
+    return off;
+}
+
+
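The old function checked buffer space itself; the rewrite splits that into the Ex worker plus this thin ensure-and-assert wrapper, so a caller emitting several instructions can pay for one worst-case iemNativeInstrBufEnsure call instead of one per instruction. A self-contained sketch of that pattern with simplified stand-in types (not the VBox ones), always using the 10-byte mov r64, imm64 form for brevity:

#include <assert.h>
#include <stdint.h>

typedef struct CODEBUF { uint8_t ab[256]; } CODEBUF;

/* Stand-in for iemNativeInstrBufEnsure: guarantee cbMax more bytes fit. */
static uint8_t *bufEnsure(CODEBUF *pBuf, uint32_t off, uint32_t cbMax)
{
    assert(off + cbMax <= sizeof(pBuf->ab)); /* the real code grows the buffer */
    return pBuf->ab;
}

/* Caller-ensured emitter: writes at most 10 bytes, no size check here. */
static uint32_t emitLoadGprImm64Ex(uint8_t *pCodeBuf, uint32_t off, uint8_t iGpr, uint64_t uImm64)
{
    pCodeBuf[off++] = iGpr < 8 ? 0x48 : 0x49;   /* REX.W [| REX.B] */
    pCodeBuf[off++] = 0xb8 + (iGpr & 7);        /* mov r64, imm64 */
    for (unsigned i = 0; i < 8; i++)
        pCodeBuf[off++] = (uint8_t)(uImm64 >> (i * 8));
    return off;
}

int main(void)
{
    CODEBUF  Buf;
    uint32_t off = 0;
    uint8_t *pb = bufEnsure(&Buf, off, 2 * 10);   /* one check covers both loads */
    off = emitLoadGprImm64Ex(pb, off, 0 /*rax*/, UINT64_C(0x1122334455667788));
    off = emitLoadGprImm64Ex(pb, off, 9 /*r9*/,  UINT64_C(0x8877665544332211));
    return off == 20 ? 0 : 1;
}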
+/**
+ * Variant of iemNativeEmitLoadGpr32Imm where the caller ensures sufficient
+ * buffer space.
+ *
+ * Max buffer consumption:
+ *      - AMD64: 6 instruction bytes.
+ *      - ARM64: 2 instruction words (8 bytes).
+ *
+ * @note The top 32 bits will be cleared.
+ */
+DECLINLINE(uint32_t) iemNativeEmitLoadGpr32ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGpr, uint32_t uImm32)
+{
+#ifdef RT_ARCH_AMD64
+    if (uImm32 == 0)
+    {
+        /* xor gpr, gpr */
+        if (iGpr >= 8)
+            pCodeBuf[off++] = X86_OP_REX_R | X86_OP_REX_B;
+        pCodeBuf[off++] = 0x33;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGpr & 7, iGpr & 7);
+    }
+    else
+    {
+        /* mov gpr, imm32 */
+        if (iGpr >= 8)
+            pCodeBuf[off++] = X86_OP_REX_B;
+        pCodeBuf[off++] = 0xb8 + (iGpr & 7);
+        pCodeBuf[off++] = RT_BYTE1(uImm32);
+        pCodeBuf[off++] = RT_BYTE2(uImm32);
+        pCodeBuf[off++] = RT_BYTE3(uImm32);
+        pCodeBuf[off++] = RT_BYTE4(uImm32);
+    }
+
+#elif defined(RT_ARCH_ARM64)
+    if ((uImm32 >> 16) == 0)
+        /* movz gpr, imm16 */
+        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32,        0, false /*f64Bit*/);
+    else if ((uImm32 & UINT32_C(0xffff)) == 0)
+        /* movz gpr, imm16, lsl #16 */
+        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 >> 16,  1, false /*f64Bit*/);
+    else if ((uImm32 & UINT32_C(0xffff)) == UINT32_C(0xffff))
+        /* movn gpr, imm16, lsl #16 */
+        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32 >> 16, 1, false /*f64Bit*/);
+    else if ((uImm32 >> 16) == UINT32_C(0xffff))
+        /* movn gpr, imm16 */
+        pCodeBuf[off++] = Armv8A64MkInstrMovN(iGpr, ~uImm32,       0, false /*f64Bit*/);
+    else
+    {
+        pCodeBuf[off++] = Armv8A64MkInstrMovZ(iGpr, uImm32 & UINT32_C(0xffff), 0, false /*f64Bit*/);
+        pCodeBuf[off++] = Armv8A64MkInstrMovK(iGpr, uImm32 >> 16,              1, false /*f64Bit*/);
+    }
+
+#else
+# error "port me"
+#endif
+    return off;
+}
+
+
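For 32-bit immediates the ARM64 side needs at most two instructions: a single movz or movn whenever either half-word is all zero or all one bits, else movz+movk. A standalone sketch of just that case analysis (hypothetical name, counts only):

#include <assert.h>
#include <stdint.h>

static unsigned cInstrForImm32(uint32_t uImm32)
{
    uint16_t const uLo = (uint16_t)uImm32;
    uint16_t const uHi = (uint16_t)(uImm32 >> 16);
    if (uLo == 0 || uHi == 0)                   /* movz, plain or lsl #16 */
        return 1;
    if (uLo == UINT16_MAX || uHi == UINT16_MAX) /* movn, plain or lsl #16 */
        return 1;
    return 2;                                   /* movz + movk */
}

int main(void)
{
    assert(cInstrForImm32(UINT32_C(0x00001234)) == 1); /* movz */
    assert(cInstrForImm32(UINT32_C(0xffff1234)) == 1); /* movn */
    assert(cInstrForImm32(UINT32_C(0x12345678)) == 2); /* movz + movk */
    return 0;
}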
+/**
+ * Emits loading a constant into a 32-bit GPR.
+ * @note The top 32 bits will be cleared.
+ */
+DECL_INLINE_THROW(uint32_t)
+iemNativeEmitLoadGprImm32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t uImm32)
+{
+#ifdef RT_ARCH_AMD64
+    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 6), off, iGpr, uImm32);
+#elif defined(RT_ARCH_ARM64)
+    off = iemNativeEmitLoadGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 2), off, iGpr, uImm32);
 #else
 # error "port me"
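On AMD64 the notable change in both Ex workers is zeroing with xor gpr,gpr (2-3 bytes) instead of going through the old iemNativeEmitGprZero path, with mov gpr,imm32 (5-6 bytes) for everything else. A standalone sketch mirroring the 32-bit logic and checking a few encodings (simplified stand-in, not the VBox emitter):

#include <assert.h>
#include <stdint.h>

static uint32_t emitLoadGpr32Imm(uint8_t *pb, uint32_t off, uint8_t iGpr, uint32_t uImm32)
{
    if (uImm32 == 0)
    {
        if (iGpr >= 8)
            pb[off++] = 0x45;                          /* REX.R | REX.B */
        pb[off++] = 0x33;                              /* xor r32, r/m32 */
        pb[off++] = 0xc0 | ((iGpr & 7) << 3) | (iGpr & 7);
    }
    else
    {
        if (iGpr >= 8)
            pb[off++] = 0x41;                          /* REX.B */
        pb[off++] = 0xb8 + (iGpr & 7);                 /* mov r32, imm32 */
        for (unsigned i = 0; i < 4; i++)
            pb[off++] = (uint8_t)(uImm32 >> (i * 8));  /* little endian */
    }
    return off;
}

int main(void)
{
    uint8_t ab[8];
    assert(emitLoadGpr32Imm(ab, 0, 0, 0) == 2 && ab[0] == 0x33 && ab[1] == 0xc0); /* xor eax, eax */
    assert(emitLoadGpr32Imm(ab, 0, 9, 0) == 3 && ab[0] == 0x45 && ab[2] == 0xc9); /* xor r9d, r9d */
    assert(emitLoadGpr32Imm(ab, 0, 0, 0x12345678) == 5 && ab[0] == 0xb8 && ab[1] == 0x78); /* mov eax, 12345678h */
    return 0;
}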