Changeset 104051 in vbox for trunk/src/VBox/VMM
- Timestamp: Mar 26, 2024 2:10:26 AM
- svn:sync-xref-src-repo-rev: 162442
- Location: trunk/src/VBox/VMM
- Files: 3 edited
trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm
r103909 r104051 274 274 275 275 ;; 276 ; This is handy for generating absolutly correct EFLAGS. 277 ;%define IEM_AIMPL_WITH_LOAD_AND_SAVE_ALL_STATUS_FLAGS 278 279 ;; 276 280 ; Load the relevant flags from [%1] if there are undefined flags (%3). 277 281 ; 278 282 ; @remarks Clobbers T0, stack. Changes EFLAGS. 279 ; @param A2 The register pointing to the flags.280 283 ; @param 1 The parameter (A0..A3) pointing to the eflags. 281 284 ; @param 2 The set of modified flags. 282 285 ; @param 3 The set of undefined flags. 283 ; @param 4 Force loading the flags. 284 ; 285 %macro IEM_MAYBE_LOAD_FLAGS 3-4 1 286 %if (%3 + %4) != 0 286 ; @param 4 The flags that must be loaded. 287 ; 288 %macro IEM_MAYBE_LOAD_FLAGS 4 289 %ifdef IEM_AIMPL_WITH_LOAD_AND_SAVE_ALL_STATUS_FLAGS 290 pushf ; store current flags 291 mov T0_32, [%1] ; load the guest flags 292 and dword [xSP], ~(%2 | %3 | X86_EFL_STATUS_BITS) ; mask out the modified and undefined flags 293 and T0_32, (%2 | %3 | X86_EFL_STATUS_BITS) ; select the modified and undefined flags. 294 or [xSP], T0 ; merge guest flags with host flags. 295 popf ; load the mixed flags. 296 297 %elif (%3 + %4) != 0 298 %if 1 ; This approach seems faster on intel 10980XE 299 %if (%3 | %4) == X86_EFL_CF 300 ; Use bt to load bit into CF 301 bt dword [%1], X86_EFL_CF_BIT 302 %else 303 ; Use ADD to set OF and SHAF for the rest. ASSUMES T0_32 is eax! 304 mov eax, [%1] 305 %if (%3 | %4) == X86_EFL_OF 306 ; Use ADD to set OF. 307 shl eax, 31 - X86_EFL_OF_BIT 308 add eax, 80000000h 309 %elif ((%3 | %4) & X86_EFL_OF) != 0 310 ; Use ADD to set OF. 311 xchg al, ah 312 shl al, 15 - X86_EFL_OF_BIT 313 add al, 80h 314 ; Use SAHF to set the other status flags. 315 sahf 316 %else ; OF not needed; so al -> ah and load ah into eflags. 317 %if 1 ; Pretty similar on 10980XE, but shl seems faster on average. 318 shl eax, 8 319 %else 320 xchg al, ah 321 %endif 322 sahf 323 %endif 324 %endif 325 326 %else 287 327 pushf ; store current flags 288 328 mov T0_32, [%1] ; load the guest flags … … 291 331 or [xSP], T0 ; merge guest flags with host flags. 292 332 popf ; load the mixed flags. 333 %endif 293 334 %endif 294 335 %endmacro … … 298 339 ; 299 340 ; @remarks Clobbers T0, stack. Changes EFLAGS. 300 ; @param A2 The register pointing to the flags.301 341 ; @param 1 The parameter (A0..A3) pointing to the eflags. 302 342 ; @param 2 The set of flags to load. … … 304 344 ; 305 345 %macro IEM_LOAD_FLAGS 3 346 %ifdef IEM_AIMPL_WITH_LOAD_AND_SAVE_ALL_STATUS_FLAGS 347 pushf ; store current flags 348 mov T0_32, [%1] ; load the guest flags 349 and dword [xSP], ~(%2 | %3 | X86_EFL_STATUS_BITS) ; mask out the modified, undefined and status flags 350 and T0_32, (%2 | %3 | X86_EFL_STATUS_BITS) ; select the modified, undefined and status flags. 351 or [xSP], T0 ; merge guest flags with host flags. 352 popf ; load the mixed flags. 353 354 %elif 1 ; This approach seems faster on intel 10980XE 355 %if (%3 | %2) == X86_EFL_CF 356 ; Use bt to load bit into CF 357 bt dword [%1], X86_EFL_CF_BIT 358 %else 359 mov eax, [%1] ; ASSUMES T0_32 is eax!! 360 %if (%3 | %2) == X86_EFL_OF 361 ; Use ADD to set OF. 362 shl eax, 31 - X86_EFL_OF_BIT 363 add eax, 80000000h 364 %elif ((%3 | %2) & X86_EFL_OF) != 0 365 ; Use ADD to set OF. 366 xchg al, ah 367 shl al, 15 - X86_EFL_OF_BIT 368 add al, 80h 369 ; Use SAHF to set the other status flags. 370 sahf 371 %else ; OF not needed; so al -> ah and load ah into eflags. 372 %if 1 ; Pretty similar on 10980XE, but shl seems faster on average. 
373 shl eax, 8 374 %else 375 xchg al, ah 376 %endif 377 sahf 378 %endif 379 %endif ; (%3 | %2) != X86_EFL_CF 380 381 %else 306 382 pushf ; store current flags 307 383 mov T0_32, [%1] ; load the guest flags … … 310 386 or [xSP], T0 ; merge guest flags with host flags. 311 387 popf ; load the mixed flags. 388 %endif 312 389 %endmacro 313 390 … … 319 396 ; @param 2 The mask of modified flags to save. 320 397 ; @param 3 The mask of undefined flags to (maybe) save. 321 ; 322 %macro IEM_SAVE_FLAGS 3 323 %if (%2 | %3) != 0 398 ; @param 4 The mask of flags that are zeroed (and thus doesn't require loading, just clearing) 399 ; 400 %macro IEM_SAVE_FLAGS 3-4 0 401 %if (%2 | %3 | %4) != 0 402 mov T1_32, [%1] ; flags 403 %ifdef IEM_AIMPL_WITH_LOAD_AND_SAVE_ALL_STATUS_FLAGS 324 404 pushf 325 pop T1 326 mov T0_32, [%1] ; flags 327 and T0_32, ~(%2 | %3) ; clear the modified & undefined flags. 328 and T1_32, (%2 | %3) ; select the modified and undefined flags. 405 pop T0 406 and T1_32, ~(%2 | %3 | %4 | X86_EFL_STATUS_BITS) ; clear the modified & undefined & zeroed & status flags. 407 and T0_32, (%2 | %3 | X86_EFL_STATUS_BITS) ; select the modified, undefined and status flags. 408 %else 409 %if (%2 | %3 | %4) == X86_EFL_CF 410 setc T0_8 411 %elif (%2 | %3) == X86_EFL_OF 412 seto T0_8 413 shl T0_32, X86_EFL_OF_BIT 414 %elif (%2 | %3) == X86_EFL_ZF 415 setz T0_8 ; On 10980XE this is faster than the next option 5596 vs 5936 ps/call (cmpxchg8b-positive). 416 shl T0_32, X86_EFL_ZF_BIT 417 %elif (%2 | %3) <= 0xff 418 lahf 419 movzx eax, ah ; ASSUMES T0_32 is eax! 420 %elif 1 ; The locked functions are generally faster on 10980XE with this approach 421 lahf ; while there seems only to be a tiny advantage in most other test. 422 movzx eax, ah ; ASSUMES T0_32 is eax! 423 jno .of_is_clear 424 or eax, X86_EFL_OF 425 .of_is_clear: 426 %else 427 pushf ; this is a bit slow 428 pop T0 429 %endif 430 and T1_32, ~(%2 | %3 | %4) ; clear the modified & undefined & zeroed flags. 431 and T0_32, (%2 | %3) ; select the modified and undefined flags. 432 %endif 329 433 or T0_32, T1_32 ; combine the flags. 330 434 mov [%1], T0_32 ; save the flags. … … 494 598 ; @param 3 The modified flags. 495 599 ; @param 4 The undefined flags. 496 ; @param 5 Force flag loading (ADC, SBC). 497 ; 498 %macro IEMIMPL_BIN_OP 5 600 ; @param 5 The flags that must be loaded (ADC, SBC). 601 ; @param 6 The flags that will be zeroed by the operation. 
602 ; 603 %macro IEMIMPL_BIN_OP 6 499 604 BEGINCODE 500 605 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12 … … 502 607 IEM_MAYBE_LOAD_FLAGS A2, %3, %4, %5 503 608 %1 byte [A0], A1_8 504 IEM_SAVE_FLAGS A2, %3, %4 609 IEM_SAVE_FLAGS A2, %3, %4, %6 505 610 EPILOGUE_3_ARGS 506 611 ENDPROC iemAImpl_ %+ %1 %+ _u8 … … 510 615 IEM_MAYBE_LOAD_FLAGS A2, %3, %4, %5 511 616 %1 word [A0], A1_16 512 IEM_SAVE_FLAGS A2, %3, %4 617 IEM_SAVE_FLAGS A2, %3, %4, %6 513 618 EPILOGUE_3_ARGS 514 619 ENDPROC iemAImpl_ %+ %1 %+ _u16 … … 518 623 IEM_MAYBE_LOAD_FLAGS A2, %3, %4, %5 519 624 %1 dword [A0], A1_32 520 IEM_SAVE_FLAGS A2, %3, %4 625 IEM_SAVE_FLAGS A2, %3, %4, %6 521 626 EPILOGUE_3_ARGS 522 627 ENDPROC iemAImpl_ %+ %1 %+ _u32 … … 527 632 IEM_MAYBE_LOAD_FLAGS A2, %3, %4, %5 528 633 %1 qword [A0], A1 529 IEM_SAVE_FLAGS A2, %3, %4 634 IEM_SAVE_FLAGS A2, %3, %4, %6 530 635 EPILOGUE_3_ARGS_EX 8 531 636 ENDPROC iemAImpl_ %+ %1 %+ _u64 … … 538 643 IEM_MAYBE_LOAD_FLAGS A2, %3, %4, %5 539 644 lock %1 byte [A0], A1_8 540 IEM_SAVE_FLAGS A2, %3, %4 645 IEM_SAVE_FLAGS A2, %3, %4, %6 541 646 EPILOGUE_3_ARGS 542 647 ENDPROC iemAImpl_ %+ %1 %+ _u8_locked … … 546 651 IEM_MAYBE_LOAD_FLAGS A2, %3, %4, %5 547 652 lock %1 word [A0], A1_16 548 IEM_SAVE_FLAGS A2, %3, %4 653 IEM_SAVE_FLAGS A2, %3, %4, %6 549 654 EPILOGUE_3_ARGS 550 655 ENDPROC iemAImpl_ %+ %1 %+ _u16_locked … … 554 659 IEM_MAYBE_LOAD_FLAGS A2, %3, %4, %5 555 660 lock %1 dword [A0], A1_32 556 IEM_SAVE_FLAGS A2, %3, %4 661 IEM_SAVE_FLAGS A2, %3, %4, %6 557 662 EPILOGUE_3_ARGS 558 663 ENDPROC iemAImpl_ %+ %1 %+ _u32_locked … … 563 668 IEM_MAYBE_LOAD_FLAGS A2, %3, %4, %5 564 669 lock %1 qword [A0], A1 565 IEM_SAVE_FLAGS A2, %3, %4 670 IEM_SAVE_FLAGS A2, %3, %4, %6 566 671 EPILOGUE_3_ARGS_EX 8 567 672 ENDPROC iemAImpl_ %+ %1 %+ _u64_locked … … 570 675 %endmacro 571 676 572 ; instr,lock, modified-flags, undefined flags, force loadingflags573 IEMIMPL_BIN_OP add, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 0 574 IEMIMPL_BIN_OP adc, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 1575 IEMIMPL_BIN_OP sub, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 0 576 IEMIMPL_BIN_OP sbb, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 1577 IEMIMPL_BIN_OP or, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF,0578 IEMIMPL_BIN_OP xor, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF, 0579 IEMIMPL_BIN_OP and, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF, 0580 IEMIMPL_BIN_OP cmp, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 0581 IEMIMPL_BIN_OP test, 0, (X86_EFL_ OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF, 0677 ; instr,lock, modified-flags, undefined flags, must be loaded, zeroed flags 678 IEMIMPL_BIN_OP add, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 0, 0 679 IEMIMPL_BIN_OP adc, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, X86_EFL_CF, 0 680 IEMIMPL_BIN_OP sub, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 0, 0 681 IEMIMPL_BIN_OP sbb, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, X86_EFL_CF, 0 682 IEMIMPL_BIN_OP cmp, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 0, 0 683 IEMIMPL_BIN_OP 
or, 1, (X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF), X86_EFL_AF, 0, X86_EFL_OF | X86_EFL_CF 684 IEMIMPL_BIN_OP xor, 1, (X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF), X86_EFL_AF, 0, X86_EFL_OF | X86_EFL_CF 685 IEMIMPL_BIN_OP and, 1, (X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF), X86_EFL_AF, 0, X86_EFL_OF | X86_EFL_CF 686 IEMIMPL_BIN_OP test, 0, (X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF), X86_EFL_AF, 0, X86_EFL_OF | X86_EFL_CF 582 687 583 688 … … 595 700 ; @param 2 The modified flags. 596 701 ; @param 3 The undefined flags. 597 ; 598 %macro IEMIMPL_VEX_BIN_OP 3 702 ; @param 4 The zeroed flags. 703 ; 704 %macro IEMIMPL_VEX_BIN_OP 4 599 705 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16 600 706 PROLOGUE_4_ARGS 601 IEM_MAYBE_LOAD_FLAGS A3, %2, %3 707 IEM_MAYBE_LOAD_FLAGS A3, %2, %3, 0 ;; @todo do we need to load undefined flags for any platform? 602 708 %1 T0_32, A1_32, A2_32 603 709 mov [A0], T0_32 604 IEM_SAVE_FLAGS A3, %2, %3 710 IEM_SAVE_FLAGS A3, %2, %3, %4 605 711 EPILOGUE_4_ARGS 606 712 ENDPROC iemAImpl_ %+ %1 %+ _u32 … … 609 715 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16 610 716 PROLOGUE_4_ARGS 611 IEM_MAYBE_LOAD_FLAGS A3, %2, %3 717 IEM_MAYBE_LOAD_FLAGS A3, %2, %3, 0 612 718 %1 T0, A1, A2 613 719 mov [A0], T0 614 IEM_SAVE_FLAGS A3, %2, %3 720 IEM_SAVE_FLAGS A3, %2, %3, %4 615 721 EPILOGUE_4_ARGS 616 722 ENDPROC iemAImpl_ %+ %1 %+ _u64 … … 618 724 %endmacro 619 725 620 ; instr, modified-flags, undefined-flags621 IEMIMPL_VEX_BIN_OP andn, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_CF), (X86_EFL_AF | X86_EFL_PF)622 IEMIMPL_VEX_BIN_OP bextr, (X86_EFL_OF | X86_EFL_ZF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_AF | X86_EFL_PF)623 IEMIMPL_VEX_BIN_OP bzhi, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_CF), (X86_EFL_AF | X86_EFL_PF)726 ; instr, modified-flags, undefined-flags, zeroed-flags 727 IEMIMPL_VEX_BIN_OP andn, X86_EFL_SF | X86_EFL_ZF, X86_EFL_AF | X86_EFL_PF, X86_EFL_OF | X86_EFL_CF 728 IEMIMPL_VEX_BIN_OP bextr, X86_EFL_ZF, X86_EFL_SF | X86_EFL_AF | X86_EFL_PF, X86_EFL_OF | X86_EFL_CF 729 IEMIMPL_VEX_BIN_OP bzhi, X86_EFL_SF | X86_EFL_ZF | X86_EFL_CF, X86_EFL_AF | X86_EFL_PF, X86_EFL_OF 624 730 625 731 ;; … … 635 741 ; @param 2 The modified flags. 636 742 ; @param 3 The undefined flags. 637 ; 638 %macro IEMIMPL_VEX_BIN_OP_2 3 743 ; @param 4 The zeroed flags. 
744 ; 745 %macro IEMIMPL_VEX_BIN_OP_2 4 639 746 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12 640 747 PROLOGUE_4_ARGS 641 IEM_MAYBE_LOAD_FLAGS A2, %2, %3 748 IEM_MAYBE_LOAD_FLAGS A2, %2, %3, 0 ;; @todo check if any undefined flags are passed thru 642 749 mov T0_32, [A0] 643 750 %1 T0_32, A1_32 644 751 mov [A0], T0_32 645 IEM_SAVE_FLAGS A2, %2, %3 752 IEM_SAVE_FLAGS A2, %2, %3, %4 646 753 EPILOGUE_4_ARGS 647 754 ENDPROC iemAImpl_ %+ %1 %+ _u32 … … 650 757 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12 651 758 PROLOGUE_4_ARGS 652 IEM_MAYBE_LOAD_FLAGS A2, %2, %3 759 IEM_MAYBE_LOAD_FLAGS A2, %2, %3, 0 653 760 mov T0, [A0] 654 761 %1 T0, A1 655 762 mov [A0], T0 656 IEM_SAVE_FLAGS A2, %2, %3 763 IEM_SAVE_FLAGS A2, %2, %3, %4 657 764 EPILOGUE_4_ARGS 658 765 ENDPROC iemAImpl_ %+ %1 %+ _u64 … … 660 767 %endmacro 661 768 662 ; instr, modified-flags, undefined-flags663 IEMIMPL_VEX_BIN_OP_2 blsr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_CF), (X86_EFL_AF | X86_EFL_PF)664 IEMIMPL_VEX_BIN_OP_2 blsmsk, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_CF), (X86_EFL_AF | X86_EFL_PF)665 IEMIMPL_VEX_BIN_OP_2 blsi, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_CF), (X86_EFL_AF | X86_EFL_PF)769 ; instr, modified-flags, undefined-flags zeroed-flags 770 IEMIMPL_VEX_BIN_OP_2 blsr, (X86_EFL_SF | X86_EFL_ZF | X86_EFL_CF), (X86_EFL_AF | X86_EFL_PF), X86_EFL_OF 771 IEMIMPL_VEX_BIN_OP_2 blsmsk, (X86_EFL_SF | X86_EFL_ZF | X86_EFL_CF), (X86_EFL_AF | X86_EFL_PF), X86_EFL_OF 772 IEMIMPL_VEX_BIN_OP_2 blsi, (X86_EFL_SF | X86_EFL_ZF | X86_EFL_CF), (X86_EFL_AF | X86_EFL_PF), X86_EFL_OF 666 773 667 774 … … 874 981 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12 875 982 PROLOGUE_3_ARGS 876 IEM_MAYBE_LOAD_FLAGS A2, %3, %4 983 IEM_MAYBE_LOAD_FLAGS A2, %3, %4, 0 877 984 %1 word [A0], A1_16 878 IEM_SAVE_FLAGS A2, %3, %4 985 IEM_SAVE_FLAGS A2, %3, %4, 0 879 986 EPILOGUE_3_ARGS 880 987 ENDPROC iemAImpl_ %+ %1 %+ _u16 … … 882 989 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12 883 990 PROLOGUE_3_ARGS 884 IEM_MAYBE_LOAD_FLAGS A2, %3, %4 991 IEM_MAYBE_LOAD_FLAGS A2, %3, %4, 0 885 992 %1 dword [A0], A1_32 886 IEM_SAVE_FLAGS A2, %3, %4 993 IEM_SAVE_FLAGS A2, %3, %4, 0 887 994 EPILOGUE_3_ARGS 888 995 ENDPROC iemAImpl_ %+ %1 %+ _u32 … … 891 998 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16 892 999 PROLOGUE_3_ARGS 893 IEM_MAYBE_LOAD_FLAGS A2, %3, %4 1000 IEM_MAYBE_LOAD_FLAGS A2, %3, %4, 0 894 1001 %1 qword [A0], A1 895 IEM_SAVE_FLAGS A2, %3, %4 1002 IEM_SAVE_FLAGS A2, %3, %4, 0 896 1003 EPILOGUE_3_ARGS_EX 8 897 1004 ENDPROC iemAImpl_ %+ %1 %+ _u64 … … 902 1009 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12 903 1010 PROLOGUE_3_ARGS 904 IEM_MAYBE_LOAD_FLAGS A2, %3, %4 1011 IEM_MAYBE_LOAD_FLAGS A2, %3, %4, 0 905 1012 lock %1 word [A0], A1_16 906 IEM_SAVE_FLAGS A2, %3, %4 1013 IEM_SAVE_FLAGS A2, %3, %4, 0 907 1014 EPILOGUE_3_ARGS 908 1015 ENDPROC iemAImpl_ %+ %1 %+ _u16_locked … … 910 1017 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12 911 1018 PROLOGUE_3_ARGS 912 IEM_MAYBE_LOAD_FLAGS A2, %3, %4 1019 IEM_MAYBE_LOAD_FLAGS A2, %3, %4, 0 913 1020 lock %1 dword [A0], A1_32 914 IEM_SAVE_FLAGS A2, %3, %4 1021 IEM_SAVE_FLAGS A2, %3, %4, 0 915 1022 EPILOGUE_3_ARGS 916 1023 ENDPROC iemAImpl_ %+ %1 %+ _u32_locked … … 919 1026 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16 920 1027 PROLOGUE_3_ARGS 921 IEM_MAYBE_LOAD_FLAGS A2, %3, %4 1028 IEM_MAYBE_LOAD_FLAGS A2, %3, %4, 0 922 1029 lock %1 qword [A0], A1 923 IEM_SAVE_FLAGS A2, %3, %4 1030 IEM_SAVE_FLAGS A2, %3, %4, 0 924 1031 EPILOGUE_3_ARGS_EX 8 925 1032 ENDPROC iemAImpl_ %+ %1 
%+ _u64_locked … … 927 1034 %endif ; locked 928 1035 %endmacro 1036 1037 ; Undefined flags are passed thru here by the intel and amd CPUs we have. 929 1038 ; modified efl, undefined eflags 930 IEMIMPL_BIT_OP bt, 0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)931 IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)932 IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)933 IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)1039 IEMIMPL_BIT_OP bt, 0, (X86_EFL_CF), 0 ;passed-thru (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF) 1040 IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), 0 ;passed-thru (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF) 1041 IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), 0 ;passed-thru (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF) 1042 IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), 0 ;passed-thru (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF) 934 1043 935 1044 ;; … … 944 1053 ; In the ZF case the destination register is 'undefined', however it seems that 945 1054 ; both AMD and Intel just leaves it as is. The undefined EFLAGS differs between 946 ; AMD and Intel and accor idng to https://www.sandpile.org/x86/flags.htm between1055 ; AMD and Intel and according to https://www.sandpile.org/x86/flags.htm between 947 1056 ; Intel microarchitectures. We only implement 'intel' and 'amd' variation with 948 ; the behaviour of more recent CPUs (Intel 10980X and AMD 3990X). 1057 ; the behaviour of more recent CPUs (Intel 10980XE and AMD 3990X). 1058 ; 1059 ; Intel: Clear all and calculate PF in addition to ZF. 1060 ; AMD: Passthru all flags other than ZF. 949 1061 ; 950 1062 ; @param 1 The instruction mnemonic. … … 955 1067 %macro IEMIMPL_BIT_OP2 4 956 1068 BEGINCODE 1069 ; 16-bit 1070 957 1071 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12 958 1072 PROLOGUE_3_ARGS 959 IEM_MAYBE_LOAD_FLAGS A2, %2, %3 1073 IEM_MAYBE_LOAD_FLAGS A2, %2, %3, %3 ; Must load undefined flags since AMD passes them thru 960 1074 %1 T0_16, A1_16 961 1075 %if %4 != 0 … … 964 1078 mov [A0], T0_16 965 1079 .unchanged_dst: 966 IEM_SAVE_FLAGS A2, %2, %3 1080 IEM_SAVE_FLAGS A2, %2, %3, 0 967 1081 EPILOGUE_3_ARGS 968 1082 ENDPROC iemAImpl_ %+ %1 %+ _u16 969 1083 970 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ _intel, 12 971 PROLOGUE_3_ARGS 972 %1 T1_16, A1_16 973 %if %4 != 0 974 jz .unchanged_dst 975 %endif 976 mov [A0], T1_16 977 IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1 978 EPILOGUE_3_ARGS 979 .unchanged_dst: 980 IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF 981 EPILOGUE_3_ARGS 982 ENDPROC iemAImpl_ %+ %1 %+ _u16_intel 983 984 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ _amd, 12 985 PROLOGUE_3_ARGS 986 %1 T0_16, A1_16 987 %if %4 != 0 988 jz .unchanged_dst 989 %endif 990 mov [A0], T0_16 991 .unchanged_dst: 992 IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0 ; Only the ZF flag is modified on AMD Zen 2. 
993 EPILOGUE_3_ARGS 994 ENDPROC iemAImpl_ %+ %1 %+ _u16_amd 995 1084 ;bad;BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ _intel, 12 1085 ;bad; PROLOGUE_3_ARGS 1086 ;bad; %1 T1_16, A1_16 1087 ;bad; jz .unchanged_dst 1088 ;bad; mov [A0], T1_16 1089 ;bad; IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1 1090 ;bad; EPILOGUE_3_ARGS 1091 ;bad;.unchanged_dst: 1092 ;bad;%if %4 != 0 1093 ;bad; mov [A0], T1_16 1094 ;bad;%endif 1095 ;bad; IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF 1096 ;bad; EPILOGUE_3_ARGS 1097 ;bad;ENDPROC iemAImpl_ %+ %1 %+ _u16_intel 1098 ;bad; 1099 ;bad;BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ _amd, 12 1100 ;bad; PROLOGUE_3_ARGS 1101 ;bad; %1 T0_16, A1_16 1102 ;bad;%if %4 != 0 1103 ;bad; jz .unchanged_dst 1104 ;bad;%endif 1105 ;bad; mov [A0], T0_16 1106 ;bad;.unchanged_dst: 1107 ;bad; IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0 ; Only the ZF flag is modified on AMD Zen 2. 1108 ;bad; EPILOGUE_3_ARGS 1109 ;bad;ENDPROC iemAImpl_ %+ %1 %+ _u16_amd 1110 1111 ; 32-bit 996 1112 997 1113 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12 998 1114 PROLOGUE_3_ARGS 999 IEM_MAYBE_LOAD_FLAGS A2, %2, %3 1115 IEM_MAYBE_LOAD_FLAGS A2, %2, %3, %3 ; Must load undefined flags since AMD passes them thru 1000 1116 %1 T0_32, A1_32 1001 1117 %if %4 != 0 … … 1004 1120 mov [A0], T0_32 1005 1121 .unchanged_dst: 1006 IEM_SAVE_FLAGS A2, %2, %3 1122 IEM_SAVE_FLAGS A2, %2, %3, 0 1007 1123 EPILOGUE_3_ARGS 1008 1124 ENDPROC iemAImpl_ %+ %1 %+ _u32 1009 1125 1010 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ _intel, 121011 PROLOGUE_3_ARGS1012 %1 T1_32, A1_321013 %if %4 != 01014 jz .unchanged_dst1015 %endif1016 mov [A0], T1_321017 IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T11018 EPILOGUE_3_ARGS1019 .unchanged_dst:1020 IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF1021 EPILOGUE_3_ARGS1022 ENDPROC iemAImpl_ %+ %1 %+ _u32_intel1023 1024 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ _amd, 121025 PROLOGUE_3_ARGS1026 %1 T0_32, A1_321027 %if %4 != 01028 jz .unchanged_dst1029 %endif1030 mov [A0], T0_321031 .unchanged_dst:1032 IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0 ; Only the ZF flag is modified on AMD Zen 2.1033 EPILOGUE_3_ARGS1034 ENDPROC iemAImpl_ %+ %1 %+ _u32_amd1126 ;bad;BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ _intel, 12 1127 ;bad; PROLOGUE_3_ARGS 1128 ;bad; %1 T1_32, A1_32 1129 ;bad;%if %4 != 0 1130 ;bad; jz .unchanged_dst 1131 ;bad;%endif 1132 ;bad; mov [A0], T1_32 1133 ;bad; IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1 1134 ;bad; EPILOGUE_3_ARGS 1135 ;bad;.unchanged_dst: 1136 ;bad; IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF 1137 ;bad; EPILOGUE_3_ARGS 1138 ;bad;ENDPROC iemAImpl_ %+ %1 %+ _u32_intel 1139 ;bad; 1140 ;bad;BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ _amd, 12 1141 ;bad; PROLOGUE_3_ARGS 1142 ;bad; %1 T0_32, A1_32 1143 ;bad;%if %4 != 0 1144 ;bad; jz .unchanged_dst 1145 ;bad;%endif 1146 ;bad; mov [A0], T0_32 1147 ;bad;.unchanged_dst: 1148 ;bad; IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0 ; Only the ZF flag is modified on AMD Zen 2. 
1149 ;bad; EPILOGUE_3_ARGS 1150 ;bad;ENDPROC iemAImpl_ %+ %1 %+ _u32_amd 1035 1151 1036 1152 1037 1153 %ifdef RT_ARCH_AMD64 1154 ; 64-bit 1038 1155 1039 1156 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16 1040 1157 PROLOGUE_3_ARGS 1041 IEM_MAYBE_LOAD_FLAGS A2, %2, %3 1158 IEM_MAYBE_LOAD_FLAGS A2, %2, %3, %3 ; Must load undefined flags since AMD passes them thru 1042 1159 %1 T0, A1 1043 1160 %if %4 != 0 … … 1046 1163 mov [A0], T0 1047 1164 .unchanged_dst: 1048 IEM_SAVE_FLAGS A2, %2, %3 1165 IEM_SAVE_FLAGS A2, %2, %3, 0 1049 1166 EPILOGUE_3_ARGS_EX 8 1050 1167 ENDPROC iemAImpl_ %+ %1 %+ _u64 1051 1168 1052 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ _intel, 16 1053 PROLOGUE_3_ARGS 1054 IEM_MAYBE_LOAD_FLAGS A2, %2, %3 1055 %1 T1, A1 1056 %if %4 != 0 1057 jz .unchanged_dst 1058 %endif 1059 mov [A0], T1 1060 IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1 1061 EPILOGUE_3_ARGS 1062 .unchanged_dst: 1063 IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF 1064 EPILOGUE_3_ARGS 1065 ENDPROC iemAImpl_ %+ %1 %+ _u64_intel 1066 1067 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ _amd, 16 1068 PROLOGUE_3_ARGS 1069 %1 T0, A1 1070 %if %4 != 0 1071 jz .unchanged_dst 1072 %endif 1073 mov [A0], T0 1074 .unchanged_dst: 1075 IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0 ; Only the ZF flag is modified on AMD Zen 2. 1076 EPILOGUE_3_ARGS_EX 8 1077 ENDPROC iemAImpl_ %+ %1 %+ _u64_amd 1169 ;bad;BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ _intel, 16 1170 ;bad; PROLOGUE_3_ARGS 1171 ;bad; %1 T1, A1 1172 ;bad;%if %4 != 0 1173 ;bad; jz .unchanged_dst 1174 ;bad;%endif 1175 ;bad; mov [A0], T1 1176 ;bad; IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1 1177 ;bad; EPILOGUE_3_ARGS 1178 ;bad;.unchanged_dst: 1179 ;bad; IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF 1180 ;bad; EPILOGUE_3_ARGS 1181 ;bad;ENDPROC iemAImpl_ %+ %1 %+ _u64_intel 1182 ;bad; 1183 ;bad;BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ _amd, 16 1184 ;bad; PROLOGUE_3_ARGS 1185 ;bad; %1 T0, A1 1186 ;bad;%if %4 != 0 1187 ;bad; jz .unchanged_dst 1188 ;bad;%endif 1189 ;bad; mov [A0], T0 1190 ;bad;.unchanged_dst: 1191 ;bad; IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0 ; Only the ZF flag is modified on AMD Zen 2. 1192 ;bad; EPILOGUE_3_ARGS_EX 8 1193 ;bad;ENDPROC iemAImpl_ %+ %1 %+ _u64_amd 1078 1194 1079 1195 %endif ; RT_ARCH_AMD64 … … 1102 1218 ; @param 2 The modified flags. 1103 1219 ; @param 3 The undefined flags. 1104 ; 1105 %macro IEMIMPL_BIT_OP3 3 1220 ; @param 4 The zeroed flags. 
1221 ; 1222 %macro IEMIMPL_BIT_OP3 4 1106 1223 BEGINCODE 1107 1224 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12 … … 1110 1227 %1 T0_16, A1_16 1111 1228 mov [A0], T0_16 1112 IEM_SAVE_FLAGS A2, %2, %3 1229 IEM_SAVE_FLAGS A2, %2, %3, %4 1113 1230 EPILOGUE_3_ARGS 1114 1231 ENDPROC iemAImpl_ %+ %1 %+ _u16 … … 1119 1236 %1 T0_32, A1_32 1120 1237 mov [A0], T0_32 1121 IEM_SAVE_FLAGS A2, %2, %3 1238 IEM_SAVE_FLAGS A2, %2, %3, %4 1122 1239 EPILOGUE_3_ARGS 1123 1240 ENDPROC iemAImpl_ %+ %1 %+ _u32 … … 1129 1246 %1 T0, A1 1130 1247 mov [A0], T0 1131 IEM_SAVE_FLAGS A2, %2, %3 1248 IEM_SAVE_FLAGS A2, %2, %3, %4 1132 1249 EPILOGUE_3_ARGS_EX 8 1133 1250 ENDPROC iemAImpl_ %+ %1 %+ _u64 1134 1251 %endif ; RT_ARCH_AMD64 1135 1252 %endmacro 1136 IEMIMPL_BIT_OP3 popcnt, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF), 01253 IEMIMPL_BIT_OP3 popcnt, X86_EFL_ZF, 0, X86_EFL_CF | X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF 1137 1254 1138 1255 … … 1145 1262 ; @param 2 Undefined EFLAGS. 1146 1263 ; @param 3 Function suffix. 1147 ; @param 4 EFLAGS variation: 0 for native, 1 for intel (ignored),1264 ; @param 4 EFLAGS variation: 0 for native, 1 for intel, 1148 1265 ; 2 for AMD (set AF, clear PF, ZF and SF). 1149 1266 %macro IEMIMPL_IMUL_TWO 4 1150 1267 BEGINPROC_FASTCALL iemAImpl_imul_two_u16 %+ %3, 12 1151 1268 PROLOGUE_3_ARGS 1152 IEM_MAYBE_LOAD_FLAGS A2, %1, %2 1269 IEM_MAYBE_LOAD_FLAGS A2, %1, %2, %2 ; Undefined flags may be passed thru (AMD) 1153 1270 imul A1_16, word [A0] 1154 1271 mov [A0], A1_16 1155 1272 %if %4 != 1 1156 IEM_SAVE_FLAGS A2, %1, %2 1273 IEM_SAVE_FLAGS A2, %1, %2, 0 1157 1274 %else 1158 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1_16, 16, A1 1275 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1_16, 16, A1 ; intel 1159 1276 %endif 1160 1277 EPILOGUE_3_ARGS … … 1163 1280 BEGINPROC_FASTCALL iemAImpl_imul_two_u32 %+ %3, 12 1164 1281 PROLOGUE_3_ARGS 1165 IEM_MAYBE_LOAD_FLAGS A2, %1, %2 1282 IEM_MAYBE_LOAD_FLAGS A2, %1, %2, %2 ; Undefined flags may be passed thru (AMD) 1166 1283 imul A1_32, dword [A0] 1167 1284 mov [A0], A1_32 1168 1285 %if %4 != 1 1169 IEM_SAVE_FLAGS A2, %1, %2 1286 IEM_SAVE_FLAGS A2, %1, %2, 0 1170 1287 %else 1171 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1_32, 32, A1 1288 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1_32, 32, A1 ; intel 1172 1289 %endif 1173 1290 EPILOGUE_3_ARGS … … 1177 1294 BEGINPROC_FASTCALL iemAImpl_imul_two_u64 %+ %3, 16 1178 1295 PROLOGUE_3_ARGS 1179 IEM_MAYBE_LOAD_FLAGS A2, %1, %2 1296 IEM_MAYBE_LOAD_FLAGS A2, %1, %2, %2 ; Undefined flags may be passed thru (AMD) 1180 1297 imul A1, qword [A0] 1181 1298 mov [A0], A1 1182 1299 %if %4 != 1 1183 IEM_SAVE_FLAGS A2, %1, %2 1300 IEM_SAVE_FLAGS A2, %1, %2, 0 1184 1301 %else 1185 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1, 64, A1 1302 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1, 64, A1 ; intel 1186 1303 %endif 1187 1304 EPILOGUE_3_ARGS_EX 8 … … 1189 1306 %endif ; RT_ARCH_AMD64 1190 1307 %endmacro 1308 ; The SF, ZF, AF and PF flags are "undefined". AMD (3990x) leaves these 1309 ; flags as is. Whereas Intel skylake (6700K and 10980XE (Cascade Lake)) always 1310 ; clear AF and ZF and calculates SF and PF as per the lower half of the result. 
1191 1311 IEMIMPL_IMUL_TWO X86_EFL_OF | X86_EFL_CF, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, , 0 1192 1312 IEMIMPL_IMUL_TWO X86_EFL_OF | X86_EFL_CF, 0, _intel, 1 … … 1290 1410 xadd [A0], T0_8 1291 1411 mov [A1], T0_8 1292 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0 1412 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 0 1293 1413 EPILOGUE_3_ARGS 1294 1414 ENDPROC iemAImpl_xadd_u8 … … 1300 1420 xadd [A0], T0_16 1301 1421 mov [A1], T0_16 1302 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0 1422 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 0 1303 1423 EPILOGUE_3_ARGS 1304 1424 ENDPROC iemAImpl_xadd_u16 … … 1310 1430 xadd [A0], T0_32 1311 1431 mov [A1], T0_32 1312 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0 1432 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 0 1313 1433 EPILOGUE_3_ARGS 1314 1434 ENDPROC iemAImpl_xadd_u32 … … 1321 1441 xadd [A0], T0 1322 1442 mov [A1], T0 1323 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0 1443 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 0 1324 1444 EPILOGUE_3_ARGS 1325 1445 ENDPROC iemAImpl_xadd_u64 … … 1332 1452 lock xadd [A0], T0_8 1333 1453 mov [A1], T0_8 1334 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0 1454 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 0 1335 1455 EPILOGUE_3_ARGS 1336 1456 ENDPROC iemAImpl_xadd_u8_locked … … 1342 1462 lock xadd [A0], T0_16 1343 1463 mov [A1], T0_16 1344 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0 1464 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 0 1345 1465 EPILOGUE_3_ARGS 1346 1466 ENDPROC iemAImpl_xadd_u16_locked … … 1352 1472 lock xadd [A0], T0_32 1353 1473 mov [A1], T0_32 1354 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0 1474 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 0 1355 1475 EPILOGUE_3_ARGS 1356 1476 ENDPROC iemAImpl_xadd_u32_locked … … 1363 1483 lock xadd [A0], T0 1364 1484 mov [A1], T0 1365 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0 1485 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, 0 1366 1486 EPILOGUE_3_ARGS 1367 1487 ENDPROC iemAImpl_xadd_u64_locked … … 1402 1522 mov [r11], eax 1403 1523 mov [r11 + 4], edx 1404 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)1524 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0, 0 ; clobbers T0+T1 (eax, r11) 1405 1525 1406 1526 pop rbx … … 1422 1542 mov [rsi], eax 1423 1543 mov [rsi + 4], edx 1424 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)1544 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0, 0 ; clobbers T0+T1 (eax, r11) 1425 1545 1426 1546 pop rbx … … 1449 1569 mov [esi], eax 1450 1570 mov [esi + 4], edx 1451 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)1571 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF), 0, 0 ; clobbers T0+T1 (eax, edi) 1452 1572 1453 1573 pop ebp … … 1477 1597 mov 
[r11], eax 1478 1598 mov [r11 + 4], edx 1479 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)1599 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0, 0 ; clobbers T0+T1 (eax, r11) 1480 1600 1481 1601 pop rbx … … 1497 1617 mov [rsi], eax 1498 1618 mov [rsi + 4], edx 1499 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)1619 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0, 0 ; clobbers T0+T1 (eax, r11) 1500 1620 1501 1621 pop rbx … … 1524 1644 mov [esi], eax 1525 1645 mov [esi + 4], edx 1526 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)1646 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF), 0, 0 ; clobbers T0+T1 (eax, edi) 1527 1647 1528 1648 pop ebp … … 1568 1688 mov [r11], rax 1569 1689 mov [r11 + 8], rdx 1570 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)1690 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0, 0 ; clobbers T0+T1 (eax, r11) 1571 1691 1572 1692 pop rbx … … 1588 1708 mov [rsi], rax 1589 1709 mov [rsi + 8], rdx 1590 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)1710 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0, 0 ; clobbers T0+T1 (eax, r11) 1591 1711 1592 1712 pop rbx … … 1613 1733 mov [r11], rax 1614 1734 mov [r11 + 8], rdx 1615 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)1735 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0, 0 ; clobbers T0+T1 (eax, r11) 1616 1736 1617 1737 pop rbx … … 1633 1753 mov [rsi], rax 1634 1754 mov [rsi + 8], rdx 1635 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)1755 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0, 0 ; clobbers T0+T1 (eax, r11) 1636 1756 1637 1757 pop rbx … … 1660 1780 %1 cmpxchg [A0], A2_8 1661 1781 mov [A1], al 1662 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)1782 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0, 0 ; clobbers T0+T1 (eax, r11/edi) 1663 1783 EPILOGUE_4_ARGS 1664 1784 ENDPROC iemAImpl_cmpxchg_u8 %+ %2 … … 1670 1790 %1 cmpxchg [A0], A2_16 1671 1791 mov [A1], ax 1672 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)1792 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0, 0 ; clobbers T0+T1 (eax, r11/edi) 1673 1793 EPILOGUE_4_ARGS 1674 1794 ENDPROC iemAImpl_cmpxchg_u16 %+ %2 … … 1680 1800 %1 cmpxchg [A0], A2_32 1681 1801 mov [A1], eax 1682 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)1802 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0, 0 ; clobbers T0+T1 (eax, r11/edi) 1683 1803 EPILOGUE_4_ARGS 1684 1804 ENDPROC iemAImpl_cmpxchg_u32 %+ %2 … … 1691 1811 %1 cmpxchg [A0], A2 1692 1812 mov [A1], rax 1693 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)1813 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0, 0 ; clobbers T0+T1 (eax, r11/edi) 1694 1814 EPILOGUE_4_ARGS 1695 1815 %else … … 1722 1842 mov [esi], eax 1723 1843 mov [esi + 4], edx 1724 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)1844 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0, 0 ; clobbers T0+T1 (eax, edi) 1725 1845 1726 1846 pop ebp … … 1742 1862 IEMIMPL_CMPXCHG , , 1743 1863 
IEMIMPL_CMPXCHG lock, _locked 1864 1865 1744 1866 1745 1867 ;; … … 1763 1885 IEM_MAYBE_LOAD_FLAGS A1, %2, %3, 0 1764 1886 %1 byte [A0] 1765 IEM_SAVE_FLAGS A1, %2, %3 1887 IEM_SAVE_FLAGS A1, %2, %3, 0 1766 1888 EPILOGUE_2_ARGS 1767 1889 ENDPROC iemAImpl_ %+ %1 %+ _u8 … … 1771 1893 IEM_MAYBE_LOAD_FLAGS A1, %2, %3, 0 1772 1894 lock %1 byte [A0] 1773 IEM_SAVE_FLAGS A1, %2, %3 1895 IEM_SAVE_FLAGS A1, %2, %3, 0 1774 1896 EPILOGUE_2_ARGS 1775 1897 ENDPROC iemAImpl_ %+ %1 %+ _u8_locked … … 1779 1901 IEM_MAYBE_LOAD_FLAGS A1, %2, %3, 0 1780 1902 %1 word [A0] 1781 IEM_SAVE_FLAGS A1, %2, %3 1903 IEM_SAVE_FLAGS A1, %2, %3, 0 1782 1904 EPILOGUE_2_ARGS 1783 1905 ENDPROC iemAImpl_ %+ %1 %+ _u16 … … 1787 1909 IEM_MAYBE_LOAD_FLAGS A1, %2, %3, 0 1788 1910 lock %1 word [A0] 1789 IEM_SAVE_FLAGS A1, %2, %3 1911 IEM_SAVE_FLAGS A1, %2, %3, 0 1790 1912 EPILOGUE_2_ARGS 1791 1913 ENDPROC iemAImpl_ %+ %1 %+ _u16_locked … … 1795 1917 IEM_MAYBE_LOAD_FLAGS A1, %2, %3, 0 1796 1918 %1 dword [A0] 1797 IEM_SAVE_FLAGS A1, %2, %3 1919 IEM_SAVE_FLAGS A1, %2, %3, 0 1798 1920 EPILOGUE_2_ARGS 1799 1921 ENDPROC iemAImpl_ %+ %1 %+ _u32 … … 1803 1925 IEM_MAYBE_LOAD_FLAGS A1, %2, %3, 0 1804 1926 lock %1 dword [A0] 1805 IEM_SAVE_FLAGS A1, %2, %3 1927 IEM_SAVE_FLAGS A1, %2, %3, 0 1806 1928 EPILOGUE_2_ARGS 1807 1929 ENDPROC iemAImpl_ %+ %1 %+ _u32_locked … … 1812 1934 IEM_MAYBE_LOAD_FLAGS A1, %2, %3, 0 1813 1935 %1 qword [A0] 1814 IEM_SAVE_FLAGS A1, %2, %3 1936 IEM_SAVE_FLAGS A1, %2, %3, 0 1815 1937 EPILOGUE_2_ARGS 1816 1938 ENDPROC iemAImpl_ %+ %1 %+ _u64 … … 1820 1942 IEM_MAYBE_LOAD_FLAGS A1, %2, %3, 0 1821 1943 lock %1 qword [A0] 1822 IEM_SAVE_FLAGS A1, %2, %3 1944 IEM_SAVE_FLAGS A1, %2, %3, 0 1823 1945 EPILOGUE_2_ARGS 1824 1946 ENDPROC iemAImpl_ %+ %1 %+ _u64_locked … … 1906 2028 %1 byte [A1], cl 1907 2029 %endif 1908 IEM_SAVE_FLAGS A2, %2, %3 2030 IEM_SAVE_FLAGS A2, %2, %3, 0 2031 .zero_shift: 1909 2032 EPILOGUE_3_ARGS 1910 2033 ENDPROC iemAImpl_ %+ %1 %+ _u8 … … 1920 2043 %1 word [A1], cl 1921 2044 %endif 1922 IEM_SAVE_FLAGS A2, %2, %3 2045 IEM_SAVE_FLAGS A2, %2, %3, 0 1923 2046 EPILOGUE_3_ARGS 1924 2047 ENDPROC iemAImpl_ %+ %1 %+ _u16 … … 1934 2057 %1 dword [A1], cl 1935 2058 %endif 1936 IEM_SAVE_FLAGS A2, %2, %3 2059 IEM_SAVE_FLAGS A2, %2, %3, 0 1937 2060 EPILOGUE_3_ARGS 1938 2061 ENDPROC iemAImpl_ %+ %1 %+ _u32 … … 1949 2072 %1 qword [A1], cl 1950 2073 %endif 1951 IEM_SAVE_FLAGS A2, %2, %3 2074 IEM_SAVE_FLAGS A2, %2, %3, 0 1952 2075 EPILOGUE_3_ARGS 1953 2076 ENDPROC iemAImpl_ %+ %1 %+ _u64 … … 1956 2079 %endmacro 1957 2080 1958 ;; @todo some questions wrt flags when the shift count is high according to intel docs... 1959 IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0, X86_EFL_CF 1960 IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0, X86_EFL_CF 1961 IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0, X86_EFL_CF 1962 IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0, X86_EFL_CF 1963 IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF), 0 1964 IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF), 0 1965 IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF), 0 2081 ; These instructions will NOT modify flags if the masked shift count is zero 2082 ; (the mask is 0x3f for 64-bit instructions and 0x1f for the others). Thus, 2083 ; we have to force load all modified and undefined. 
2084 IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0, X86_EFL_CF | X86_EFL_OF 2085 IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0, X86_EFL_CF | X86_EFL_OF 2086 IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0, X86_EFL_CF | X86_EFL_OF 2087 IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0, X86_EFL_CF | X86_EFL_OF 2088 IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF), X86_EFL_STATUS_BITS 2089 IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF), X86_EFL_STATUS_BITS 2090 IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF), X86_EFL_STATUS_BITS 1966 2091 1967 2092 … … 1978 2103 ; @param 2 The modified flags. 1979 2104 ; @param 3 The undefined flags. 2105 ; @param 4 The force loaded flags. 1980 2106 ; 1981 2107 ; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments. … … 1983 2109 ; @note the _intel and _amd variants are implemented in C. 1984 2110 ; 1985 %macro IEMIMPL_SHIFT_DBL_OP 32111 %macro IEMIMPL_SHIFT_DBL_OP 4 1986 2112 BEGINCODE 1987 2113 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16 1988 2114 PROLOGUE_4_ARGS 1989 IEM_MAYBE_LOAD_FLAGS A3, %2, %3 2115 ;IEM_LOAD_FLAGS A3, %4, %3 2116 IEM_MAYBE_LOAD_FLAGS A3, %2, %3, %4 1990 2117 %ifdef ASM_CALL64_GCC 1991 2118 xchg A3, A2 … … 1996 2123 %1 [A2], A1_16, cl 1997 2124 %endif 1998 IEM_SAVE_FLAGS A3, %2, %32125 IEM_SAVE_FLAGS A3, %2, %3, 0 1999 2126 EPILOGUE_4_ARGS 2000 2127 ENDPROC iemAImpl_ %+ %1 %+ _u16 … … 2002 2129 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16 2003 2130 PROLOGUE_4_ARGS 2004 IEM_MAYBE_LOAD_FLAGS A3, %2, %3 2131 ;IEM_LOAD_FLAGS A3, %4, %3 2132 IEM_MAYBE_LOAD_FLAGS A3, %2, %3, %4 2005 2133 %ifdef ASM_CALL64_GCC 2006 2134 xchg A3, A2 … … 2011 2139 %1 [A2], A1_32, cl 2012 2140 %endif 2013 IEM_SAVE_FLAGS A3, %2, %32141 IEM_SAVE_FLAGS A3, %2, %3, 0 2014 2142 EPILOGUE_4_ARGS 2015 2143 ENDPROC iemAImpl_ %+ %1 %+ _u32 … … 2018 2146 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20 2019 2147 PROLOGUE_4_ARGS 2020 IEM_MAYBE_LOAD_FLAGS A3, %2, %3 2148 ;IEM_LOAD_FLAGS A3, %4, %3 2149 IEM_MAYBE_LOAD_FLAGS A3, %2, %3, %4 2021 2150 %ifdef ASM_CALL64_GCC 2022 2151 xchg A3, A2 … … 2027 2156 %1 [A2], A1, cl 2028 2157 %endif 2029 IEM_SAVE_FLAGS A3, %2, %32158 IEM_SAVE_FLAGS A3, %2, %3, 0 2030 2159 EPILOGUE_4_ARGS_EX 12 2031 2160 ENDPROC iemAImpl_ %+ %1 %+ _u64 … … 2034 2163 %endmacro 2035 2164 2036 IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF) 2037 IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF) 2165 ; These instructions will NOT modify flags if the masked shift count is zero 2166 ; (the mask is 0x3f for 64-bit instructions and 0x1f for the others). Thus, 2167 ; we have to force load all modified and undefined. 
2168 IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF), X86_EFL_STATUS_BITS 2169 IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF), X86_EFL_STATUS_BITS 2038 2170 2039 2171 … … 2063 2195 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8 %+ %4, 12 2064 2196 PROLOGUE_3_ARGS 2065 IEM_MAYBE_LOAD_FLAGS A2, %2, %3 2197 IEM_MAYBE_LOAD_FLAGS A2, %2, %3, %3 ; Undefined flags may be passed thru (AMD) 2066 2198 mov al, [A0] 2067 2199 %1 A1_8 2068 2200 mov [A0], ax 2069 2201 %if %5 != 1 2070 IEM_SAVE_FLAGS A2, %2, %3 2202 IEM_SAVE_FLAGS A2, %2, %3, 0 2071 2203 %else 2072 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %2, X86_EFL_AF | X86_EFL_ZF, ax, 8, xAX 2204 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %2, X86_EFL_AF | X86_EFL_ZF, ax, 8, xAX ; intel 2073 2205 %endif 2074 2206 xor eax, eax … … 2078 2210 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ %4, 16 2079 2211 PROLOGUE_4_ARGS 2080 IEM_MAYBE_LOAD_FLAGS A3, %2, %3 2212 IEM_MAYBE_LOAD_FLAGS A3, %2, %3, %3 ; Undefined flags may be passed thru (AMD) 2081 2213 mov ax, [A0] 2082 2214 %ifdef ASM_CALL64_GCC … … 2091 2223 %endif 2092 2224 %if %5 != 1 2093 IEM_SAVE_FLAGS A3, %2, %3 2225 IEM_SAVE_FLAGS A3, %2, %3, 0 2094 2226 %else 2095 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, ax, 16, xAX 2227 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, ax, 16, xAX ; intel 2096 2228 %endif 2097 2229 xor eax, eax … … 2101 2233 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ %4, 16 2102 2234 PROLOGUE_4_ARGS 2103 IEM_MAYBE_LOAD_FLAGS A3, %2, %3 2235 IEM_MAYBE_LOAD_FLAGS A3, %2, %3, %3 ; Undefined flags may be passed thru (AMD) 2104 2236 mov eax, [A0] 2105 2237 %ifdef ASM_CALL64_GCC … … 2114 2246 %endif 2115 2247 %if %5 != 1 2116 IEM_SAVE_FLAGS A3, %2, %3 2248 IEM_SAVE_FLAGS A3, %2, %3, 0 2117 2249 %else 2118 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, eax, 32, xAX 2250 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, eax, 32, xAX ; intel 2119 2251 %endif 2120 2252 xor eax, eax … … 2125 2257 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ %4, 20 2126 2258 PROLOGUE_4_ARGS 2127 IEM_MAYBE_LOAD_FLAGS A3, %2, %3 2259 IEM_MAYBE_LOAD_FLAGS A3, %2, %3, %3 ; Undefined flags may be passed thru (AMD) 2128 2260 mov rax, [A0] 2129 2261 %ifdef ASM_CALL64_GCC … … 2138 2270 %endif 2139 2271 %if %5 != 1 2140 IEM_SAVE_FLAGS A3, %2, %3 2272 IEM_SAVE_FLAGS A3, %2, %3, 0 2141 2273 %else 2142 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, rax, 64, xAX 2274 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, rax, 64, xAX ; intel 2143 2275 %endif 2144 2276 xor eax, eax … … 2259 2391 %endif 2260 2392 2261 IEM_MAYBE_LOAD_FLAGS A2, %2, %3 2393 IEM_MAYBE_LOAD_FLAGS A2, %2, %3, %3 ; Undefined flags may be passed thru (Intel) 2262 2394 mov ax, [A0] 2263 2395 %1 A1_8 … … 2266 2398 IEM_ADJUST_FLAGS A2, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF 2267 2399 %else 2268 IEM_SAVE_FLAGS A2, %2, %3 2400 IEM_SAVE_FLAGS A2, %2, %3, 0 2269 2401 %endif 2270 2402 xor eax, eax … … 2325 2457 %endif 2326 2458 2327 IEM_MAYBE_LOAD_FLAGS A3, %2, %3 2459 IEM_MAYBE_LOAD_FLAGS A3, %2, %3, %3 ; Undefined flags may be passed thru (AMD) 2328 2460 %ifdef ASM_CALL64_GCC 2329 2461 mov T1, A2 … … 2344 2476 IEM_ADJUST_FLAGS A3, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF 2345 2477 %else 2346 IEM_SAVE_FLAGS A3, %2, %3 2478 IEM_SAVE_FLAGS A3, %2, %3, 0 2347 2479 %endif 2348 2480 xor eax, eax … … 2411 
2543 %endif 2412 2544 2413 IEM_MAYBE_LOAD_FLAGS A3, %2, %3 2545 IEM_MAYBE_LOAD_FLAGS A3, %2, %3, %3 ; Undefined flags may be passed thru (AMD) 2414 2546 mov eax, [A0] 2415 2547 %ifdef ASM_CALL64_GCC … … 2431 2563 IEM_ADJUST_FLAGS A3, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF 2432 2564 %else 2433 IEM_SAVE_FLAGS A3, %2, %3 2565 IEM_SAVE_FLAGS A3, %2, %3, 0 2434 2566 %endif 2435 2567 xor eax, eax … … 2499 2631 %endif 2500 2632 2501 IEM_MAYBE_LOAD_FLAGS A3, %2, %3 2633 IEM_MAYBE_LOAD_FLAGS A3, %2, %3, %3 ; Undefined flags may be passed thru (AMD) 2502 2634 mov rax, [A0] 2503 2635 %ifdef ASM_CALL64_GCC … … 2519 2651 IEM_ADJUST_FLAGS A3, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF 2520 2652 %else 2521 IEM_SAVE_FLAGS A3, %2, %3 2653 IEM_SAVE_FLAGS A3, %2, %3, 0 2522 2654 %endif 2523 2655 xor eax, eax … … 4593 4725 movdqu xmm1, [A1] 4594 4726 ptest xmm0, xmm1 4595 IEM_SAVE_FLAGS A2, X86_EFL_ STATUS_BITS, 04727 IEM_SAVE_FLAGS A2, X86_EFL_ZF | X86_EFL_CF, 0, X86_EFL_OF | X86_EFL_AF | X86_EFL_PF | X86_EFL_SF 4596 4728 4597 4729 IEMIMPL_SSE_EPILOGUE … … 4606 4738 vmovdqu ymm1, [A1] 4607 4739 vptest ymm0, ymm1 4608 IEM_SAVE_FLAGS A2, X86_EFL_ STATUS_BITS, 04740 IEM_SAVE_FLAGS A2, X86_EFL_ZF | X86_EFL_CF, 0, X86_EFL_OF | X86_EFL_AF | X86_EFL_PF | X86_EFL_SF 4609 4741 4610 4742 IEMIMPL_SSE_EPILOGUE … … 5640 5772 call T1 5641 5773 5642 IEM_SAVE_FLAGS A1, X86_EFL_ STATUS_BITS, 05774 IEM_SAVE_FLAGS A1, X86_EFL_CF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_OF, 0, X86_EFL_AF | X86_EFL_PF 5643 5775 mov [T2], ecx 5644 5776 … … 5687 5819 5688 5820 pop xDX 5689 IEM_SAVE_FLAGS A1, X86_EFL_ STATUS_BITS, 05821 IEM_SAVE_FLAGS A1, X86_EFL_CF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_OF, 0, X86_EFL_AF | X86_EFL_PF 5690 5822 mov [T2], ecx 5691 5823 … … 5729 5861 call T1 5730 5862 5731 IEM_SAVE_FLAGS A1, X86_EFL_ STATUS_BITS, 05863 IEM_SAVE_FLAGS A1, X86_EFL_CF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_OF, 0, X86_EFL_AF | X86_EFL_PF 5732 5864 movdqu [A0], xmm0 5733 5865 … … 5775 5907 5776 5908 pop xDX 5777 IEM_SAVE_FLAGS A1, X86_EFL_ STATUS_BITS, 05909 IEM_SAVE_FLAGS A1, X86_EFL_CF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_OF, 0, X86_EFL_AF | X86_EFL_PF 5778 5910 movdqu [A0], xmm0 5779 5911 … … 6295 6427 movdqu xmm1, [A3] 6296 6428 ucomiss xmm0, xmm1 6297 IEM_SAVE_FLAGS A1, X86_EFL_ STATUS_BITS, 06429 IEM_SAVE_FLAGS A1, X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF, 0, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF 6298 6430 6299 6431 SSE_ST_FXSTATE_MXCSR_ONLY_NO_FXSTATE A0 … … 6310 6442 movdqu xmm1, [A3] 6311 6443 vucomiss xmm0, xmm1 6312 IEM_SAVE_FLAGS A1, X86_EFL_ STATUS_BITS, 06444 IEM_SAVE_FLAGS A1, X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF, 0, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF 6313 6445 6314 6446 SSE_ST_FXSTATE_MXCSR_ONLY_NO_FXSTATE A0 … … 6334 6466 movdqu xmm1, [A3] 6335 6467 ucomisd xmm0, xmm1 6336 IEM_SAVE_FLAGS A1, X86_EFL_ STATUS_BITS, 06468 IEM_SAVE_FLAGS A1, X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF, 0, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF 6337 6469 6338 6470 SSE_ST_FXSTATE_MXCSR_ONLY_NO_FXSTATE A0 … … 6349 6481 movdqu xmm1, [A3] 6350 6482 vucomisd xmm0, xmm1 6351 IEM_SAVE_FLAGS A1, X86_EFL_ STATUS_BITS, 06483 IEM_SAVE_FLAGS A1, X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF, 0, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF 6352 6484 6353 6485 SSE_ST_FXSTATE_MXCSR_ONLY_NO_FXSTATE A0 … … 6372 6504 movdqu xmm1, [A3] 6373 6505 comiss xmm0, xmm1 6374 IEM_SAVE_FLAGS A1, X86_EFL_ STATUS_BITS, 06506 IEM_SAVE_FLAGS A1, X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF, 0, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF 6375 6507 6376 6508 SSE_ST_FXSTATE_MXCSR_ONLY_NO_FXSTATE A0 … … 6387 6519 
movdqu xmm1, [A3] 6388 6520 vcomiss xmm0, xmm1 6389 IEM_SAVE_FLAGS A1, X86_EFL_ STATUS_BITS, 06521 IEM_SAVE_FLAGS A1, X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF, 0, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF 6390 6522 6391 6523 SSE_ST_FXSTATE_MXCSR_ONLY_NO_FXSTATE A0 … … 6411 6543 movdqu xmm1, [A3] 6412 6544 comisd xmm0, xmm1 6413 IEM_SAVE_FLAGS A1, X86_EFL_ STATUS_BITS, 06545 IEM_SAVE_FLAGS A1, X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF, 0, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF 6414 6546 6415 6547 SSE_ST_FXSTATE_MXCSR_ONLY_NO_FXSTATE A0 … … 6426 6558 movdqu xmm1, [A3] 6427 6559 vcomisd xmm0, xmm1 6428 IEM_SAVE_FLAGS A1, X86_EFL_ STATUS_BITS, 06560 IEM_SAVE_FLAGS A1, X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF, 0, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF 6429 6561 6430 6562 SSE_ST_FXSTATE_MXCSR_ONLY_NO_FXSTATE A0 … … 6696 6828 %1 %2 6697 6829 mov [A0], %2 6698 IEM_SAVE_FLAGS A1, X86_EFL_ STATUS_BITS, 06830 IEM_SAVE_FLAGS A1, X86_EFL_CF, 0, X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF 6699 6831 6700 6832 EPILOGUE_2_ARGS … … 6789 6921 %1 A1_32, [A0] 6790 6922 mov [A0], A1_32 6791 IEM_SAVE_FLAGS A2, %2, 0 6923 IEM_SAVE_FLAGS A2, %2, 0, 0 6792 6924 6793 6925 EPILOGUE_4_ARGS … … 6809 6941 %1 A1, [A0] 6810 6942 mov [A0], A1 6811 IEM_SAVE_FLAGS A2, %2, 0 6943 IEM_SAVE_FLAGS A2, %2, 0, 0 6812 6944 6813 6945 EPILOGUE_4_ARGS -
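
The heart of the assembly changes above is the new fourth parameter to IEM_MAYBE_LOAD_FLAGS and IEM_SAVE_FLAGS: flags the instruction is architecturally guaranteed to zero no longer have to be loaded from the guest context or captured from the host EFLAGS, which is what makes the cheaper BT/SETcc/LAHF paths usable instead of PUSHF/POPF. A minimal C sketch of the merge the save path performs; MergeEFlags and its parameter names are illustrative, not from the source, and the masks stand in for the X86_EFL_* combinations passed to the macro:

#include <stdint.h>

/*
 * Mirror of what IEM_SAVE_FLAGS does with its four mask arguments:
 * keep guest bits outside the modified/undefined/zeroed sets, take the
 * modified and undefined bits from the host-computed result, and leave
 * the zeroed bits cleared without ever computing them.
 */
static uint32_t MergeEFlags(uint32_t fGuestIn, uint32_t fHostResult,
                            uint32_t fModified, uint32_t fUndefined, uint32_t fZeroed)
{
    uint32_t const fKeepMask = ~(fModified | fUndefined | fZeroed);
    return (fGuestIn    & fKeepMask)
         | (fHostResult & (fModified | fUndefined));
}

For OR/XOR/AND/TEST, for instance, the instruction tables above now pass SF|ZF|PF as modified, AF as undefined and OF|CF as zeroed, so the host result only needs the low status byte and a single LAHF capture suffices.
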
trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp
r103909 r104051 1398 1398 # endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */ 1399 1399 1400 #endif /* !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) */ 1400 1401 1401 1402 /* … … 1430 1431 } while (0) 1431 1432 1432 1433 1433 /* 1434 1434 * BSF - first (least significant) bit set 1435 1435 */ 1436 #if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) 1436 1437 IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags)) 1437 1438 { 1438 1439 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU64(uSrc)); 1439 1440 } 1441 #endif 1440 1442 1441 1443 IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags)) … … 1449 1451 } 1450 1452 1451 # if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) 1452 1453 #if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) 1453 1454 IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags)) 1454 1455 { 1455 1456 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU32(uSrc)); 1456 1457 } 1458 #endif 1457 1459 1458 1460 IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags)) … … 1467 1469 1468 1470 1471 #if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) 1469 1472 IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags)) 1470 1473 { 1471 1474 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU16(uSrc)); 1472 1475 } 1476 #endif 1473 1477 1474 1478 IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags)) … … 1482 1486 } 1483 1487 1484 # endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */1485 1488 1486 1489 … … 1488 1491 * BSR - last (most significant) bit set 1489 1492 */ 1493 #if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) 1490 1494 IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags)) 1491 1495 { 1492 1496 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU64(uSrc)); 1493 1497 } 1498 #endif 1494 1499 1495 1500 IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags)) … … 1503 1508 } 1504 1509 1505 # if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) 1506 1510 1511 #if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) 1507 1512 IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags)) 1508 1513 { 1509 1514 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU32(uSrc)); 1510 1515 } 1516 #endif 1511 1517 1512 1518 IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags)) … … 1521 1527 1522 1528 1529 #if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) 1523 1530 IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags)) 1524 1531 { 1525 1532 SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU16(uSrc)); 1526 1533 } 1534 #endif 1527 1535 1528 1536 IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags)) … … 1535 1543 SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU16(uSrc)); 1536 1544 } 1537 1538 # endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */1539 1545 1540 1546 … … 1569 1575 * LZCNT - count leading zero bits. 
1570 1576 */ 1577 #if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) 1571 1578 IEM_DECL_IMPL_DEF(void, iemAImpl_lzcnt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags)) 1572 1579 { 1573 1580 iemAImpl_lzcnt_u64_intel(puDst, uSrc, pfEFlags); 1574 1581 } 1582 #endif 1575 1583 1576 1584 IEM_DECL_IMPL_DEF(void, iemAImpl_lzcnt_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags)) … … 1584 1592 } 1585 1593 1586 # if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) 1587 1594 1595 #if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) 1588 1596 IEM_DECL_IMPL_DEF(void, iemAImpl_lzcnt_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags)) 1589 1597 { 1590 1598 iemAImpl_lzcnt_u32_intel(puDst, uSrc, pfEFlags); 1591 1599 } 1600 #endif 1592 1601 1593 1602 IEM_DECL_IMPL_DEF(void, iemAImpl_lzcnt_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags)) … … 1602 1611 1603 1612 1613 #if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) 1604 1614 IEM_DECL_IMPL_DEF(void, iemAImpl_lzcnt_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags)) 1605 1615 { 1606 1616 iemAImpl_lzcnt_u16_intel(puDst, uSrc, pfEFlags); 1607 1617 } 1618 #endif 1608 1619 1609 1620 IEM_DECL_IMPL_DEF(void, iemAImpl_lzcnt_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags)) … … 1616 1627 SET_BIT_CNT_SEARCH_RESULT_AMD(puDst, uSrc, pfEFlags, ASMCountLeadingZerosU16(uSrc)); 1617 1628 } 1618 1619 # endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */1620 1629 1621 1630 … … 1623 1632 * TZCNT - count leading zero bits. 1624 1633 */ 1634 #if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) 1625 1635 IEM_DECL_IMPL_DEF(void, iemAImpl_tzcnt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags)) 1626 1636 { 1627 1637 iemAImpl_tzcnt_u64_intel(puDst, uSrc, pfEFlags); 1628 1638 } 1639 #endif 1629 1640 1630 1641 IEM_DECL_IMPL_DEF(void, iemAImpl_tzcnt_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags)) … … 1638 1649 } 1639 1650 1640 # if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) 1641 1651 1652 #if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) 1642 1653 IEM_DECL_IMPL_DEF(void, iemAImpl_tzcnt_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags)) 1643 1654 { 1644 1655 iemAImpl_tzcnt_u32_intel(puDst, uSrc, pfEFlags); 1645 1656 } 1657 #endif 1646 1658 1647 1659 IEM_DECL_IMPL_DEF(void, iemAImpl_tzcnt_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags)) … … 1656 1668 1657 1669 1670 #if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) 1658 1671 IEM_DECL_IMPL_DEF(void, iemAImpl_tzcnt_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags)) 1659 1672 { 1660 1673 iemAImpl_tzcnt_u16_intel(puDst, uSrc, pfEFlags); 1661 1674 } 1675 #endif 1662 1676 1663 1677 IEM_DECL_IMPL_DEF(void, iemAImpl_tzcnt_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags)) … … 1671 1685 } 1672 1686 1673 # endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */ 1674 #endif /* !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) */ 1687 1675 1688 1676 1689 /* -
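
The C changes above regroup the plain iemAImpl_bsf/bsr/lzcnt/tzcnt fallbacks under !defined(RT_ARCH_AMD64) while the _intel and _amd variants are now always compiled. Per the comment in the assembly file, the Intel behaviour is to "clear all and calculate PF in addition to ZF", while AMD passes all flags other than ZF through. A hedged C sketch of the Intel-variant BSF semantics; the helper and constant names are illustrative (the real code funnels through SET_BIT_SEARCH_RESULT_INTEL, whose body is not shown in this diff), with PF taken from the low byte of the result as the IEM_ADJUST_FLAGS_WITH_PARITY calls in the disabled assembly variants suggest:

#include <stdint.h>

#define EFL_CF  UINT32_C(0x0001)
#define EFL_PF  UINT32_C(0x0004)
#define EFL_AF  UINT32_C(0x0010)
#define EFL_ZF  UINT32_C(0x0040)
#define EFL_SF  UINT32_C(0x0080)
#define EFL_OF  UINT32_C(0x0800)

/* PF is set when the low byte of the result has an even number of set bits. */
static int EvenParity8(uint8_t b)
{
    b ^= b >> 4;
    b ^= b >> 2;
    b ^= b >> 1;
    return !(b & 1);
}

static void BsfU32IntelSketch(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags)
{
    uint32_t fEfl = *pfEFlags & ~(EFL_OF | EFL_SF | EFL_ZF | EFL_AF | EFL_PF | EFL_CF);
    if (uSrc == 0)
        fEfl |= EFL_ZF | EFL_PF;            /* not found: ZF=PF=1, destination left as-is */
    else
    {
        unsigned iBit = 0;                  /* index of the least significant set bit */
        while (!((uSrc >> iBit) & 1))
            iBit++;
        *puDst = iBit;                      /* found: ZF=0, PF computed from the result */
        if (EvenParity8((uint8_t)iBit))
            fEfl |= EFL_PF;
    }
    *pfEFlags = fEfl;
}

The AMD variant would instead leave everything but ZF untouched in fEfl, which is why the assembly callers above must now force-load the undefined flags before the instruction runs.
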
trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp
r103100 r104051
2250 2250 }
2251 2251
2252
2253 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
2254
2255 static uint64_t CmpXchg8bBench(uint32_t cIterations, FNIEMAIMPLCMPXCHG8B *pfn, uint64_t const uDstValue,
2256                                uint64_t const uOldValue, uint64_t const uNewValue, uint32_t const fEflIn)
2257 {
2258     cIterations /= 4;
2259     RTThreadYield();
2260     uint64_t const nsStart = RTTimeNanoTS();
2261     for (uint32_t i = 0; i < cIterations; i++)
2262     {
2263         RTUINT64U uA, uB;
2264         uint32_t fEfl = fEflIn;
2265         uint64_t uDst = uDstValue;
2266         uB.u = uNewValue;
2267         uA.u = uOldValue;
2268         pfn(&uDst, &uA, &uB, &fEfl);
2269
2270         fEfl = fEflIn;
2271         uDst = uDstValue;
2272         uB.u = uNewValue;
2273         uA.u = uOldValue;
2274         pfn(&uDst, &uA, &uB, &fEfl);
2275
2276         fEfl = fEflIn;
2277         uDst = uDstValue;
2278         uB.u = uNewValue;
2279         uA.u = uOldValue;
2280         pfn(&uDst, &uA, &uB, &fEfl);
2281
2282         fEfl = fEflIn;
2283         uDst = uDstValue;
2284         uB.u = uNewValue;
2285         uA.u = uOldValue;
2286         pfn(&uDst, &uA, &uB, &fEfl);
2287     }
2288     return RTTimeNanoTS() - nsStart;
2289 }
2290
2252 2291 static void CmpXchg8bTest(void)
2253 2292 {
2254     typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
2255 2293     static struct
2256 2294     {
… …
2303 2341                      (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
2304 2342         RTTEST_CHECK(g_hTest, uB.u == uNewValue);
2343
2344         if (iTest == 2 && g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0)
2345         {
2346             uint32_t cIterations = EstimateIterations(_64K, CmpXchg8bBench(_64K, s_aFuncs[iFn].pfn,
2347                                                                            uOldValue, uOldValue, uNewValue, fEflIn));
2348             uint64_t cNsRealRun = CmpXchg8bBench(cIterations, s_aFuncs[iFn].pfn, uOldValue, uOldValue, uNewValue, fEflIn);
2349             RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL,
2350                          "%s-positive", s_aFuncs[iFn].pszName);
2351
2352             cIterations = EstimateIterations(_64K, CmpXchg8bBench(_64K, s_aFuncs[iFn].pfn,
2353                                                                   ~uOldValue, uOldValue, uNewValue, fEflIn));
2354             cNsRealRun = CmpXchg8bBench(cIterations, s_aFuncs[iFn].pfn, ~uOldValue, uOldValue, uNewValue, fEflIn);
2355             RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL,
2356                          "%s-negative", s_aFuncs[iFn].pszName);
2357         }
2305 2358     }
2306 2359 }
… …
2423 2476 #endif
2424 2477
2425 #define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2478 #define TEST_SHIFT_DBL(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2426 2479 TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
2427 2480 \
… …
2435 2488 \
2436 2489 GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2490 \
2491 static uint64_t ShiftDblU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn, a_TestType const *pEntry) \
2492 { \
2493     uint32_t const fEflIn = pEntry->fEflIn; \
2494     a_uType const uDstIn = pEntry->uDstIn; \
2495     a_uType const uSrcIn = pEntry->uSrcIn; \
2496     a_uType const cShift = pEntry->uMisc; \
2497     cIterations /= 4; \
2498     RTThreadYield(); \
2499     uint64_t const nsStart = RTTimeNanoTS(); \
2500     for (uint32_t i = 0; i < cIterations; i++) \
2501     { \
2502         uint32_t fBenchEfl = fEflIn; \
2503         a_uType uBenchDst = uDstIn; \
2504         pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl); \
2505 \
2506         fBenchEfl = fEflIn; \
2507         uBenchDst = uDstIn; \
2508         pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl); \
2509 \
2510         fBenchEfl = fEflIn; \
2511         uBenchDst = uDstIn; \
2512         pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl); \
2513 \
2514         fBenchEfl = fEflIn; \
2515         uBenchDst = uDstIn; \
2516         pfn(&uBenchDst, uSrcIn, cShift, &fBenchEfl); \
2517     } \
2518     return RTTimeNanoTS() - nsStart; \
2519 } \
2437 2520 \
2438 2521 static void ShiftDblU ## a_cBits ## Test(void) \
… …
2452 2535         { \
2453 2536             uint32_t fEfl = paTests[iTest].fEflIn; \
2454             a_Type uDst = paTests[iTest].uDstIn; \
2537             a_uType uDst = paTests[iTest].uDstIn; \
2455 2538             pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
2456 2539             if (   uDst != paTests[iTest].uDstOut \
… …
2470 2553             } \
2471 2554         } \
2555 \
2556         /* Benchmark if all succeeded. */ \
2557         if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
2558         { \
2559             uint32_t const iTest = cTests / 2; \
2560             uint32_t const cIterations = EstimateIterations(_64K, ShiftDblU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
2561             uint64_t const cNsRealRun = ShiftDblU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
2562             RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
2563                          "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
2564         } \
2565 \
2566         /* Next variation is native. */ \
2472 2567         pfn = a_aSubTests[iFn].pfnNative; \
2473 2568     } \
… …
2543 2638 #endif
2544 2639
2545 #define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2640 #define TEST_UNARY(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2546 2641 TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
2547 static a_SubTestType g_aUnaryU ## a_cBits[] = \
2642 static a_SubTestType a_aSubTests[] = \
2548 2643 { \
2549 2644     ENTRY_BIN(inc_u ## a_cBits), \
… …
2557 2652 }; \
2558 2653 \
2559 GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2654 GEN_UNARY(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType) \
2655 \
2656 static uint64_t UnaryU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLUNARYU ## a_cBits pfn, a_TestType const *pEntry) \
2657 { \
2658     uint32_t const fEflIn = pEntry->fEflIn; \
2659     a_uType const uDstIn = pEntry->uDstIn; \
2660     cIterations /= 4; \
2661     RTThreadYield(); \
2662     uint64_t const nsStart = RTTimeNanoTS(); \
2663     for (uint32_t i = 0; i < cIterations; i++) \
2664     { \
2665         uint32_t fBenchEfl = fEflIn; \
2666         a_uType uBenchDst = uDstIn; \
2667         pfn(&uBenchDst, &fBenchEfl); \
2668 \
2669         fBenchEfl = fEflIn; \
2670         uBenchDst = uDstIn; \
2671         pfn(&uBenchDst, &fBenchEfl); \
2672 \
2673         fBenchEfl = fEflIn; \
2674         uBenchDst = uDstIn; \
2675         pfn(&uBenchDst, &fBenchEfl); \
2676 \
2677         fBenchEfl = fEflIn; \
2678         uBenchDst = uDstIn; \
2679         pfn(&uBenchDst, &fBenchEfl); \
2680     } \
2681     return RTTimeNanoTS() - nsStart; \
2682 } \
2560 2683 \
2561 2684 static void UnaryU ## a_cBits ## Test(void) \
2562 2685 { \
2563     for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
2686     for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2564 2687     { \
2565         if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aUnaryU ## a_cBits[iFn])) \
2688         if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
2566 2689             continue; \
2567         a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
2568         uint32_t const cTests = g_aUnaryU ## a_cBits[iFn].cTests; \
2690         PFNIEMAIMPLUNARYU ## a_cBits const pfn = a_aSubTests[iFn].pfn; \
2691         a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2692         uint32_t const cTests = a_aSubTests[iFn].cTests; \
2569 2693         if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2570 2694         for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2571 2695         { \
2572 2696             uint32_t fEfl = paTests[iTest].fEflIn; \
2573             a_Type uDst = paTests[iTest].uDstIn; \
2574             g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
2697             a_uType uDst = paTests[iTest].uDstIn; \
2698             pfn(&uDst, &fEfl); \
2575 2699             if (   uDst != paTests[iTest].uDstOut \
2576 2700                 || fEfl != paTests[iTest].fEflOut) \
… …
2583 2707                 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2584 2708                 *g_pfEfl = paTests[iTest].fEflIn; \
2585                 g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
2709                 pfn(g_pu ## a_cBits, g_pfEfl); \
2586 2710                 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2587 2711                 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2588 2712             } \
2589 2713         } \
2590         FREE_DECOMPRESSED_TESTS(g_aUnaryU ## a_cBits[iFn]); \
2714 \
2715         if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
2716         { \
2717             uint32_t const iTest = cTests / 2; \
2718             uint32_t const cIterations = EstimateIterations(_64K, UnaryU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
2719             uint64_t const cNsRealRun = UnaryU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
2720             RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, "%s", a_aSubTests[iFn].pszName); \
2721         } \
2722 \
2723         FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
2591 2724     } \
2592 2725 }
2593 TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
2594 TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
2595 TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
2596 TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
2726 TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T, g_aUnaryU8)
2727 TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T, g_aUnaryU16)
2728 TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T, g_aUnaryU32)
2729 TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T, g_aUnaryU64)
2597 2730
2598 2731 #ifdef TSTIEMAIMPL_WITH_GENERATOR
… …
2674 2807 #endif
2675 2808
2676 #define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2809 #define TEST_SHIFT(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2677 2810 TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
2678 2811 static a_SubTestType a_aSubTests[] = \
… …
2685 2818     ENTRY_BIN_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
2686 2819     ENTRY_BIN_AMD(  rcr_u ## a_cBits, X86_EFL_OF), \
2687     ENTRY_BIN_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2820     ENTRY_BIN_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2688 2821     ENTRY_BIN_AMD(  shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2689 2822     ENTRY_BIN_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
… …
2695 2828 \
2696 2829 GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2830 \
2831 static uint64_t ShiftU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLSHIFTU ## a_cBits pfn, a_TestType const *pEntry) \
2832 { \
2833     uint32_t const fEflIn = pEntry->fEflIn; \
2834     a_uType const uDstIn = pEntry->uDstIn; \
2835     a_uType const cShift = pEntry->uMisc; \
2836     cIterations /= 4; \
2837     RTThreadYield(); \
2838     uint64_t const nsStart = RTTimeNanoTS(); \
2839     for (uint32_t i = 0; i < cIterations; i++) \
2840     { \
2841         uint32_t fBenchEfl = fEflIn; \
2842         a_uType uBenchDst = uDstIn; \
2843         pfn(&uBenchDst, cShift, &fBenchEfl); \
2844 \
2845         fBenchEfl = fEflIn; \
2846         uBenchDst = uDstIn; \
2847         pfn(&uBenchDst, cShift, &fBenchEfl); \
2848 \
2849         fBenchEfl = fEflIn; \
2850         uBenchDst = uDstIn; \
2851         pfn(&uBenchDst, cShift, &fBenchEfl); \
2852 \
2853         fBenchEfl = fEflIn; \
2854         uBenchDst = uDstIn; \
2855         pfn(&uBenchDst, cShift, &fBenchEfl); \
2856     } \
2857     return RTTimeNanoTS() - nsStart; \
2858 } \
2697 2859 \
2698 2860 static void ShiftU ## a_cBits ## Test(void) \
… …
2712 2874         { \
2713 2875             uint32_t fEfl = paTests[iTest].fEflIn; \
2714             a_Type uDst = paTests[iTest].uDstIn; \
2876             a_uType uDst = paTests[iTest].uDstIn; \
2715 2877             pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
2716 2878             if (   uDst != paTests[iTest].uDstOut \
… …
2730 2892             } \
2731 2893         } \
2894 \
2895         /* Benchmark if all succeeded. */ \
2896         if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
2897         { \
2898             uint32_t const iTest = cTests / 2; \
2899             uint32_t const cIterations = EstimateIterations(_64K, ShiftU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
2900             uint64_t const cNsRealRun = ShiftU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
2901             RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
2902                          "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
2903         } \
2904 \
2905         /* Next variation is native. */ \
2732 2906         pfn = a_aSubTests[iFn].pfnNative; \
2733 2907     } \
… …
2853 3027 #endif
2854 3028
3029 static uint64_t MulDivU8Bench(uint32_t cIterations, PFNIEMAIMPLMULDIVU8 pfn, MULDIVU8_TEST_T const *pEntry)
3030 {
3031     uint32_t const fEflIn = pEntry->fEflIn;
3032     uint16_t const uDstIn = pEntry->uDstIn;
3033     uint8_t const uSrcIn = pEntry->uSrcIn;
3034     cIterations /= 4;
3035     RTThreadYield();
3036     uint64_t const nsStart = RTTimeNanoTS();
3037     for (uint32_t i = 0; i < cIterations; i++)
3038     {
3039         uint32_t fBenchEfl = fEflIn;
3040         uint16_t uBenchDst = uDstIn;
3041         pfn(&uBenchDst, uSrcIn, &fBenchEfl);
3042
3043         fBenchEfl = fEflIn;
3044         uBenchDst = uDstIn;
3045         pfn(&uBenchDst, uSrcIn, &fBenchEfl);
3046
3047         fBenchEfl = fEflIn;
3048         uBenchDst = uDstIn;
3049         pfn(&uBenchDst, uSrcIn, &fBenchEfl);
3050
3051         fBenchEfl = fEflIn;
3052         uBenchDst = uDstIn;
3053         pfn(&uBenchDst, uSrcIn, &fBenchEfl);
3054     }
3055     return RTTimeNanoTS() - nsStart;
3056 }
3057
2855 3058 static void MulDivU8Test(void)
2856 3059 {
2857 3060     for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2858 3061     {
2859         if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aMulDivU8[iFn])) \
3062         if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aMulDivU8[iFn]))
2860             continue; \
3063             continue;
2861 3064         MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2862 3065         uint32_t const cTests = g_aMulDivU8[iFn].cTests;
2863 3066         uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2864 3067         PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
2865         uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
3068         uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]);
2866 3069         if (!cTests) RTTestSkipped(g_hTest, "no tests");
2867 3070         for (uint32_t iVar = 0; iVar < cVars; iVar++)
… …
2892 3095             }
2893 3096         }
3097
3098             /* Benchmark if all succeeded. */
3099             if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0)
3100             {
3101                 uint32_t const iTest = cTests / 2;
3102                 uint32_t const cIterations = EstimateIterations(_64K, MulDivU8Bench(_64K, pfn, &paTests[iTest]));
3103                 uint64_t const cNsRealRun = MulDivU8Bench(cIterations, pfn, &paTests[iTest]);
3104                 RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL,
3105                              "%s%s", g_aMulDivU8[iFn].pszName, iVar ? "-native" : "");
3106             }
3107
3108             /* Next variation is native. */
2894 3109             pfn = g_aMulDivU8[iFn].pfnNative;
2895 3110         }
2896         FREE_DECOMPRESSED_TESTS(g_aMulDivU8[iFn]); \
3111         FREE_DECOMPRESSED_TESTS(g_aMulDivU8[iFn]);
2897 3112     }
2898 3113 }
… …
2986 3201 #endif
2987 3202
2988 #define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
3203 #define TEST_MULDIV(a_cBits, a_uType, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2989 3204 TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
2990 3205 static a_SubTestType a_aSubTests[] = \
… …
3001 3216 \
3002 3217 GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
3218 \
3219 static uint64_t MulDivU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLMULDIVU ## a_cBits pfn, a_TestType const *pEntry) \
3220 { \
3221     uint32_t const fEflIn = pEntry->fEflIn; \
3222     a_uType const uDst1In = pEntry->uDst1In; \
3223     a_uType const uDst2In = pEntry->uDst2In; \
3224     a_uType const uSrcIn = pEntry->uSrcIn; \
3225     cIterations /= 4; \
3226     RTThreadYield(); \
3227     uint64_t const nsStart = RTTimeNanoTS(); \
3228     for (uint32_t i = 0; i < cIterations; i++) \
3229     { \
3230         uint32_t fBenchEfl = fEflIn; \
3231         a_uType uBenchDst1 = uDst1In; \
3232         a_uType uBenchDst2 = uDst2In; \
3233         pfn(&uBenchDst1, &uBenchDst2, uSrcIn, &fBenchEfl); \
3234 \
3235         fBenchEfl = fEflIn; \
3236         uBenchDst1 = uDst1In; \
3237         uBenchDst2 = uDst2In; \
3238         pfn(&uBenchDst1, &uBenchDst2, uSrcIn, &fBenchEfl); \
3239 \
3240         fBenchEfl = fEflIn; \
3241         uBenchDst1 = uDst1In; \
3242         uBenchDst2 = uDst2In; \
3243         pfn(&uBenchDst1, &uBenchDst2, uSrcIn, &fBenchEfl); \
3244 \
3245         fBenchEfl = fEflIn; \
3246         uBenchDst1 = uDst1In; \
3247         uBenchDst2 = uDst2In; \
3248         pfn(&uBenchDst1, &uBenchDst2, uSrcIn, &fBenchEfl); \
3249     } \
3250     return RTTimeNanoTS() - nsStart; \
3251 } \
3003 3252 \
3004 3253 static void MulDivU ## a_cBits ## Test(void) \
… …
3019 3268         { \
3020 3269             uint32_t fEfl = paTests[iTest].fEflIn; \
3021             a_Type uDst1 = paTests[iTest].uDst1In; \
3022             a_Type uDst2 = paTests[iTest].uDst2In; \
3270             a_uType uDst1 = paTests[iTest].uDst1In; \
3271             a_uType uDst2 = paTests[iTest].uDst2In; \
3023 3272             int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
3024 3273             if (   uDst1 != paTests[iTest].uDst1Out \
… …
3048 3297             } \
3049 3298         } \
3299 \
3300         /* Benchmark if all succeeded. */ \
3301         if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
3302         { \
3303             uint32_t const iTest = cTests / 2; \
3304             uint32_t const cIterations = EstimateIterations(_64K, MulDivU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
3305             uint64_t const cNsRealRun = MulDivU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
3306             RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
3307                          "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
3308         } \
3309 \
3310         /* Next variation is native. */ \
3050 3311         pfn = a_aSubTests[iFn].pfnNative; \
3051 3312     } \
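All the new Bench helpers in this file share one timing skeleton: the iteration count is divided by four and the call is repeated four times per loop iteration to amortize loop overhead, and because the workers modify their destination and EFLAGS operands in place, the inputs are re-seeded before every call so each call starts from identical state. A minimal sketch of that skeleton, with a hypothetical FNWORKER standing in for the IEM helper signatures:

    #include <stdint.h>

    /* Hypothetical stand-in for an IEM unary worker: updates *puDst and *pfEfl in place. */
    typedef void FNWORKER(uint64_t *puDst, uint32_t *pfEfl);

    static void BenchLoopSkeleton(FNWORKER *pfn, uint32_t cIterations,
                                  uint64_t const uDstIn, uint32_t const fEflIn)
    {
        cIterations /= 4;               /* the loop body makes four calls */
        for (uint32_t i = 0; i < cIterations; i++)
        {
            uint32_t fEfl = fEflIn;     /* re-seed the inputs: the worker */
            uint64_t uDst = uDstIn;     /* clobbers them on every call.   */
            pfn(&uDst, &fEfl);

            fEfl = fEflIn;
            uDst = uDstIn;
            pfn(&uDst, &fEfl);

            fEfl = fEflIn;
            uDst = uDstIn;
            pfn(&uDst, &fEfl);

            fEfl = fEflIn;
            uDst = uDstIn;
            pfn(&uDst, &fEfl);
        }
    }

The RTThreadYield() call before the start timestamp hands back the current scheduler quantum, making it less likely that a context switch lands inside the measured window.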
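The reported figure comes from a two-phase measurement: a probe run of _64K (65536) calls is timed first, EstimateIterations (defined earlier in this testcase, outside this hunk) scales the count so the real run fills the period requested through g_cPicoSecBenchmark, and the second run's time is converted to picoseconds per call as cNs * 1000 / cIterations. A self-contained sketch of the same idea, using std::chrono and an assumed 250 ms target in place of the IPRT plumbing:

    #include <chrono>
    #include <cstdint>
    #include <cstdio>

    static volatile uint64_t g_uSink; /* keeps the optimizer from deleting the loop */

    /* Hypothetical stand-in for a worker under test. */
    static void WorkerToBench(uint64_t *puDst, uint32_t *pfEfl)
    {
        *puDst += *pfEfl;
    }

    /* Time cIterations calls, returning the elapsed nanoseconds. */
    static uint64_t BenchRun(uint32_t cIterations)
    {
        uint32_t fEfl = 0x202;
        uint64_t uDst = 42;
        auto const tsStart = std::chrono::steady_clock::now();
        for (uint32_t i = 0; i < cIterations; i++)
            WorkerToBench(&uDst, &fEfl);
        auto const cNs = std::chrono::duration_cast<std::chrono::nanoseconds>(
                             std::chrono::steady_clock::now() - tsStart).count();
        g_uSink = uDst;
        return (uint64_t)cNs;
    }

    int main()
    {
        /* Phase 1: probe with 64K calls to estimate the per-call cost. */
        uint32_t const cProbe   = 0x10000;
        uint64_t const cNsProbe = BenchRun(cProbe) + 1;  /* +1 avoids division by zero */

        /* Scale the count so the real run lasts ~250 ms. */
        uint64_t const cNsTarget   = UINT64_C(250000000);
        uint32_t const cIterations = (uint32_t)(cProbe * cNsTarget / cNsProbe) | 1;

        /* Phase 2: only the calibrated run is reported, in ps per call. */
        uint64_t const cNsReal = BenchRun(cIterations);
        printf("%llu ps/call\n", (unsigned long long)(cNsReal * 1000 / cIterations));
        return 0;
    }

Benchmarking only when RTTestSubErrorCount(g_hTest) == 0 also means a failing implementation never produces a timing value, so a regression cannot hide behind a fast-but-wrong result.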
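For cmpxchg8b the change benchmarks the success and failure paths separately: passing uOldValue as both the initial destination value and the comparand makes every compare succeed (the "-positive" value), while ~uOldValue as the destination guarantees a mismatch ("-negative"). Roughly, the semantics being timed, modelled in plain C++ with invented names:

    #include <cstdint>

    static uint32_t const MY_EFL_ZF = UINT32_C(1) << 6; /* stand-in for X86_EFL_ZF */

    /* Rough model of a CMPXCHG8B worker: compare the old value (EDX:EAX) with the
       destination; on match store the new value (ECX:EBX) and set ZF, otherwise
       copy the destination back into EDX:EAX and clear ZF. */
    static void CmpXchg8bModel(uint64_t *puDst, uint64_t *puEdxEax, uint64_t uEcxEbx, uint32_t *pfEfl)
    {
        if (*puDst == *puEdxEax)
        {
            *puDst  = uEcxEbx;
            *pfEfl |= MY_EFL_ZF;
        }
        else
        {
            *puEdxEax = *puDst;
            *pfEfl   &= ~MY_EFL_ZF;
        }
    }

The two outcomes execute different code paths and different stores, so timing them under one label would blur two distinct costs; reporting "%s-positive" and "%s-negative" keeps them apart.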