Changeset 96337 in vbox for trunk/src/VBox/Runtime/common/math/powcore.asm
- Timestamp:
- Aug 19, 2022 2:49:44 PM (2 years ago)
- File:
-
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/Runtime/common/math/powcore.asm
r96336 r96337 1 1 ; $Id$ 2 2 ;; @file 3 ; IPRT - No-CRT pow- AMD64 & X86.3 ; IPRT - No-CRT common pow code - AMD64 & X86. 4 4 ; 5 5 … … 52 52 53 53 ;; 54 ; Compute the rdBase to the power of rdExp. 55 ; @returns st(0) / xmm0 56 ; @param rdBase [xSP + xCB*2] / xmm0 57 ; @param rdExp [xSP + xCB*2 + RTLRD_CB] / xmm1 58 RT_NOCRT_BEGINPROC pow 54 ; Compute the st1 to the power of st0. 55 ; 56 ; @returns st(0) = result 57 ; eax = what's being returned: 58 ; 0 - Just a value. 59 ; 1 - The rBase value. Caller may take steps to ensure it's exactly the same. 60 ; 2 - The rExp value. Caller may take steps to ensure it's exactly the same. 61 ; @param rBase/st1 The base. 62 ; @param rExp/st0 The exponent 63 ; @param fFxamBase/dx The status flags after fxam(rBase). 64 ; @param enmType/ebx The original parameter and return types: 65 ; 0 - 32-bit / float 66 ; 1 - 64-bit / double 67 ; 2 - 80-bit / long double 68 ; 69 BEGINPROC rtNoCrtMathPowCore 59 70 push xBP 60 71 SEH64_PUSH_xBP … … 64 75 SEH64_ALLOCATE_STACK 30h 65 76 SEH64_END_PROLOGUE 66 67 ;68 ; Load rdBase into st1 and rdExp into st0.69 ;70 %ifdef RT_ARCH_AMD6471 movsd [xBP - 10h], xmm072 fld qword [xBP - 10h]73 fxam74 fnstsw ax75 mov dx, ax ; dx=fxam(base)76 77 movsd [xBP - 20h], xmm178 fld qword [xBP - 20h]79 %else80 fld qword [xBP + xCB*2]81 fxam82 fnstsw ax83 mov dx, ax ; dx=fxam(base)84 85 fld qword [xBP + xCB*2 + RTLRD_CB]86 %endif87 77 88 78 ; … … 212 202 213 203 .integer_exp_return: 214 ffreep st0 ; drop the factor -> st0=result 204 ffreep st0 ; drop the factor -> st0=result; no st1. 
215 205 jmp .return_val 216 206 … … 292 282 ; 293 283 .return_val: 294 %ifdef RT_ARCH_AMD64 295 fstp qword [xBP - 10h] 296 movsd xmm0, [xBP - 10h] 297 %endif 284 xor eax, eax 298 285 .return: 299 286 leave … … 334 321 .exp_zero: 335 322 .return_plus_one: 336 %ifdef RT_ARCH_AMD64337 movsd xmm0, qword [.s_r64PlusOne xWrtRIP]338 %else339 323 fld1 340 %endif341 324 jmp .return_pop_pop_val 342 325 … … 429 412 jnz .return_base_value ; Matching 8 430 413 .return_plus_zero: ; Matching 9 431 %ifdef RT_ARCH_AMD64432 movsd xmm0, qword [.s_r64PlusZero xWrtRIP]433 %else434 414 fldz 435 %endif436 415 jmp .return_pop_pop_val 437 416 … … 458 437 jz .return_plus_zero ; Matches 16 (exp not odd and < 0, base == -Inf) 459 438 .return_minus_zero: ; Matches 15 (exp is odd and < 0, base == -Inf) 460 %ifdef RT_ARCH_AMD64461 movsd xmm0, qword [.s_r64MinusZero xWrtRIP]462 %else463 439 fldz 464 440 fchs 465 %endif466 441 jmp .return_pop_pop_val 467 442 … … 479 454 ; 480 455 .return_exp_nan: 481 %ifdef RT_ARCH_AMD64482 movsd xmm0, xmm1483 %else484 456 fld st0 485 %endif 486 jmp .return_pop_pop_val 457 mov eax, 2 ; return param 2 458 jmp .return_pop_pop_val_with_eax 487 459 488 460 ; … … 492 464 .return_base_value: 493 465 .base_nan: ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN. 494 %ifdef RT_ARCH_AMD64495 ; xmm0 = base already496 %else497 466 fld st1 498 %endif 467 mov eax, 1 ; return param 1 468 jmp .return_pop_pop_val_with_eax 469 470 ; 471 ; Pops the two values off the FPU stack and returns NaN. 472 ; 473 .return_nan: 474 fld qword [.s_r64QNan xWrtRIP] 499 475 jmp .return_pop_pop_val 500 476 501 477 ; 502 ; Pops the two values off the FPU stack and returns NaN. 503 ; 504 .return_nan: 505 %ifdef RT_ARCH_AMD64 506 movsd xmm0, qword [.s_r64QNan xWrtRIP] 507 %else 508 fld qword [.s_r64QNan xWrtRIP] 509 %endif 478 ; Pops the two values off the FPU stack and returns +Inf. 
479 ; 480 .return_plus_inf: 481 fld qword [.s_r64PlusInf xWrtRIP] 510 482 jmp .return_pop_pop_val 511 483 512 484 ; 513 ; Pops the two values off the FPU stack and returns +Inf. 514 ; 515 .return_plus_inf: 516 %ifdef RT_ARCH_AMD64 517 movsd xmm0, qword [.s_r64PlusInf xWrtRIP] 518 %else 519 fld qword [.s_r64PlusInf xWrtRIP] 520 %endif 485 ; Pops the two values off the FPU stack and returns -Inf. 486 ; 487 .return_minus_inf: 488 fld qword [.s_r64MinusInf xWrtRIP] 521 489 jmp .return_pop_pop_val 522 490 523 491 ; 524 ; Pops the two values off the FPU stack and returns -Inf. 525 ; 526 .return_minus_inf: 527 %ifdef RT_ARCH_AMD64 528 movsd xmm0, qword [.s_r64MinusInf xWrtRIP] 529 %else 530 fld qword [.s_r64MinusInf xWrtRIP] 531 %endif 532 jmp .return_pop_pop_val 533 534 ; 535 ; AMD64: Return value in xmm0; Pop the two values on the FPU stack. 536 ; X86: Return st0, remove st1 and st2. 492 ; Return st0, remove st1 and st2. 537 493 ; 538 494 .return_pop_pop_val: 539 %ifdef RT_ARCH_AMD64 540 ffreep st0 541 ffreep st0 542 %else 495 xor eax, eax 496 .return_pop_pop_val_with_eax: 543 497 fstp st2 544 498 ffreep st0 545 %endif546 499 jmp .return 547 500 … … 570 523 .s_r64MinusInf: 571 524 dq RTFLOAT64U_INF_MINUS 572 %ifdef RT_ARCH_AMD64573 .s_r64PlusOne:574 dq +1.0575 .s_r64PlusZero:576 dq +0.0577 .s_r64MinusZero:578 dq -0.0579 %endif580 525 581 526 ;; … … 586 531 ; 587 532 .is_exp_odd_integer: 533 ; 588 534 ; Save the FPU environment and mask all exceptions. 535 ; 589 536 fnstenv [xBP - 30h] 590 537 mov ax, [xBP - 30h + X86FSTENV32P.FCW] … … 593 540 mov [xBP - 30h + X86FSTENV32P.FCW], ax 594 541 542 ; 595 543 ; Convert to 64-bit integer (probably not 100% correct). 
544 ; 596 545 fld st0 ; -> st0=exponent st1=exponent; st2=base; 597 546 fistp qword [xBP - 10h] 598 547 fild qword [xBP - 10h] ; -> st0=int(exponent) st1=exponent; st2=base; 599 548 fcomip st0, st1 ; -> st0=exponent; st1=base; 600 jne .is_exp_odd_integer_ return_false ; jump if not integer.549 jne .is_exp_odd_integer__return_false ; jump if not integer. 601 550 mov xAX, [xBP - 10h] 602 551 %ifdef … … 604 553 %endif 605 554 555 ; 606 556 ; Check the lowest bit if it might be odd. 557 ; This works both for positive and negative numbers. 558 ; 607 559 test al, 1 608 jz .is_exp_odd_integer_return_false ; jump if even. 609 560 jz .is_exp_odd_integer__return_false ; jump if even. 561 562 ; 610 563 ; If the result is negative, convert to positive. 564 ; 611 565 %ifdef RT_ARCH_AMD64 612 566 bt rax, 63 … … 614 568 bt edx, 31 615 569 %endif 616 jnc .is_exp_odd_integer_ positive570 jnc .is_exp_odd_integer__positive 617 571 %ifdef RT_ARCH_AMD64 618 572 neg xAX … … 622 576 sbb edx, 0 623 577 %endif 624 .is_exp_odd_integer_positive: 625 578 .is_exp_odd_integer__positive: 579 580 ; 626 581 ; Now find the most significant bit in the value so we can verify that 627 582 ; the odd bit was part of the mantissa/fraction of the input. 583 ; 584 cmp bl, 3 ; Skip if 80-bit input, as it has a 64-bit mantissa which 585 je .is_exp_odd_integer__return_true ; gives it 1 bit more precision than our integer reg(s). 586 628 587 %ifdef RT_ARCH_AMD64 629 588 bsr rax, rax 630 589 %else 631 590 bsr edx, edx 632 jnz .is_exp_odd_integer_ high_dword_is_zero591 jnz .is_exp_odd_integer__high_dword_is_zero 633 592 lea eax, [edx + 20h] 634 jmp .is_exp_odd_integer_ first_bit_in_eax593 jmp .is_exp_odd_integer__first_bit_in_eax 635 .is_exp_odd_integer_ high_dword_is_zero:594 .is_exp_odd_integer__high_dword_is_zero: 636 595 bsr eax, eax 637 .is_exp_odd_integer_first_bit_in_eax: 638 %endif 639 ; The limit is 53 for double precision (one implicit bit + 52 bits fraction). 
640 cmp eax, 53 641 jae .is_exp_odd_integer_return_false 596 .is_exp_odd_integer__first_bit_in_eax: 597 %endif 598 ; 599 ; The limit is 53 for double precision (one implicit bit + 52 bits fraction), 600 ; and 24 for single precision types. 601 ; 602 mov ah, 53 ; RTFLOAT64U_FRACTION_BITS + 1 603 cmp bl, 0 604 jne .is_exp_odd_integer__is_double_limit 605 mov ah, 24 ; RTFLOAT32U_FRACTION_BITS + 1 606 .is_exp_odd_integer__is_double_limit: 607 608 cmp al, ah 609 jae .is_exp_odd_integer__return_false 642 610 mov eax, 1 643 jmp .is_exp_odd_integer_return644 611 645 612 ; Return. 646 .is_exp_odd_integer_return_false: 613 .is_exp_odd_integer__return_true: 614 jmp .is_exp_odd_integer__return 615 .is_exp_odd_integer__return_false: 647 616 xor eax, eax 648 .is_exp_odd_integer_ return:617 .is_exp_odd_integer__return: 649 618 ffreep st0 650 619 fldenv [xBP - 30h] 651 620 ret 652 621 653 ENDPROC RT_NOCRT(pow)654 622 ENDPROC rtNoCrtMathPowCore 623
Note:
See TracChangeset
for help on using the changeset viewer.