Changeset 96337 in vbox
- Timestamp:
- Aug 19, 2022 2:49:44 PM (2 years ago)
- Location:
- trunk/src/VBox/Runtime
- Files:
-
- 3 edited
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/Runtime/Makefile.kmk
r96321 r96337 1949 1949 common/math/lrintf.asm \ 1950 1950 common/math/pow.asm \ 1951 common/math/powcore.asm \ 1951 1952 common/math/remainder.asm \ 1952 1953 common/math/remainderf.asm \ … … 2025 2026 common/math/lrintf.asm \ 2026 2027 common/math/pow.asm \ 2028 common/math/powcore.asm \ 2027 2029 common/math/remainder.asm \ 2028 2030 common/math/remainderf.asm \ -
trunk/src/VBox/Runtime/common/math/pow.asm
r96321 r96337 33 33 BEGINCODE 34 34 35 extern NAME(RT_NOCRT(feraiseexcept)) 36 37 ;; 38 ; Call feraiseexcept(%1) 39 %macro CALL_feraiseexcept_WITH 1 40 %ifdef RT_ARCH_X86 41 mov dword [xSP], X86_FSW_IE 42 %elifdef ASM_CALL64_GCC 43 mov edi, X86_FSW_IE 44 %elifdef ASM_CALL64_MSC 45 mov ecx, X86_FSW_IE 46 %else 47 %error calling conv. 48 %endif 49 call NAME(RT_NOCRT(feraiseexcept)) 50 %endmacro 51 35 extern NAME(rtNoCrtMathPowCore) 52 36 53 37 ;; … … 61 45 mov xBP, xSP 62 46 SEH64_SET_FRAME_xBP 0 63 sub xSP, 30h 64 SEH64_ALLOCATE_STACK 30h 47 push xBX 48 SEH64_PUSH_GREG rbx 49 sub xSP, 30h - xCB 50 SEH64_ALLOCATE_STACK 30h - xCB 65 51 SEH64_END_PROLOGUE 66 52 … … 69 55 ; 70 56 %ifdef RT_ARCH_AMD64 71 movsd [xBP - 10h], xmm072 fld qword [xBP - 10h]57 movsd [xBP - 20h], xmm0 58 fld qword [xBP - 20h] 73 59 fxam 74 60 fnstsw ax 75 61 mov dx, ax ; dx=fxam(base) 76 62 77 movsd [xBP - 20h], xmm178 fld qword [xBP - 20h]63 movsd [xBP - 30h], xmm1 64 fld qword [xBP - 30h] 79 65 %else 80 66 fld qword [xBP + xCB*2] … … 87 73 88 74 ; 89 ; Weed out special values, starting with the exponent.75 ; Call common worker for the calculation. 90 76 ; 91 fxam 92 fnstsw ax 93 mov cx, ax ; cx=fxam(exp) 94 95 and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 96 cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero) 97 je .exp_finite 98 cmp ax, X86_FSW_C3 ; Zero 99 je .exp_zero 100 cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals 101 je .exp_finite 102 cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity. 103 je .exp_inf 104 jmp .exp_nan 105 106 .exp_finite: 107 ; 108 ; Detect special base values. 109 ; 110 mov ax, dx ; ax=fxam(base) 111 and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 112 cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero) 113 je .base_finite 114 cmp ax, X86_FSW_C3 ; Zero 115 je .base_zero 116 cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals 117 je .base_finite 118 cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity. 119 je .base_inf 120 jmp .base_nan 121 122 .base_finite: 123 ; 124 ; 1 in the base is also special. 125 ; Rule 6 (see below): base == +1 and exponent = whatever: Return +1.0 126 ; 127 fld1 128 fcomip st0, st2 129 je .return_base_value 77 mov ebx, 1 ; float 78 call NAME(rtNoCrtMathPowCore) 130 79 131 80 ; 132 ; Check if the exponent is an integer value we can handle in a 64-bit133 ; GRP as that is simpler to handle accurately.81 ; Normally, we return with eax==0 and we have to load the result 82 ; from st0 and into xmm0. 134 83 ; 135 ; In 64-bit integer range? 136 fld tword [.s_r80MaxInt xWrtRIP] 137 fcomip st0, st1 138 jb .not_integer_exp 84 cmp eax, 0 85 jne .return_input_reg 139 86 140 fld tword [.s_r80MinInt xWrtRIP] 141 fcomip st0, st1 142 ja .not_integer_exp 87 fstp qword [xSP - 30h] 88 movsd xmm0, [xSP - 30h] 143 89 144 ; Convert it to integer.145 fld st0 ; -> st0=exp; st1=exp; st2=base146 fistp qword [xBP - 8] ; Save and pop 64-bit int (no non-popping version of this instruction).147 148 fild qword [xBP - 8] ; Load it again for comparison.149 fucomip st0, st1 ; Compare integer exp and floating point exp to see if they are the same. Pop.150 jne .not_integer_exp151 152 153 ;154 ;155 ; Ok, we've got an integer exponent value in that fits into a 64-bit.156 ; We'll multiply the base exponention bit by exponention bit, applying157 ; it as a factor for bits that are set.158 ;159 ;160 .integer_exp:161 ; Load the integer value into edx:exx / rdx and ditch the floating point exponent.162 mov xDX, [xBP - 8]163 %ifdef RT_ARCH_X86164 mov eax, [xBP - 8 + 4]165 %endif166 ffreep st0 ; -> st0=base;167 168 ; Load a 1 onto the stack, we'll need it below as well as for converting169 ; a negative exponent to a positive one.170 fld1 ; -> st0=1.0; st1=base;171 172 ; If the exponent is negative, negate it and change base to 1/base.173 or xDX, xDX174 jns .integer_exp_positive175 neg xDX176 %ifdef RT_ARCH_X86177 neg eax178 sbb edx, 0179 %endif180 fdivr st1, st0 ; -> st0=1.0; st1=1/base181 .integer_exp_positive:182 183 ;184 ; We'll process edx:eax / rdx bit by bit till it's zero, using st0 for185 ; the multiplication factor corresponding to the current exponent bit186 ; and st1 as the result.187 ;188 fxch ; -> st0=base; st1=1.0;189 .integer_exp_loop:190 %ifdef RT_ARCH_X86191 shrd eax, edx, 1192 %else193 shr rdx, 1194 %endif195 jnc .integer_exp_loop_advance196 fmul st1, st0197 198 .integer_exp_loop_advance:199 ; Check if we're done.200 %ifdef RT_ARCH_AMD64201 jz .integer_exp_return ; (we will have the flags for the shr rdx above)202 %else203 shr edx, 1 ; complete the above shift operation204 205 mov ecx, edx ; check if edx:eax is zero.206 or ecx, eax207 jz .integer_exp_return208 %endif209 ; Calculate the factor for the next bit.210 fmul st0, st0211 jmp .integer_exp_loop212 213 .integer_exp_return:214 ffreep st0 ; drop the factor -> st0=result215 jmp .return_val216 217 218 ;219 ;220 ; Non-integer or value was out of range for an int64_t.221 ;222 ; The approach here is the same as in exp.asm, only we have to do the223 ; log2(base) calculation first as it's a parameter and not a constant.224 ;225 ;226 .not_integer_exp:227 228 ; First reject negative numbers. We still have the fxam(base) status in dx.229 test dx, X86_FSW_C1230 jnz .base_negative_non_integer_exp231 232 ; Swap the items on the stack, so we can process the base first.233 fxch st0, st1 ; -> st0=base; st1=exponent;234 235 ;236 ; From log2.asm:237 ;238 ; The fyl2xp1 instruction (ST1=ST1*log2(ST0+1.0), popping ST0) has a239 ; valid ST0 range of 1(1-sqrt(0.5)) (approx 0.29289321881) on both240 ; sides of zero. We try use it if we can.241 ;242 .above_one:243 ; For both fyl2xp1 and fyl2xp1 we need st1=1.0.244 fld1245 fxch st0, st1 ; -> st0=base; st1=1.0; st2=exponent246 247 ; Check if the input is within the fyl2xp1 range.248 fld qword [.s_r64AbsFyL2xP1InputMax xWrtRIP]249 fcomip st0, st1250 jbe .cannot_use_fyl2xp1251 252 fld qword [.s_r64AbsFyL2xP1InputMin xWrtRIP]253 fcomip st0, st1254 jae .cannot_use_fyl2xp1255 256 ; Do the calculation.257 .use_fyl2xp1:258 fsub st0, st1 ; -> st0=base-1; st1=1.0; st2=exponent259 fyl2xp1 ; -> st0=1.0*log2(base-1.0+1.0); st1=exponent260 jmp .done_log2261 262 .cannot_use_fyl2xp1:263 fyl2x ; -> st0=1.0*log2(base); st1=exponent264 .done_log2:265 266 ;267 ; From exp.asm:268 ;269 ; Convert to power of 2 and it'll be the same as exp2.270 ;271 fmulp ; st0=log2(base); st1=exponent -> st0=pow2exp272 273 ;274 ; Split the job in two on the fraction and integer l2base parts.275 ;276 fld st0 ; Push a copy of the pow2exp on the stack.277 frndint ; st0 = (int)pow2exp278 fsub st1, st0 ; st1 = pow2exp - (int)pow2exp; i.e. st1 = fraction, st0 = integer.279 fxch ; st0 = fraction, st1 = integer.280 281 ; 1. Calculate on the fraction.282 f2xm1 ; st0 = 2**fraction - 1.0283 fld1284 faddp ; st0 = 2**fraction285 286 ; 2. Apply the integer power of two.287 fscale ; st0 = result; st1 = integer part of pow2exp.288 fstp st1 ; st0 = result; no st1.289 290 ;291 ; Return st0.292 ;293 .return_val:294 %ifdef RT_ARCH_AMD64295 fstp qword [xBP - 10h]296 movsd xmm0, [xBP - 10h]297 %endif298 90 .return: 91 lea xSP, [xBP - xCB] 92 pop xBX 299 93 leave 300 94 ret 301 95 302 303 96 ; 97 ; But sometimes, like if we have NaN or other special inputs, we should 98 ; return the input as-is and ditch the st0 value. 304 99 ; 305 ; pow() has a lot of defined behavior for special values, which is why 306 ; this is the largest and most difficult part of the code. :-) 307 ; 308 ; On https://pubs.opengroup.org/onlinepubs/9699919799/functions/pow.html 309 ; there are 21 error conditions listed in the return value section. 310 ; The code below refers to this by number. 311 ; 312 ; When we get here: 313 ; dx=fxam(base) 314 ; cx=fxam(exponent) 315 ; st1=base 316 ; st0=exponent 317 ; 318 319 ; 320 ; 1. Finit base < 0 and finit non-interger exponent: -> domain error (#IE) + NaN. 321 ; 322 ; The non-integer exponent claim might be wrong, as we only check if it 323 ; fits into a int64_t register. But, I don't see how we can calculate 324 ; it right now. 325 ; 326 .base_negative_non_integer_exp: 327 CALL_feraiseexcept_WITH X86_FSW_IE 328 jmp .return_nan 329 330 ; 331 ; 7. Exponent = +/-0.0, any base value including NaN: return +1.0 332 ; Note! According to https://en.cppreference.com/w/c/numeric/math/pow a 333 ; domain error (#IE) occur if base=+/-0. Not implemented. 334 .exp_zero: 335 .return_plus_one: 336 %ifdef RT_ARCH_AMD64 337 movsd xmm0, qword [.s_r64PlusOne xWrtRIP] 338 %else 339 fld1 100 .return_input_reg: 101 ffreep st0 102 cmp eax, 2 103 je .return_exp 104 %ifdef RT_STRICT 105 cmp eax, 1 106 je .return_base 107 int3 340 108 %endif 341 jmp .return_pop_pop_val 342 343 ; 344 ; 6. Exponent = whatever and base = 1: Return 1.0 345 ; 10. Exponent = +/-Inf and base = -1: Return 1.0 346 ;6+10 => Exponent = +/-Inf and |base| = 1: Return 1.0 347 ; 11. Exponent = -Inf and |base| < 1: Return +Inf 348 ; 12. Exponent = -Inf and |base| > 1: Return +0 349 ; 13. Exponent = +Inf and |base| < 1: Return +0 350 ; 14. Exponent = +Inf and |base| > 1: Return +Inf 351 ; 352 ; Note! Rule 4 would trigger for the same conditions as 11 when base == 0, 353 ; but it's optional to raise div/0 and it's apparently marked as 354 ; obsolete in C23, so not implemented. 355 ; 356 .exp_inf: 357 ; Check if base is NaN or unsupported. 358 and dx, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 ; fxam(base) 359 cmp dx, X86_FSW_C0 360 jbe .return_base_nan 361 362 ; Calc fabs(base) and replace the exponent with 1.0 as we're very likely to need this here. 363 ffreep st0 364 fabs 365 fld1 ; st0=1.0; st1=|rdBase| 366 fcomi st0, st1 367 je .return_plus_one ; Matches rule 6 + 10 (base is +/-1). 368 ja .exp_inf_base_smaller_than_one 369 .exp_inf_base_larger_than_one: 370 test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign 371 jz .return_plus_inf ; Matches rule 14 (exponent is +Inf). 372 jmp .return_plus_zero ; Matches rule 12 (exponent is -Inf). 373 374 .exp_inf_base_smaller_than_one: 375 test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign 376 jnz .return_plus_inf ; Matches rule 11 (exponent is -Inf). 377 jmp .return_plus_zero ; Matches rule 13 (exponent is +Inf). 378 379 ; 380 ; 6. Exponent = whatever and base = 1: Return 1.0 381 ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN. 382 ; 383 .exp_nan: 384 ; Check if base is a number and possible 1. 385 test dx, X86_FSW_C2 ; dx=fxam(base); C2 is set for finite number, infinity and denormals. 386 jz .return_exp_nan 387 fld1 388 fcomip st0, st2 389 jne .return_exp_nan 390 jmp .return_plus_one 391 392 ; 393 ; 4a. base == +/-0.0 and exp < 0 and exp is odd integer: Return +/-Inf, raise div/0. 394 ; 4b. base == +/-0.0 and exp < 0 and exp is not odd int: Return +Inf, raise div/0. 395 ; 8. base == +/-0.0 and exp > 0 and exp is odd integer: Return +/-0.0 396 ; 9. base == +/-0.0 and exp > 0 and exp is not odd int: Return +0 397 ; 398 ; Note! Exponent must be finite and non-zero if we get here. 399 ; 400 .base_zero: 401 fldz 402 fcomip st0, st1 403 jbe .base_zero_plus_exp 404 .base_zero_minus_exp: 405 mov cx, dx ; stashing fxam(base) in CX because EDX is trashed by .is_exp_odd_integer 406 call .is_exp_odd_integer ; trashes EDX but no ECX. 407 or eax, eax 408 jz .base_zero_minus_exp_not_odd_int 409 410 ; Matching 4a. 411 .base_zero_minus_exp_odd_int: 412 test cx, X86_FSW_C1 ; base sign 413 jz .raise_de_and_return_plus_inf 414 .raise_de_and_return_minus_inf: 415 CALL_feraiseexcept_WITH X86_FSW_DE 416 jmp .return_minus_inf 417 .raise_de_and_return_plus_inf: 418 CALL_feraiseexcept_WITH X86_FSW_DE 419 jmp .return_plus_inf 420 421 ; Matching 4b. 422 .base_zero_minus_exp_not_odd_int: 423 CALL_feraiseexcept_WITH X86_FSW_DE 424 jmp .return_plus_inf 425 426 .base_zero_plus_exp: 427 call .is_exp_odd_integer 428 or eax, eax 429 jnz .return_base_value ; Matching 8 430 .return_plus_zero: ; Matching 9 431 %ifdef RT_ARCH_AMD64 432 movsd xmm0, qword [.s_r64PlusZero xWrtRIP] 433 %else 434 fldz 435 %endif 436 jmp .return_pop_pop_val 437 438 ; 439 ; 15. base == -Inf and exp < 0 and exp is odd integer: Return -0 440 ; 16. base == -Inf and exp < 0 and exp is not odd int: Return +0 441 ; 17. base == -Inf and exp > 0 and exp is odd integer: Return -Inf 442 ; 18. base == -Inf and exp > 0 and exp is not odd int: Return +Inf 443 ; 19. base == +Inf and exp < 0: Return +0 444 ; 20. base == +Inf and exp > 0: Return +Inf 445 ; 446 ; Note! Exponent must be finite and non-zero if we get here. 447 ; 448 .base_inf: 449 fldz 450 fcomip st0, st1 451 jbe .base_inf_plus_exp 452 .base_inf_minus_exp: 453 test dx, X86_FSW_C1 454 jz .return_plus_zero ; Matches 19 (base == +Inf). 455 .base_minus_inf_minus_exp: 456 call .is_exp_odd_integer 457 or eax, eax 458 jz .return_plus_zero ; Matches 16 (exp not odd and < 0, base == -Inf) 459 .return_minus_zero: ; Matches 15 (exp is odd and < 0, base == -Inf) 460 %ifdef RT_ARCH_AMD64 461 movsd xmm0, qword [.s_r64MinusZero xWrtRIP] 462 %else 463 fldz 464 fchs 465 %endif 466 jmp .return_pop_pop_val 467 468 .base_inf_plus_exp: 469 test dx, X86_FSW_C1 470 jz .return_plus_inf ; Matches 20 (base == +Inf). 471 .base_minus_inf_plus_exp: 472 call .is_exp_odd_integer 473 or eax, eax 474 jnz .return_minus_inf ; Matches 17 (exp is odd and > 0, base == +Inf) 475 jmp .return_plus_inf ; Matches 18 (exp not odd and > 0, base == +Inf) 476 477 ; 478 ; Return the exponent NaN (or whatever) value. 479 ; 480 .return_exp_nan: 481 %ifdef RT_ARCH_AMD64 482 movsd xmm0, xmm1 483 %else 484 fld st0 485 %endif 486 jmp .return_pop_pop_val 487 488 ; 489 ; Return the base NaN (or whatever) value. 490 ; 491 .return_base_nan: 492 .return_base_value: 493 .base_nan: ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN. 494 %ifdef RT_ARCH_AMD64 495 ; xmm0 = base already 496 %else 497 fld st1 498 %endif 499 jmp .return_pop_pop_val 500 501 ; 502 ; Pops the two values off the FPU stack and returns NaN. 503 ; 504 .return_nan: 505 %ifdef RT_ARCH_AMD64 506 movsd xmm0, qword [.s_r64QNan xWrtRIP] 507 %else 508 fld qword [.s_r64QNan xWrtRIP] 509 %endif 510 jmp .return_pop_pop_val 511 512 ; 513 ; Pops the two values off the FPU stack and returns +Inf. 514 ; 515 .return_plus_inf: 516 %ifdef RT_ARCH_AMD64 517 movsd xmm0, qword [.s_r64PlusInf xWrtRIP] 518 %else 519 fld qword [.s_r64PlusInf xWrtRIP] 520 %endif 521 jmp .return_pop_pop_val 522 523 ; 524 ; Pops the two values off the FPU stack and returns -Inf. 525 ; 526 .return_minus_inf: 527 %ifdef RT_ARCH_AMD64 528 movsd xmm0, qword [.s_r64MinusInf xWrtRIP] 529 %else 530 fld qword [.s_r64MinusInf xWrtRIP] 531 %endif 532 jmp .return_pop_pop_val 533 534 ; 535 ; AMD64: Return value in xmm0; Pop the two values on the FPU stack. 536 ; X86: Return st0, remove st1 and st2. 537 ; 538 .return_pop_pop_val: 539 %ifdef RT_ARCH_AMD64 540 ffreep st0 541 ffreep st0 542 %else 543 fstp st2 544 ffreep st0 545 %endif 109 .return_base: 546 110 jmp .return 547 111 548 549 ALIGNCODE(8) 550 .s_r80MaxInt: 551 dt +9223372036854775807.0 552 553 ALIGNCODE(8) 554 .s_r80MinInt: 555 dt -9223372036854775807.0 556 557 ALIGNCODE(8) 558 ;; The fyl2xp1 instruction only works between +/-1(1-sqrt(0.5)). 559 ; These two variables is that range + 1.0, so we can compare directly 560 ; with the input w/o any extra fsub and fabs work. 561 .s_r64AbsFyL2xP1InputMin: 562 dq 0.708 ; -0.292 + 1.0 563 .s_r64AbsFyL2xP1InputMax: 564 dq 1.292 565 566 .s_r64QNan: 567 dq RTFLOAT64U_QNAN_MINUS 568 .s_r64PlusInf: 569 dq RTFLOAT64U_INF_PLUS 570 .s_r64MinusInf: 571 dq RTFLOAT64U_INF_MINUS 572 %ifdef RT_ARCH_AMD64 573 .s_r64PlusOne: 574 dq +1.0 575 .s_r64PlusZero: 576 dq +0.0 577 .s_r64MinusZero: 578 dq -0.0 579 %endif 580 581 ;; 582 ; Sub-function that checks if the exponent (st0) is an odd integer or not. 583 ; 584 ; @returns eax = 1 if odd, 0 if even or not integer. 585 ; @uses eax, edx, eflags. 586 ; 587 .is_exp_odd_integer: 588 ; Save the FPU enviornment and mask all exceptions. 589 fnstenv [xBP - 30h] 590 mov ax, [xBP - 30h + X86FSTENV32P.FCW] 591 or word [xBP - 30h + X86FSTENV32P.FCW], X86_FCW_MASK_ALL 592 fldcw [xBP - 30h + X86FSTENV32P.FCW] 593 mov [xBP - 30h + X86FSTENV32P.FCW], ax 594 595 ; Convert to 64-bit integer (probably not 100% correct). 596 fld st0 ; -> st0=exponent st1=exponent; st2=base; 597 fistp qword [xBP - 10h] 598 fild qword [xBP - 10h] ; -> st0=int(exponent) st1=exponent; st2=base; 599 fcomip st0, st1 ; -> st0=exponent; st1=base; 600 jne .is_exp_odd_integer_return_false ; jump if not integer. 601 mov xAX, [xBP - 10h] 602 %ifdef 603 mov edx, [xBP - 10h + 4] 604 %endif 605 606 ; Check the lowest bit if it might be odd. 607 test al, 1 608 jz .is_exp_odd_integer_return_false ; jump if even. 609 610 ; If the result is negative, convert to positive. 611 %ifdef RT_ARCH_AMD64 612 bt rax, 63 613 %else 614 bt edx, 31 615 %endif 616 jnc .is_exp_odd_integer_positive 617 %ifdef RT_ARCH_AMD64 618 neg xAX 619 %else 620 neg edx 621 neg eax 622 sbb edx, 0 623 %endif 624 .is_exp_odd_integer_positive: 625 626 ; Now find the most significant bit in the value so we can verify that 627 ; the odd bit was part of the mantissa/fraction of the input. 628 %ifdef RT_ARCH_AMD64 629 bsr rax, rax 630 %else 631 bsr edx, edx 632 jnz .is_exp_odd_integer_high_dword_is_zero 633 lea eax, [edx + 20h] 634 jmp .is_exp_odd_integer_first_bit_in_eax 635 .is_exp_odd_integer_high_dword_is_zero: 636 bsr eax, eax 637 .is_exp_odd_integer_first_bit_in_eax: 638 %endif 639 ; The limit is 53 for double precision (one implicit bit + 52 bits fraction). 640 cmp eax, 53 641 jae .is_exp_odd_integer_return_false 642 mov eax, 1 643 jmp .is_exp_odd_integer_return 644 645 ; Return. 646 .is_exp_odd_integer_return_false: 647 xor eax, eax 648 .is_exp_odd_integer_return: 649 ffreep st0 650 fldenv [xBP - 30h] 651 ret 652 112 .return_exp: 113 movsd xmm0, xmm1 114 jmp .return 653 115 ENDPROC RT_NOCRT(pow) 654 116 -
trunk/src/VBox/Runtime/common/math/powcore.asm
r96336 r96337 1 1 ; $Id$ 2 2 ;; @file 3 ; IPRT - No-CRT pow- AMD64 & X86.3 ; IPRT - No-CRT common pow code - AMD64 & X86. 4 4 ; 5 5 … … 52 52 53 53 ;; 54 ; Compute the rdBase to the power of rdExp. 55 ; @returns st(0) / xmm0 56 ; @param rdBase [xSP + xCB*2] / xmm0 57 ; @param rdExp [xSP + xCB*2 + RTLRD_CB] / xmm1 58 RT_NOCRT_BEGINPROC pow 54 ; Compute the st1 to the power of st0. 55 ; 56 ; @returns st(0) = result 57 ; eax = what's being returned: 58 ; 0 - Just a value. 59 ; 1 - The rBase value. Caller may take steps to ensure it's exactly the same. 60 ; 2 - The rExp value. Caller may take steps to ensure it's exactly the same. 61 ; @param rBase/st1 The base. 62 ; @param rExp/st0 The exponent 63 ; @param fFxamBase/dx The status flags after fxam(rBase). 64 ; @param enmType/ebx The original parameter and return types: 65 ; 0 - 32-bit / float 66 ; 1 - 64-bit / double 67 ; 2 - 80-bit / long double 68 ; 69 BEGINPROC rtNoCrtMathPowCore 59 70 push xBP 60 71 SEH64_PUSH_xBP … … 64 75 SEH64_ALLOCATE_STACK 30h 65 76 SEH64_END_PROLOGUE 66 67 ;68 ; Load rdBase into st1 and rdExp into st0.69 ;70 %ifdef RT_ARCH_AMD6471 movsd [xBP - 10h], xmm072 fld qword [xBP - 10h]73 fxam74 fnstsw ax75 mov dx, ax ; dx=fxam(base)76 77 movsd [xBP - 20h], xmm178 fld qword [xBP - 20h]79 %else80 fld qword [xBP + xCB*2]81 fxam82 fnstsw ax83 mov dx, ax ; dx=fxam(base)84 85 fld qword [xBP + xCB*2 + RTLRD_CB]86 %endif87 77 88 78 ; … … 212 202 213 203 .integer_exp_return: 214 ffreep st0 ; drop the factor -> st0=result 204 ffreep st0 ; drop the factor -> st0=result; no st1. 215 205 jmp .return_val 216 206 … … 292 282 ; 293 283 .return_val: 294 %ifdef RT_ARCH_AMD64 295 fstp qword [xBP - 10h] 296 movsd xmm0, [xBP - 10h] 297 %endif 284 xor eax, eax 298 285 .return: 299 286 leave … … 334 321 .exp_zero: 335 322 .return_plus_one: 336 %ifdef RT_ARCH_AMD64337 movsd xmm0, qword [.s_r64PlusOne xWrtRIP]338 %else339 323 fld1 340 %endif341 324 jmp .return_pop_pop_val 342 325 … … 429 412 jnz .return_base_value ; Matching 8 430 413 .return_plus_zero: ; Matching 9 431 %ifdef RT_ARCH_AMD64432 movsd xmm0, qword [.s_r64PlusZero xWrtRIP]433 %else434 414 fldz 435 %endif436 415 jmp .return_pop_pop_val 437 416 … … 458 437 jz .return_plus_zero ; Matches 16 (exp not odd and < 0, base == -Inf) 459 438 .return_minus_zero: ; Matches 15 (exp is odd and < 0, base == -Inf) 460 %ifdef RT_ARCH_AMD64461 movsd xmm0, qword [.s_r64MinusZero xWrtRIP]462 %else463 439 fldz 464 440 fchs 465 %endif466 441 jmp .return_pop_pop_val 467 442 … … 479 454 ; 480 455 .return_exp_nan: 481 %ifdef RT_ARCH_AMD64482 movsd xmm0, xmm1483 %else484 456 fld st0 485 %endif 486 jmp .return_pop_pop_val 457 mov eax, 2 ; return param 2 458 jmp .return_pop_pop_val_with_eax 487 459 488 460 ; … … 492 464 .return_base_value: 493 465 .base_nan: ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN. 494 %ifdef RT_ARCH_AMD64495 ; xmm0 = base already496 %else497 466 fld st1 498 %endif 467 mov eax, 1 ; return param 1 468 jmp .return_pop_pop_val_with_eax 469 470 ; 471 ; Pops the two values off the FPU stack and returns NaN. 472 ; 473 .return_nan: 474 fld qword [.s_r64QNan xWrtRIP] 499 475 jmp .return_pop_pop_val 500 476 501 477 ; 502 ; Pops the two values off the FPU stack and returns NaN. 503 ; 504 .return_nan: 505 %ifdef RT_ARCH_AMD64 506 movsd xmm0, qword [.s_r64QNan xWrtRIP] 507 %else 508 fld qword [.s_r64QNan xWrtRIP] 509 %endif 478 ; Pops the two values off the FPU stack and returns +Inf. 479 ; 480 .return_plus_inf: 481 fld qword [.s_r64PlusInf xWrtRIP] 510 482 jmp .return_pop_pop_val 511 483 512 484 ; 513 ; Pops the two values off the FPU stack and returns +Inf. 514 ; 515 .return_plus_inf: 516 %ifdef RT_ARCH_AMD64 517 movsd xmm0, qword [.s_r64PlusInf xWrtRIP] 518 %else 519 fld qword [.s_r64PlusInf xWrtRIP] 520 %endif 485 ; Pops the two values off the FPU stack and returns -Inf. 486 ; 487 .return_minus_inf: 488 fld qword [.s_r64MinusInf xWrtRIP] 521 489 jmp .return_pop_pop_val 522 490 523 491 ; 524 ; Pops the two values off the FPU stack and returns -Inf. 525 ; 526 .return_minus_inf: 527 %ifdef RT_ARCH_AMD64 528 movsd xmm0, qword [.s_r64MinusInf xWrtRIP] 529 %else 530 fld qword [.s_r64MinusInf xWrtRIP] 531 %endif 532 jmp .return_pop_pop_val 533 534 ; 535 ; AMD64: Return value in xmm0; Pop the two values on the FPU stack. 536 ; X86: Return st0, remove st1 and st2. 492 ; Return st0, remove st1 and st2. 537 493 ; 538 494 .return_pop_pop_val: 539 %ifdef RT_ARCH_AMD64 540 ffreep st0 541 ffreep st0 542 %else 495 xor eax, eax 496 .return_pop_pop_val_with_eax: 543 497 fstp st2 544 498 ffreep st0 545 %endif546 499 jmp .return 547 500 … … 570 523 .s_r64MinusInf: 571 524 dq RTFLOAT64U_INF_MINUS 572 %ifdef RT_ARCH_AMD64573 .s_r64PlusOne:574 dq +1.0575 .s_r64PlusZero:576 dq +0.0577 .s_r64MinusZero:578 dq -0.0579 %endif580 525 581 526 ;; … … 586 531 ; 587 532 .is_exp_odd_integer: 533 ; 588 534 ; Save the FPU enviornment and mask all exceptions. 535 ; 589 536 fnstenv [xBP - 30h] 590 537 mov ax, [xBP - 30h + X86FSTENV32P.FCW] … … 593 540 mov [xBP - 30h + X86FSTENV32P.FCW], ax 594 541 542 ; 595 543 ; Convert to 64-bit integer (probably not 100% correct). 544 ; 596 545 fld st0 ; -> st0=exponent st1=exponent; st2=base; 597 546 fistp qword [xBP - 10h] 598 547 fild qword [xBP - 10h] ; -> st0=int(exponent) st1=exponent; st2=base; 599 548 fcomip st0, st1 ; -> st0=exponent; st1=base; 600 jne .is_exp_odd_integer_ return_false ; jump if not integer.549 jne .is_exp_odd_integer__return_false ; jump if not integer. 601 550 mov xAX, [xBP - 10h] 602 551 %ifdef … … 604 553 %endif 605 554 555 ; 606 556 ; Check the lowest bit if it might be odd. 557 ; This works both for positive and negative numbers. 558 ; 607 559 test al, 1 608 jz .is_exp_odd_integer_return_false ; jump if even. 609 560 jz .is_exp_odd_integer__return_false ; jump if even. 561 562 ; 610 563 ; If the result is negative, convert to positive. 564 ; 611 565 %ifdef RT_ARCH_AMD64 612 566 bt rax, 63 … … 614 568 bt edx, 31 615 569 %endif 616 jnc .is_exp_odd_integer_ positive570 jnc .is_exp_odd_integer__positive 617 571 %ifdef RT_ARCH_AMD64 618 572 neg xAX … … 622 576 sbb edx, 0 623 577 %endif 624 .is_exp_odd_integer_positive: 625 578 .is_exp_odd_integer__positive: 579 580 ; 626 581 ; Now find the most significant bit in the value so we can verify that 627 582 ; the odd bit was part of the mantissa/fraction of the input. 583 ; 584 cmp bl, 3 ; Skip if 80-bit input, as it has a 64-bit mantissa which 585 je .is_exp_odd_integer__return_true ; makes it a 1 bit more precision than out integer reg(s). 586 628 587 %ifdef RT_ARCH_AMD64 629 588 bsr rax, rax 630 589 %else 631 590 bsr edx, edx 632 jnz .is_exp_odd_integer_ high_dword_is_zero591 jnz .is_exp_odd_integer__high_dword_is_zero 633 592 lea eax, [edx + 20h] 634 jmp .is_exp_odd_integer_ first_bit_in_eax635 .is_exp_odd_integer_ high_dword_is_zero:593 jmp .is_exp_odd_integer__first_bit_in_eax 594 .is_exp_odd_integer__high_dword_is_zero: 636 595 bsr eax, eax 637 .is_exp_odd_integer_first_bit_in_eax: 638 %endif 639 ; The limit is 53 for double precision (one implicit bit + 52 bits fraction). 640 cmp eax, 53 641 jae .is_exp_odd_integer_return_false 596 .is_exp_odd_integer__first_bit_in_eax: 597 %endif 598 ; 599 ; The limit is 53 for double precision (one implicit bit + 52 bits fraction), 600 ; and 24 for single precision types. 601 ; 602 mov ah, 53 ; RTFLOAT64U_FRACTION_BITS + 1 603 cmp bl, 0 604 jne .is_exp_odd_integer__is_double_limit 605 mov ah, 24 ; RTFLOAT32U_FRACTION_BITS + 1 606 .is_exp_odd_integer__is_double_limit: 607 608 cmp al, ah 609 jae .is_exp_odd_integer__return_false 642 610 mov eax, 1 643 jmp .is_exp_odd_integer_return644 611 645 612 ; Return. 646 .is_exp_odd_integer_return_false: 613 .is_exp_odd_integer__return_true: 614 jmp .is_exp_odd_integer__return 615 .is_exp_odd_integer__return_false: 647 616 xor eax, eax 648 .is_exp_odd_integer_ return:617 .is_exp_odd_integer__return: 649 618 ffreep st0 650 619 fldenv [xBP - 30h] 651 620 ret 652 621 653 ENDPROC RT_NOCRT(pow)654 622 ENDPROC rtNoCrtMathPowCore 623 -
trunk/src/VBox/Runtime/testcase/Makefile.kmk
r96321 r96337 682 682 ../common/math/lrintf.asm \ 683 683 ../common/math/pow.asm \ 684 ../common/math/powcore.asm \ 684 685 ../common/math/remainder.asm \ 685 686 ../common/math/remainderf.asm \
Note:
See TracChangeset
for help on using the changeset viewer.