Changeset 96337 in vbox for trunk/src/VBox/Runtime/common/math/pow.asm
- Timestamp:
- Aug 19, 2022 2:49:44 PM (2 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/Runtime/common/math/pow.asm
r96321 r96337 33 33 BEGINCODE 34 34 35 extern NAME(RT_NOCRT(feraiseexcept)) 36 37 ;; 38 ; Call feraiseexcept(%1) 39 %macro CALL_feraiseexcept_WITH 1 40 %ifdef RT_ARCH_X86 41 mov dword [xSP], X86_FSW_IE 42 %elifdef ASM_CALL64_GCC 43 mov edi, X86_FSW_IE 44 %elifdef ASM_CALL64_MSC 45 mov ecx, X86_FSW_IE 46 %else 47 %error calling conv. 48 %endif 49 call NAME(RT_NOCRT(feraiseexcept)) 50 %endmacro 51 35 extern NAME(rtNoCrtMathPowCore) 52 36 53 37 ;; … … 61 45 mov xBP, xSP 62 46 SEH64_SET_FRAME_xBP 0 63 sub xSP, 30h 64 SEH64_ALLOCATE_STACK 30h 47 push xBX 48 SEH64_PUSH_GREG rbx 49 sub xSP, 30h - xCB 50 SEH64_ALLOCATE_STACK 30h - xCB 65 51 SEH64_END_PROLOGUE 66 52 … … 69 55 ; 70 56 %ifdef RT_ARCH_AMD64 71 movsd [xBP - 10h], xmm072 fld qword [xBP - 10h]57 movsd [xBP - 20h], xmm0 58 fld qword [xBP - 20h] 73 59 fxam 74 60 fnstsw ax 75 61 mov dx, ax ; dx=fxam(base) 76 62 77 movsd [xBP - 20h], xmm178 fld qword [xBP - 20h]63 movsd [xBP - 30h], xmm1 64 fld qword [xBP - 30h] 79 65 %else 80 66 fld qword [xBP + xCB*2] … … 87 73 88 74 ; 89 ; Weed out special values, starting with the exponent.75 ; Call common worker for the calculation. 90 76 ; 91 fxam 92 fnstsw ax 93 mov cx, ax ; cx=fxam(exp) 94 95 and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 96 cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero) 97 je .exp_finite 98 cmp ax, X86_FSW_C3 ; Zero 99 je .exp_zero 100 cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals 101 je .exp_finite 102 cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity. 103 je .exp_inf 104 jmp .exp_nan 105 106 .exp_finite: 107 ; 108 ; Detect special base values. 109 ; 110 mov ax, dx ; ax=fxam(base) 111 and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 112 cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero) 113 je .base_finite 114 cmp ax, X86_FSW_C3 ; Zero 115 je .base_zero 116 cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals 117 je .base_finite 118 cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity. 119 je .base_inf 120 jmp .base_nan 121 122 .base_finite: 123 ; 124 ; 1 in the base is also special. 125 ; Rule 6 (see below): base == +1 and exponent = whatever: Return +1.0 126 ; 127 fld1 128 fcomip st0, st2 129 je .return_base_value 77 mov ebx, 1 ; float 78 call NAME(rtNoCrtMathPowCore) 130 79 131 80 ; 132 ; Check if the exponent is an integer value we can handle in a 64-bit133 ; GRP as that is simpler to handle accurately.81 ; Normally, we return with eax==0 and we have to load the result 82 ; from st0 and into xmm0. 134 83 ; 135 ; In 64-bit integer range? 136 fld tword [.s_r80MaxInt xWrtRIP] 137 fcomip st0, st1 138 jb .not_integer_exp 84 cmp eax, 0 85 jne .return_input_reg 139 86 140 fld tword [.s_r80MinInt xWrtRIP] 141 fcomip st0, st1 142 ja .not_integer_exp 87 fstp qword [xSP - 30h] 88 movsd xmm0, [xSP - 30h] 143 89 144 ; Convert it to integer.145 fld st0 ; -> st0=exp; st1=exp; st2=base146 fistp qword [xBP - 8] ; Save and pop 64-bit int (no non-popping version of this instruction).147 148 fild qword [xBP - 8] ; Load it again for comparison.149 fucomip st0, st1 ; Compare integer exp and floating point exp to see if they are the same. Pop.150 jne .not_integer_exp151 152 153 ;154 ;155 ; Ok, we've got an integer exponent value in that fits into a 64-bit.156 ; We'll multiply the base exponention bit by exponention bit, applying157 ; it as a factor for bits that are set.158 ;159 ;160 .integer_exp:161 ; Load the integer value into edx:exx / rdx and ditch the floating point exponent.162 mov xDX, [xBP - 8]163 %ifdef RT_ARCH_X86164 mov eax, [xBP - 8 + 4]165 %endif166 ffreep st0 ; -> st0=base;167 168 ; Load a 1 onto the stack, we'll need it below as well as for converting169 ; a negative exponent to a positive one.170 fld1 ; -> st0=1.0; st1=base;171 172 ; If the exponent is negative, negate it and change base to 1/base.173 or xDX, xDX174 jns .integer_exp_positive175 neg xDX176 %ifdef RT_ARCH_X86177 neg eax178 sbb edx, 0179 %endif180 fdivr st1, st0 ; -> st0=1.0; st1=1/base181 .integer_exp_positive:182 183 ;184 ; We'll process edx:eax / rdx bit by bit till it's zero, using st0 for185 ; the multiplication factor corresponding to the current exponent bit186 ; and st1 as the result.187 ;188 fxch ; -> st0=base; st1=1.0;189 .integer_exp_loop:190 %ifdef RT_ARCH_X86191 shrd eax, edx, 1192 %else193 shr rdx, 1194 %endif195 jnc .integer_exp_loop_advance196 fmul st1, st0197 198 .integer_exp_loop_advance:199 ; Check if we're done.200 %ifdef RT_ARCH_AMD64201 jz .integer_exp_return ; (we will have the flags for the shr rdx above)202 %else203 shr edx, 1 ; complete the above shift operation204 205 mov ecx, edx ; check if edx:eax is zero.206 or ecx, eax207 jz .integer_exp_return208 %endif209 ; Calculate the factor for the next bit.210 fmul st0, st0211 jmp .integer_exp_loop212 213 .integer_exp_return:214 ffreep st0 ; drop the factor -> st0=result215 jmp .return_val216 217 218 ;219 ;220 ; Non-integer or value was out of range for an int64_t.221 ;222 ; The approach here is the same as in exp.asm, only we have to do the223 ; log2(base) calculation first as it's a parameter and not a constant.224 ;225 ;226 .not_integer_exp:227 228 ; First reject negative numbers. We still have the fxam(base) status in dx.229 test dx, X86_FSW_C1230 jnz .base_negative_non_integer_exp231 232 ; Swap the items on the stack, so we can process the base first.233 fxch st0, st1 ; -> st0=base; st1=exponent;234 235 ;236 ; From log2.asm:237 ;238 ; The fyl2xp1 instruction (ST1=ST1*log2(ST0+1.0), popping ST0) has a239 ; valid ST0 range of 1(1-sqrt(0.5)) (approx 0.29289321881) on both240 ; sides of zero. We try use it if we can.241 ;242 .above_one:243 ; For both fyl2xp1 and fyl2xp1 we need st1=1.0.244 fld1245 fxch st0, st1 ; -> st0=base; st1=1.0; st2=exponent246 247 ; Check if the input is within the fyl2xp1 range.248 fld qword [.s_r64AbsFyL2xP1InputMax xWrtRIP]249 fcomip st0, st1250 jbe .cannot_use_fyl2xp1251 252 fld qword [.s_r64AbsFyL2xP1InputMin xWrtRIP]253 fcomip st0, st1254 jae .cannot_use_fyl2xp1255 256 ; Do the calculation.257 .use_fyl2xp1:258 fsub st0, st1 ; -> st0=base-1; st1=1.0; st2=exponent259 fyl2xp1 ; -> st0=1.0*log2(base-1.0+1.0); st1=exponent260 jmp .done_log2261 262 .cannot_use_fyl2xp1:263 fyl2x ; -> st0=1.0*log2(base); st1=exponent264 .done_log2:265 266 ;267 ; From exp.asm:268 ;269 ; Convert to power of 2 and it'll be the same as exp2.270 ;271 fmulp ; st0=log2(base); st1=exponent -> st0=pow2exp272 273 ;274 ; Split the job in two on the fraction and integer l2base parts.275 ;276 fld st0 ; Push a copy of the pow2exp on the stack.277 frndint ; st0 = (int)pow2exp278 fsub st1, st0 ; st1 = pow2exp - (int)pow2exp; i.e. st1 = fraction, st0 = integer.279 fxch ; st0 = fraction, st1 = integer.280 281 ; 1. Calculate on the fraction.282 f2xm1 ; st0 = 2**fraction - 1.0283 fld1284 faddp ; st0 = 2**fraction285 286 ; 2. Apply the integer power of two.287 fscale ; st0 = result; st1 = integer part of pow2exp.288 fstp st1 ; st0 = result; no st1.289 290 ;291 ; Return st0.292 ;293 .return_val:294 %ifdef RT_ARCH_AMD64295 fstp qword [xBP - 10h]296 movsd xmm0, [xBP - 10h]297 %endif298 90 .return: 91 lea xSP, [xBP - xCB] 92 pop xBX 299 93 leave 300 94 ret 301 95 302 303 96 ; 97 ; But sometimes, like if we have NaN or other special inputs, we should 98 ; return the input as-is and ditch the st0 value. 304 99 ; 305 ; pow() has a lot of defined behavior for special values, which is why 306 ; this is the largest and most difficult part of the code. :-) 307 ; 308 ; On https://pubs.opengroup.org/onlinepubs/9699919799/functions/pow.html 309 ; there are 21 error conditions listed in the return value section. 310 ; The code below refers to this by number. 311 ; 312 ; When we get here: 313 ; dx=fxam(base) 314 ; cx=fxam(exponent) 315 ; st1=base 316 ; st0=exponent 317 ; 318 319 ; 320 ; 1. Finit base < 0 and finit non-interger exponent: -> domain error (#IE) + NaN. 321 ; 322 ; The non-integer exponent claim might be wrong, as we only check if it 323 ; fits into a int64_t register. But, I don't see how we can calculate 324 ; it right now. 325 ; 326 .base_negative_non_integer_exp: 327 CALL_feraiseexcept_WITH X86_FSW_IE 328 jmp .return_nan 329 330 ; 331 ; 7. Exponent = +/-0.0, any base value including NaN: return +1.0 332 ; Note! According to https://en.cppreference.com/w/c/numeric/math/pow a 333 ; domain error (#IE) occur if base=+/-0. Not implemented. 334 .exp_zero: 335 .return_plus_one: 336 %ifdef RT_ARCH_AMD64 337 movsd xmm0, qword [.s_r64PlusOne xWrtRIP] 338 %else 339 fld1 100 .return_input_reg: 101 ffreep st0 102 cmp eax, 2 103 je .return_exp 104 %ifdef RT_STRICT 105 cmp eax, 1 106 je .return_base 107 int3 340 108 %endif 341 jmp .return_pop_pop_val 342 343 ; 344 ; 6. Exponent = whatever and base = 1: Return 1.0 345 ; 10. Exponent = +/-Inf and base = -1: Return 1.0 346 ;6+10 => Exponent = +/-Inf and |base| = 1: Return 1.0 347 ; 11. Exponent = -Inf and |base| < 1: Return +Inf 348 ; 12. Exponent = -Inf and |base| > 1: Return +0 349 ; 13. Exponent = +Inf and |base| < 1: Return +0 350 ; 14. Exponent = +Inf and |base| > 1: Return +Inf 351 ; 352 ; Note! Rule 4 would trigger for the same conditions as 11 when base == 0, 353 ; but it's optional to raise div/0 and it's apparently marked as 354 ; obsolete in C23, so not implemented. 355 ; 356 .exp_inf: 357 ; Check if base is NaN or unsupported. 358 and dx, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 ; fxam(base) 359 cmp dx, X86_FSW_C0 360 jbe .return_base_nan 361 362 ; Calc fabs(base) and replace the exponent with 1.0 as we're very likely to need this here. 363 ffreep st0 364 fabs 365 fld1 ; st0=1.0; st1=|rdBase| 366 fcomi st0, st1 367 je .return_plus_one ; Matches rule 6 + 10 (base is +/-1). 368 ja .exp_inf_base_smaller_than_one 369 .exp_inf_base_larger_than_one: 370 test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign 371 jz .return_plus_inf ; Matches rule 14 (exponent is +Inf). 372 jmp .return_plus_zero ; Matches rule 12 (exponent is -Inf). 373 374 .exp_inf_base_smaller_than_one: 375 test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign 376 jnz .return_plus_inf ; Matches rule 11 (exponent is -Inf). 377 jmp .return_plus_zero ; Matches rule 13 (exponent is +Inf). 378 379 ; 380 ; 6. Exponent = whatever and base = 1: Return 1.0 381 ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN. 382 ; 383 .exp_nan: 384 ; Check if base is a number and possible 1. 385 test dx, X86_FSW_C2 ; dx=fxam(base); C2 is set for finite number, infinity and denormals. 386 jz .return_exp_nan 387 fld1 388 fcomip st0, st2 389 jne .return_exp_nan 390 jmp .return_plus_one 391 392 ; 393 ; 4a. base == +/-0.0 and exp < 0 and exp is odd integer: Return +/-Inf, raise div/0. 394 ; 4b. base == +/-0.0 and exp < 0 and exp is not odd int: Return +Inf, raise div/0. 395 ; 8. base == +/-0.0 and exp > 0 and exp is odd integer: Return +/-0.0 396 ; 9. base == +/-0.0 and exp > 0 and exp is not odd int: Return +0 397 ; 398 ; Note! Exponent must be finite and non-zero if we get here. 399 ; 400 .base_zero: 401 fldz 402 fcomip st0, st1 403 jbe .base_zero_plus_exp 404 .base_zero_minus_exp: 405 mov cx, dx ; stashing fxam(base) in CX because EDX is trashed by .is_exp_odd_integer 406 call .is_exp_odd_integer ; trashes EDX but no ECX. 407 or eax, eax 408 jz .base_zero_minus_exp_not_odd_int 409 410 ; Matching 4a. 411 .base_zero_minus_exp_odd_int: 412 test cx, X86_FSW_C1 ; base sign 413 jz .raise_de_and_return_plus_inf 414 .raise_de_and_return_minus_inf: 415 CALL_feraiseexcept_WITH X86_FSW_DE 416 jmp .return_minus_inf 417 .raise_de_and_return_plus_inf: 418 CALL_feraiseexcept_WITH X86_FSW_DE 419 jmp .return_plus_inf 420 421 ; Matching 4b. 422 .base_zero_minus_exp_not_odd_int: 423 CALL_feraiseexcept_WITH X86_FSW_DE 424 jmp .return_plus_inf 425 426 .base_zero_plus_exp: 427 call .is_exp_odd_integer 428 or eax, eax 429 jnz .return_base_value ; Matching 8 430 .return_plus_zero: ; Matching 9 431 %ifdef RT_ARCH_AMD64 432 movsd xmm0, qword [.s_r64PlusZero xWrtRIP] 433 %else 434 fldz 435 %endif 436 jmp .return_pop_pop_val 437 438 ; 439 ; 15. base == -Inf and exp < 0 and exp is odd integer: Return -0 440 ; 16. base == -Inf and exp < 0 and exp is not odd int: Return +0 441 ; 17. base == -Inf and exp > 0 and exp is odd integer: Return -Inf 442 ; 18. base == -Inf and exp > 0 and exp is not odd int: Return +Inf 443 ; 19. base == +Inf and exp < 0: Return +0 444 ; 20. base == +Inf and exp > 0: Return +Inf 445 ; 446 ; Note! Exponent must be finite and non-zero if we get here. 447 ; 448 .base_inf: 449 fldz 450 fcomip st0, st1 451 jbe .base_inf_plus_exp 452 .base_inf_minus_exp: 453 test dx, X86_FSW_C1 454 jz .return_plus_zero ; Matches 19 (base == +Inf). 455 .base_minus_inf_minus_exp: 456 call .is_exp_odd_integer 457 or eax, eax 458 jz .return_plus_zero ; Matches 16 (exp not odd and < 0, base == -Inf) 459 .return_minus_zero: ; Matches 15 (exp is odd and < 0, base == -Inf) 460 %ifdef RT_ARCH_AMD64 461 movsd xmm0, qword [.s_r64MinusZero xWrtRIP] 462 %else 463 fldz 464 fchs 465 %endif 466 jmp .return_pop_pop_val 467 468 .base_inf_plus_exp: 469 test dx, X86_FSW_C1 470 jz .return_plus_inf ; Matches 20 (base == +Inf). 471 .base_minus_inf_plus_exp: 472 call .is_exp_odd_integer 473 or eax, eax 474 jnz .return_minus_inf ; Matches 17 (exp is odd and > 0, base == +Inf) 475 jmp .return_plus_inf ; Matches 18 (exp not odd and > 0, base == +Inf) 476 477 ; 478 ; Return the exponent NaN (or whatever) value. 479 ; 480 .return_exp_nan: 481 %ifdef RT_ARCH_AMD64 482 movsd xmm0, xmm1 483 %else 484 fld st0 485 %endif 486 jmp .return_pop_pop_val 487 488 ; 489 ; Return the base NaN (or whatever) value. 490 ; 491 .return_base_nan: 492 .return_base_value: 493 .base_nan: ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN. 494 %ifdef RT_ARCH_AMD64 495 ; xmm0 = base already 496 %else 497 fld st1 498 %endif 499 jmp .return_pop_pop_val 500 501 ; 502 ; Pops the two values off the FPU stack and returns NaN. 503 ; 504 .return_nan: 505 %ifdef RT_ARCH_AMD64 506 movsd xmm0, qword [.s_r64QNan xWrtRIP] 507 %else 508 fld qword [.s_r64QNan xWrtRIP] 509 %endif 510 jmp .return_pop_pop_val 511 512 ; 513 ; Pops the two values off the FPU stack and returns +Inf. 514 ; 515 .return_plus_inf: 516 %ifdef RT_ARCH_AMD64 517 movsd xmm0, qword [.s_r64PlusInf xWrtRIP] 518 %else 519 fld qword [.s_r64PlusInf xWrtRIP] 520 %endif 521 jmp .return_pop_pop_val 522 523 ; 524 ; Pops the two values off the FPU stack and returns -Inf. 525 ; 526 .return_minus_inf: 527 %ifdef RT_ARCH_AMD64 528 movsd xmm0, qword [.s_r64MinusInf xWrtRIP] 529 %else 530 fld qword [.s_r64MinusInf xWrtRIP] 531 %endif 532 jmp .return_pop_pop_val 533 534 ; 535 ; AMD64: Return value in xmm0; Pop the two values on the FPU stack. 536 ; X86: Return st0, remove st1 and st2. 537 ; 538 .return_pop_pop_val: 539 %ifdef RT_ARCH_AMD64 540 ffreep st0 541 ffreep st0 542 %else 543 fstp st2 544 ffreep st0 545 %endif 109 .return_base: 546 110 jmp .return 547 111 548 549 ALIGNCODE(8) 550 .s_r80MaxInt: 551 dt +9223372036854775807.0 552 553 ALIGNCODE(8) 554 .s_r80MinInt: 555 dt -9223372036854775807.0 556 557 ALIGNCODE(8) 558 ;; The fyl2xp1 instruction only works between +/-1(1-sqrt(0.5)). 559 ; These two variables is that range + 1.0, so we can compare directly 560 ; with the input w/o any extra fsub and fabs work. 561 .s_r64AbsFyL2xP1InputMin: 562 dq 0.708 ; -0.292 + 1.0 563 .s_r64AbsFyL2xP1InputMax: 564 dq 1.292 565 566 .s_r64QNan: 567 dq RTFLOAT64U_QNAN_MINUS 568 .s_r64PlusInf: 569 dq RTFLOAT64U_INF_PLUS 570 .s_r64MinusInf: 571 dq RTFLOAT64U_INF_MINUS 572 %ifdef RT_ARCH_AMD64 573 .s_r64PlusOne: 574 dq +1.0 575 .s_r64PlusZero: 576 dq +0.0 577 .s_r64MinusZero: 578 dq -0.0 579 %endif 580 581 ;; 582 ; Sub-function that checks if the exponent (st0) is an odd integer or not. 583 ; 584 ; @returns eax = 1 if odd, 0 if even or not integer. 585 ; @uses eax, edx, eflags. 586 ; 587 .is_exp_odd_integer: 588 ; Save the FPU enviornment and mask all exceptions. 589 fnstenv [xBP - 30h] 590 mov ax, [xBP - 30h + X86FSTENV32P.FCW] 591 or word [xBP - 30h + X86FSTENV32P.FCW], X86_FCW_MASK_ALL 592 fldcw [xBP - 30h + X86FSTENV32P.FCW] 593 mov [xBP - 30h + X86FSTENV32P.FCW], ax 594 595 ; Convert to 64-bit integer (probably not 100% correct). 596 fld st0 ; -> st0=exponent st1=exponent; st2=base; 597 fistp qword [xBP - 10h] 598 fild qword [xBP - 10h] ; -> st0=int(exponent) st1=exponent; st2=base; 599 fcomip st0, st1 ; -> st0=exponent; st1=base; 600 jne .is_exp_odd_integer_return_false ; jump if not integer. 601 mov xAX, [xBP - 10h] 602 %ifdef 603 mov edx, [xBP - 10h + 4] 604 %endif 605 606 ; Check the lowest bit if it might be odd. 607 test al, 1 608 jz .is_exp_odd_integer_return_false ; jump if even. 609 610 ; If the result is negative, convert to positive. 611 %ifdef RT_ARCH_AMD64 612 bt rax, 63 613 %else 614 bt edx, 31 615 %endif 616 jnc .is_exp_odd_integer_positive 617 %ifdef RT_ARCH_AMD64 618 neg xAX 619 %else 620 neg edx 621 neg eax 622 sbb edx, 0 623 %endif 624 .is_exp_odd_integer_positive: 625 626 ; Now find the most significant bit in the value so we can verify that 627 ; the odd bit was part of the mantissa/fraction of the input. 628 %ifdef RT_ARCH_AMD64 629 bsr rax, rax 630 %else 631 bsr edx, edx 632 jnz .is_exp_odd_integer_high_dword_is_zero 633 lea eax, [edx + 20h] 634 jmp .is_exp_odd_integer_first_bit_in_eax 635 .is_exp_odd_integer_high_dword_is_zero: 636 bsr eax, eax 637 .is_exp_odd_integer_first_bit_in_eax: 638 %endif 639 ; The limit is 53 for double precision (one implicit bit + 52 bits fraction). 640 cmp eax, 53 641 jae .is_exp_odd_integer_return_false 642 mov eax, 1 643 jmp .is_exp_odd_integer_return 644 645 ; Return. 646 .is_exp_odd_integer_return_false: 647 xor eax, eax 648 .is_exp_odd_integer_return: 649 ffreep st0 650 fldenv [xBP - 30h] 651 ret 652 112 .return_exp: 113 movsd xmm0, xmm1 114 jmp .return 653 115 ENDPROC RT_NOCRT(pow) 654 116
Note:
See TracChangeset
for help on using the changeset viewer.