VirtualBox

Ignore:
Timestamp:
Aug 17, 2022 1:03:48 AM (3 years ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
153053
Message:

IPRT/nocrt: Forgot to remove rtNoCrtMathSinCore from sin.asm after moving t to sincore.asm. bugref:10261

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/Runtime/common/math/sin.asm

    r96240 r96241  
    3232
    3333BEGINCODE
    34 
    35 ;;
    36 ; Internal sine and cosine worker that calculates the sine of st0 returning
    37 ; it in st0.
    38 ;
    39 ; When called by a sine function, fabs(st0) >= pi/2.
    40 ; When called by a cosine function, fabs(original input value) >= 3pi/8.
    41 ;
    42 ; That the input isn't a tiny number close to zero, means that we can do a bit
    43 ; cruder rounding when operating close to a pi/2 boundrary.  The value in the
    44 ; ecx register indicates the input precision and controls the crudeness of the
    45 ; rounding.
    46 ;
    47 ; @returns st0 = sine
    48 ; @param   st0      A finite number to calucate sine of.
    49 ; @param   ecx      Set to 0 if original input was a 32-bit float.
    50 ;                   Set to 1 if original input was a 64-bit double.
    51 ;                   set to 2 if original input was a 80-bit long double.
    52 ;
    53 BEGINPROC   rtNoCrtMathSinCore
    54         push    xBP
    55         SEH64_PUSH_xBP
    56         mov     xBP, xSP
    57         SEH64_SET_FRAME_xBP 0
    58         SEH64_END_PROLOGUE
    59 
    60         ;
    61         ; Load the pointer to the rounding crudeness factor into xDX.
    62         ;
    63         lea     xDX, [.s_ar64NearZero xWrtRIP]
    64         lea     xDX, [xDX + xCX * xCB]
    65 
    66         ;
    67         ; Finite number.  We want it in the range [0,2pi] and will preform
    68         ; a remainder division if it isn't.
    69         ;
    70         fcom    qword [.s_r64Max xWrtRIP]   ; compares st0 and 2*pi
    71         fnstsw  ax
    72         test    ax, X86_FSW_C3 | X86_FSW_C0 | X86_FSW_C2 ; C3 := st0 == mem;  C0 := st0 < mem;  C2 := unordered (should be the case);
    73         jz      .reduce_st0                 ; Jump if st0 > mem
    74 
    75         fcom    qword [.s_r64Min xWrtRIP]   ; compares st0 and 0.0
    76         fnstsw  ax
    77         test    ax, X86_FSW_C3 | X86_FSW_C0
    78         jnz     .reduce_st0                 ; Jump if st0 <= mem
    79 
    80         ;
    81         ; We get here if st0 is in the [0,2pi] range.
    82         ;
    83         ; Now, FSIN is documented to be reasonably accurate for the range
    84         ; -3pi/4 to +3pi/4, so we have to make some more effort to calculate
    85         ; in that range only.
    86         ;
    87 .in_range:
    88         ; if (st0 < pi)
    89         fldpi
    90         fcom    st1                         ; compares st0 (pi) with st1 (the normalized value)
    91         fnstsw  ax
    92         test    ax, X86_FSW_C0              ; st1 > pi
    93         jnz     .larger_than_pi
    94         test    ax, X86_FSW_C3
    95         jnz     .equals_pi
    96 
    97         ;
    98         ; input in the range [0,pi[
    99         ;
    100 .smaller_than_pi:
    101         fdiv    qword [.s_r64Two xWrtRIP]   ; st0 = pi/2
    102 
    103         ; if (st0 < pi/2)
    104         fcom    st1                         ; compares st0 (pi/2) with st1
    105         fnstsw  ax
    106         test    ax, X86_FSW_C0              ; st1 > pi
    107         jnz     .between_half_pi_and_pi
    108         test    ax, X86_FSW_C3
    109         jnz     .equals_half_pi
    110 
    111         ;
    112         ; The value is between zero and half pi, including the zero value.
    113         ;
    114         ; This is in range where FSIN works reasonably reliably. So drop the
    115         ; half pi in st0 and do the calculation.
    116         ;
    117 .between_zero_and_half_pi:
    118         ; Check if we're so close to pi/2 that it makes no difference.
    119         fsub    st0, st1                    ; st0 = pi/2 - st1
    120         fcom    qword [xDX]
    121         fnstsw  ax
    122         test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
    123         jnz     .equals_half_pi
    124         ffreep  st0
    125 
    126         ; Check if we're so close to zero that it makes no difference given the
    127         ; internal accuracy of the FPU.
    128         fcom    qword [xDX]
    129         fnstsw  ax
    130         test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
    131         jnz     .equals_zero_popped_one
    132 
    133         ; Ok, calculate sine.
    134         fsin
    135         jmp     .return
    136 
    137         ;
    138         ; The value is in the range ]pi/2,pi[
    139         ;
    140         ; This is outside the comfortable FSIN range, but if we subtract PI and
    141         ; move to the ]-pi/2,0[ range we just have to change the sign to get
    142         ; the value we want.
    143         ;
    144 .between_half_pi_and_pi:
    145         ; Check if we're so close to pi/2 that it makes no difference.
    146         fsubr   st0, st1                    ; st0 = st1 - st0
    147         fcom    qword [xDX]
    148         fnstsw  ax
    149         test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
    150         jnz     .equals_half_pi
    151         ffreep  st0
    152 
    153         ; Check if we're so close to pi that it makes no difference.
    154         fldpi
    155         fsub    st0, st1                    ; st0 = st0 - st1
    156         fcom    qword [xDX]
    157         fnstsw  ax
    158         test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
    159         jnz     .equals_pi
    160         ffreep  st0
    161 
    162         ; Ok, transform the value and calculate sine.
    163         fldpi
    164         fsubp   st1, st0
    165 
    166         fsin
    167         fchs
    168         jmp     .return
    169 
    170         ;
    171         ; input in the range ]pi,2pi[
    172         ;
    173 .larger_than_pi:
    174         fsub    st1, st0                    ; st1 -= pi
    175         fdiv    qword [.s_r64Two xWrtRIP]   ; st0 = pi/2
    176 
    177         ; if (st0 < pi/2)
    178         fcom    st1                         ; compares st0 (pi/2) with reduced st1
    179         fnstsw  ax
    180         test    ax, X86_FSW_C0              ; st1 > pi
    181         jnz     .between_3_half_pi_and_2pi
    182         test    ax, X86_FSW_C3
    183         jnz     .equals_3_half_pi
    184 
    185         ;
    186         ; The value is in the the range: ]pi,3pi/2[
    187         ;
    188         ; The actual st0 is in the range ]pi,pi/2[ where FSIN is performing okay
    189         ; and we can get the desired result by changing the sign (-FSIN).
    190         ;
    191 .between_pi_and_3_half_pi:
    192         ; Check if we're so close to pi/2 that it makes no difference.
    193         fsub    st0, st1                    ; st0 = pi/2 - st1
    194         fcom    qword [xDX]
    195         fnstsw  ax
    196         test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
    197         jnz     .equals_3_half_pi
    198         ffreep  st0
    199 
    200         ; Check if we're so close to zero that it makes no difference given the
    201         ; internal accuracy of the FPU.
    202         fcom    qword [xDX]
    203         fnstsw  ax
    204         test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
    205         jnz     .equals_pi_popped
    206 
    207         ; Ok, calculate sine and flip the sign.
    208         fsin
    209         fchs
    210         jmp     .return
    211 
    212         ;
    213         ; The value is in the last pi/2 of the range: ]3pi/2,2pi[
    214         ;
    215         ; Since FSIN should work reasonably well for ]-pi/2,pi], we can just
    216         ; subtract pi again (we subtracted pi at .larger_than_pi above) and
    217         ; run FSIN on it.  (st1 is currently in the range ]pi/2,pi[.)
    218         ;
    219 .between_3_half_pi_and_2pi:
    220         ; Check if we're so close to pi/2 that it makes no difference.
    221         fsubr   st0, st1                    ; st0 = st1 - st0
    222         fcom    qword [xDX]
    223         fnstsw  ax
    224         test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
    225         jnz     .equals_3_half_pi
    226         ffreep  st0
    227 
    228         ; Check if we're so close to pi that it makes no difference.
    229         fldpi
    230         fsub    st0, st1                    ; st0 = st0 - st1
    231         fcom    qword [xDX]
    232         fnstsw  ax
    233         test    ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.
    234         jnz     .equals_2pi
    235         ffreep  st0
    236 
    237         ; Ok, adjust input and calculate sine.
    238         fldpi
    239         fsubp   st1, st0
    240         fsin
    241         jmp     .return
    242 
    243         ;
    244         ; sin(0) = 0
    245         ; sin(pi) = 0
    246         ;
    247 .equals_zero:
    248 .equals_pi:
    249 .equals_2pi:
    250         ffreep  st0
    251 .equals_zero_popped_one:
    252 .equals_pi_popped:
    253         ffreep  st0
    254         fldz
    255         jmp     .return
    256 
    257         ;
    258         ; sin(pi/2) = 1
    259         ;
    260 .equals_half_pi:
    261         ffreep  st0
    262         ffreep  st0
    263         fld1
    264         jmp     .return
    265 
    266         ;
    267         ; sin(3*pi/2) = -1
    268         ;
    269 .equals_3_half_pi:
    270         ffreep  st0
    271         ffreep  st0
    272         fld1
    273         fchs
    274         jmp     .return
    275 
    276         ;
    277         ; Return.
    278         ;
    279 .return:
    280         leave
    281         ret
    282 
    283         ;
    284         ; Reduce st0 by reminder division by PI*2.  The result should be positive here.
    285         ;
    286         ;; @todo this is one of our weak spots (really any calculation involving PI is).
    287 .reduce_st0:
    288         fldpi
    289         fadd    st0, st0
    290         fxch    st1                     ; st0=input (dividend) st1=2pi (divisor)
    291 .again:
    292         fprem1
    293         fnstsw  ax
    294         test    ah, (X86_FSW_C2 >> 8)   ; C2 is set if partial result.
    295         jnz     .again                  ; Loop till C2 == 0 and we have a final result.
    296 
    297         ;
    298         ; Make sure the result is positive.
    299         ;
    300         fxam
    301         fnstsw  ax
    302         test    ax, X86_FSW_C1          ; The sign bit
    303         jz      .reduced_to_positive
    304 
    305         fadd    st0, st1                ; st0 += 2pi, which should make it positive
    306 
    307 %ifdef RT_STRICT
    308         fxam
    309         fnstsw  ax
    310         test    ax, X86_FSW_C1
    311         jz      .reduced_to_positive
    312         int3
    313 %endif
    314 
    315 .reduced_to_positive:
    316         fstp    st1                     ; Get rid of the 2pi value.
    317         jmp     .in_range
    318 
    319 ALIGNCODE(8)
    320 .s_r64Max:
    321         dq +6.28318530717958647692      ; 2*pi
    322 .s_r64Min:
    323         dq 0.0
    324 .s_r64Two:
    325         dq 2.0
    326         ;;
    327         ; Close to 2/pi rounding limits for 32-bit, 64-bit and 80-bit floating point operations.
    328         ; Given that the original input is at least +/-3pi/8 (1.178) and that precision of the
    329         ; PI constant used during reduction/whatever, I think we can round to a whole pi/2
    330         ; step when we get close enough.
    331         ;
    332         ; Look to RTFLOAT64U for the format details, but 52 is the shift for the exponent field
    333         ; and 1023 is the exponent bias.  Since the format uses an implied 1 in the mantissa,
    334         ; we only have to set the exponent to get a valid number.
    335         ;
    336 .s_ar64NearZero:
    337         dq  (-18 + 1023) << 52          ; float / 32-bit / single precision input
    338         dq  (-40 + 1023) << 52          ; double / 64-bit / double precision input
    339         dq  (-52 + 1023) << 52          ; long double / 80-bit / extended precision input
    340 ENDPROC     rtNoCrtMathSinCore
    34134
    34235
Note: See TracChangeset for help on using the changeset viewer.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette