Changeset 96241 in vbox for trunk/src/VBox/Runtime/common/math
- Timestamp:
- Aug 17, 2022 1:03:48 AM (3 years ago)
- svn:sync-xref-src-repo-rev:
- 153053
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/Runtime/common/math/sin.asm
r96240 r96241 32 32 33 33 BEGINCODE 34 35 ;;36 ; Internal sine and cosine worker that calculates the sine of st0 returning37 ; it in st0.38 ;39 ; When called by a sine function, fabs(st0) >= pi/2.40 ; When called by a cosine function, fabs(original input value) >= 3pi/8.41 ;42 ; That the input isn't a tiny number close to zero, means that we can do a bit43 ; cruder rounding when operating close to a pi/2 boundrary. The value in the44 ; ecx register indicates the input precision and controls the crudeness of the45 ; rounding.46 ;47 ; @returns st0 = sine48 ; @param st0 A finite number to calucate sine of.49 ; @param ecx Set to 0 if original input was a 32-bit float.50 ; Set to 1 if original input was a 64-bit double.51 ; set to 2 if original input was a 80-bit long double.52 ;53 BEGINPROC rtNoCrtMathSinCore54 push xBP55 SEH64_PUSH_xBP56 mov xBP, xSP57 SEH64_SET_FRAME_xBP 058 SEH64_END_PROLOGUE59 60 ;61 ; Load the pointer to the rounding crudeness factor into xDX.62 ;63 lea xDX, [.s_ar64NearZero xWrtRIP]64 lea xDX, [xDX + xCX * xCB]65 66 ;67 ; Finite number. We want it in the range [0,2pi] and will preform68 ; a remainder division if it isn't.69 ;70 fcom qword [.s_r64Max xWrtRIP] ; compares st0 and 2*pi71 fnstsw ax72 test ax, X86_FSW_C3 | X86_FSW_C0 | X86_FSW_C2 ; C3 := st0 == mem; C0 := st0 < mem; C2 := unordered (should be the case);73 jz .reduce_st0 ; Jump if st0 > mem74 75 fcom qword [.s_r64Min xWrtRIP] ; compares st0 and 0.076 fnstsw ax77 test ax, X86_FSW_C3 | X86_FSW_C078 jnz .reduce_st0 ; Jump if st0 <= mem79 80 ;81 ; We get here if st0 is in the [0,2pi] range.82 ;83 ; Now, FSIN is documented to be reasonably accurate for the range84 ; -3pi/4 to +3pi/4, so we have to make some more effort to calculate85 ; in that range only.86 ;87 .in_range:88 ; if (st0 < pi)89 fldpi90 fcom st1 ; compares st0 (pi) with st1 (the normalized value)91 fnstsw ax92 test ax, X86_FSW_C0 ; st1 > pi93 jnz .larger_than_pi94 test ax, X86_FSW_C395 jnz .equals_pi96 97 ;98 ; input in the range [0,pi[99 ;100 .smaller_than_pi:101 fdiv qword [.s_r64Two xWrtRIP] ; st0 = pi/2102 103 ; if (st0 < pi/2)104 fcom st1 ; compares st0 (pi/2) with st1105 fnstsw ax106 test ax, X86_FSW_C0 ; st1 > pi107 jnz .between_half_pi_and_pi108 test ax, X86_FSW_C3109 jnz .equals_half_pi110 111 ;112 ; The value is between zero and half pi, including the zero value.113 ;114 ; This is in range where FSIN works reasonably reliably. So drop the115 ; half pi in st0 and do the calculation.116 ;117 .between_zero_and_half_pi:118 ; Check if we're so close to pi/2 that it makes no difference.119 fsub st0, st1 ; st0 = pi/2 - st1120 fcom qword [xDX]121 fnstsw ax122 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.123 jnz .equals_half_pi124 ffreep st0125 126 ; Check if we're so close to zero that it makes no difference given the127 ; internal accuracy of the FPU.128 fcom qword [xDX]129 fnstsw ax130 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.131 jnz .equals_zero_popped_one132 133 ; Ok, calculate sine.134 fsin135 jmp .return136 137 ;138 ; The value is in the range ]pi/2,pi[139 ;140 ; This is outside the comfortable FSIN range, but if we subtract PI and141 ; move to the ]-pi/2,0[ range we just have to change the sign to get142 ; the value we want.143 ;144 .between_half_pi_and_pi:145 ; Check if we're so close to pi/2 that it makes no difference.146 fsubr st0, st1 ; st0 = st1 - st0147 fcom qword [xDX]148 fnstsw ax149 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.150 jnz .equals_half_pi151 ffreep st0152 153 ; Check if we're so close to pi that it makes no difference.154 fldpi155 fsub st0, st1 ; st0 = st0 - st1156 fcom qword [xDX]157 fnstsw ax158 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.159 jnz .equals_pi160 ffreep st0161 162 ; Ok, transform the value and calculate sine.163 fldpi164 fsubp st1, st0165 166 fsin167 fchs168 jmp .return169 170 ;171 ; input in the range ]pi,2pi[172 ;173 .larger_than_pi:174 fsub st1, st0 ; st1 -= pi175 fdiv qword [.s_r64Two xWrtRIP] ; st0 = pi/2176 177 ; if (st0 < pi/2)178 fcom st1 ; compares st0 (pi/2) with reduced st1179 fnstsw ax180 test ax, X86_FSW_C0 ; st1 > pi181 jnz .between_3_half_pi_and_2pi182 test ax, X86_FSW_C3183 jnz .equals_3_half_pi184 185 ;186 ; The value is in the the range: ]pi,3pi/2[187 ;188 ; The actual st0 is in the range ]pi,pi/2[ where FSIN is performing okay189 ; and we can get the desired result by changing the sign (-FSIN).190 ;191 .between_pi_and_3_half_pi:192 ; Check if we're so close to pi/2 that it makes no difference.193 fsub st0, st1 ; st0 = pi/2 - st1194 fcom qword [xDX]195 fnstsw ax196 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.197 jnz .equals_3_half_pi198 ffreep st0199 200 ; Check if we're so close to zero that it makes no difference given the201 ; internal accuracy of the FPU.202 fcom qword [xDX]203 fnstsw ax204 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.205 jnz .equals_pi_popped206 207 ; Ok, calculate sine and flip the sign.208 fsin209 fchs210 jmp .return211 212 ;213 ; The value is in the last pi/2 of the range: ]3pi/2,2pi[214 ;215 ; Since FSIN should work reasonably well for ]-pi/2,pi], we can just216 ; subtract pi again (we subtracted pi at .larger_than_pi above) and217 ; run FSIN on it. (st1 is currently in the range ]pi/2,pi[.)218 ;219 .between_3_half_pi_and_2pi:220 ; Check if we're so close to pi/2 that it makes no difference.221 fsubr st0, st1 ; st0 = st1 - st0222 fcom qword [xDX]223 fnstsw ax224 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.225 jnz .equals_3_half_pi226 ffreep st0227 228 ; Check if we're so close to pi that it makes no difference.229 fldpi230 fsub st0, st1 ; st0 = st0 - st1231 fcom qword [xDX]232 fnstsw ax233 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number.234 jnz .equals_2pi235 ffreep st0236 237 ; Ok, adjust input and calculate sine.238 fldpi239 fsubp st1, st0240 fsin241 jmp .return242 243 ;244 ; sin(0) = 0245 ; sin(pi) = 0246 ;247 .equals_zero:248 .equals_pi:249 .equals_2pi:250 ffreep st0251 .equals_zero_popped_one:252 .equals_pi_popped:253 ffreep st0254 fldz255 jmp .return256 257 ;258 ; sin(pi/2) = 1259 ;260 .equals_half_pi:261 ffreep st0262 ffreep st0263 fld1264 jmp .return265 266 ;267 ; sin(3*pi/2) = -1268 ;269 .equals_3_half_pi:270 ffreep st0271 ffreep st0272 fld1273 fchs274 jmp .return275 276 ;277 ; Return.278 ;279 .return:280 leave281 ret282 283 ;284 ; Reduce st0 by reminder division by PI*2. The result should be positive here.285 ;286 ;; @todo this is one of our weak spots (really any calculation involving PI is).287 .reduce_st0:288 fldpi289 fadd st0, st0290 fxch st1 ; st0=input (dividend) st1=2pi (divisor)291 .again:292 fprem1293 fnstsw ax294 test ah, (X86_FSW_C2 >> 8) ; C2 is set if partial result.295 jnz .again ; Loop till C2 == 0 and we have a final result.296 297 ;298 ; Make sure the result is positive.299 ;300 fxam301 fnstsw ax302 test ax, X86_FSW_C1 ; The sign bit303 jz .reduced_to_positive304 305 fadd st0, st1 ; st0 += 2pi, which should make it positive306 307 %ifdef RT_STRICT308 fxam309 fnstsw ax310 test ax, X86_FSW_C1311 jz .reduced_to_positive312 int3313 %endif314 315 .reduced_to_positive:316 fstp st1 ; Get rid of the 2pi value.317 jmp .in_range318 319 ALIGNCODE(8)320 .s_r64Max:321 dq +6.28318530717958647692 ; 2*pi322 .s_r64Min:323 dq 0.0324 .s_r64Two:325 dq 2.0326 ;;327 ; Close to 2/pi rounding limits for 32-bit, 64-bit and 80-bit floating point operations.328 ; Given that the original input is at least +/-3pi/8 (1.178) and that precision of the329 ; PI constant used during reduction/whatever, I think we can round to a whole pi/2330 ; step when we get close enough.331 ;332 ; Look to RTFLOAT64U for the format details, but 52 is the shift for the exponent field333 ; and 1023 is the exponent bias. Since the format uses an implied 1 in the mantissa,334 ; we only have to set the exponent to get a valid number.335 ;336 .s_ar64NearZero:337 dq (-18 + 1023) << 52 ; float / 32-bit / single precision input338 dq (-40 + 1023) << 52 ; double / 64-bit / double precision input339 dq (-52 + 1023) << 52 ; long double / 80-bit / extended precision input340 ENDPROC rtNoCrtMathSinCore341 34 342 35
Note:
See TracChangeset
for help on using the changeset viewer.