Changeset 96240 in vbox
- Timestamp:
- Aug 17, 2022 12:59:31 AM (3 years ago)
- svn:sync-xref-src-repo-rev:
- 153052
- Location:
- trunk/src/VBox/Runtime
- Files:
-
- 1 added
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/Runtime/Makefile.kmk
r96213 r96240 1942 1942 common/math/sin.asm \ 1943 1943 common/math/sinf.asm \ 1944 common/math/sincore.asm \ 1944 1945 common/math/sqrt.asm \ 1945 1946 common/math/sqrtf.asm \ … … 2012 2013 common/math/sin.asm \ 2013 2014 common/math/sinf.asm \ 2015 common/math/sincore.asm \ 2014 2016 common/math/sqrt.asm \ 2015 2017 common/math/sqrtf.asm \ -
trunk/src/VBox/Runtime/common/math/cos.asm
r96060 r96240 25 25 ; 26 26 27 28 %define RT_ASM_WITH_SEH64 27 29 %include "iprt/asmdefs.mac" 30 %include "iprt/x86.mac" 31 28 32 29 33 BEGINCODE 30 34 31 35 ;; 32 ; compute the cosine of dr, measured in radians. 33 ; @returns st(0) / xmm0 36 ; Compute the cosine of rd, measured in radians. 37 ; 38 ; @returns st(0) / xmm0 34 39 ; @param rd [rbp + xCB*2] / xmm0 40 ; 35 41 RT_NOCRT_BEGINPROC cos 36 push xBP 37 mov xBP, xSP 42 push xBP 43 SEH64_PUSH_xBP 44 mov xBP, xSP 45 SEH64_SET_FRAME_xBP 0 46 sub xSP, 20h 47 SEH64_ALLOCATE_STACK 20h 48 SEH64_END_PROLOGUE 49 50 %ifdef RT_OS_WINDOWS 51 ; 52 ; Make sure we use full precision and not the windows default of 53 bits. 53 ; 54 ;; @todo not sure if this makes any difference... 55 fnstcw [xBP - 20h] 56 mov ax, [xBP - 20h] 57 or ax, X86_FCW_PC_64 ; includes both bits, so no need to clear the mask. 58 mov [xBP - 1ch], ax 59 fldcw [xBP - 1ch] 60 %endif 61 62 ; 63 ; Load the input into st0. 64 ; 38 65 %ifdef RT_ARCH_AMD64 39 sub xSP, 10h 40 41 movsd [xSP], xmm0 42 fld qword [xSP] 66 movsd [xBP - 10h], xmm0 67 fld qword [xBP - 10h] 43 68 %else 44 fld qword [xBP + xCB*2] 45 %endif 46 fcos 47 fnstsw ax 48 test ah, 4 49 jz .done 50 51 fldpi 52 fadd st0, st0 53 fxch st1 54 .again: 55 fprem1 56 fnstsw ax 57 test ah, 4 58 jnz .again 59 60 fstp st0 61 fcos 62 63 .done: 69 fld qword [xBP + xCB*2] 70 %endif 71 72 ; 73 ; The FCOS instruction has a very narrow range (-3pi/8 to 3pi/8) where it 74 ; works reliably, so outside that we'll use the FSIN instruction instead 75 ; as it has a larger good range (-5pi/4 to 1pi/4 for cosine). 76 ; Input conversion follows: cos(x) = sin(x + pi/2) 77 ; 78 ; We examin the input and weed out non-finit numbers first. 79 ; 80 81 ; We only do the range check on normal finite numbers. 82 fxam 83 fnstsw ax 84 and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 85 cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero) 86 je .finite 87 cmp ax, X86_FSW_C3 ; Zero 88 je .zero 89 cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals - treat them as zero. 90 je .zero 91 cmp ax, X86_FSW_C0 ; NaN - must handle it special, 92 je .nan 93 94 ; Pass infinities and unsupported inputs to fcos, assuming it does the right thing. 95 ; We also jump here if we get a finite number in the "good" range, see below. 96 .do_fcos: 97 fcos 98 jmp .return_val 99 100 ; 101 ; Finite number. 102 ; 103 ; First check if it's a very tiny number where we can simply return 1. 104 ; Next check if it's in the range where FCOS is reasonable, otherwise 105 ; go to FSIN to do the work. 106 ; 107 .finite: 108 fld st0 109 fabs 110 fld qword [.s_r64TinyCosTo1 xWrtRIP] 111 fcomip st1 112 jbe .zero_extra_pop 113 114 .not_that_tiny_input: 115 fld qword [.s_r64FCosOkay xWrtRIP] 116 fcomip st1 117 ffreep st0 ; pop fabs(input) 118 ja .do_fcos ; jmp if fabs(input) < .s_r64FCosOkay 119 120 ; 121 ; If we have a positive number we subtract 3pi/2, for negative we add pi/2. 122 ; We still have the FXAM result in AX. 123 ; 124 .outside_fcos_range: 125 test ax, X86_FSW_C1 ; The sign bit. 126 jnz .adjust_negative_to_sine 127 128 ; Calc -3pi/2 using FPU-internal pi constant. 129 fldpi 130 fadd st0, st0 ; st0=2pi 131 fldpi 132 fdiv qword [.s_r64Two xWrtRIP] ; st1=2pi; st0=pi/2 133 fsubp st1, st0 ; st0=3pi/2 134 fchs ; st0=-3pi/2 135 jmp .make_sine_adjustment 136 137 .adjust_negative_to_sine: 138 ; Calc +pi/2. 139 fldpi 140 fdiv qword [.s_r64Two xWrtRIP] ; st1=2pi; st0=pi/2 141 142 .make_sine_adjustment: 143 faddp st1, st0 144 145 ; 146 ; Call internal sine worker to calculate st0=sin(st0) 147 ; 148 .do_sine: 149 mov ecx, 1 ; double 150 extern NAME(rtNoCrtMathSinCore) 151 call NAME(rtNoCrtMathSinCore) 152 153 ; 154 ; Return st0. 155 ; 156 .return_val: 64 157 %ifdef RT_ARCH_AMD64 65 fstp qword [xSP] 66 movsd xmm0, [xSP] 67 %endif 68 leave 69 ret 158 fstp qword [xBP - 10h] 159 movsd xmm0, [xBP - 10h] 160 %endif 161 %ifdef RT_OS_WINDOWS 162 fldcw [xBP - 20h] ; restore original 163 %endif 164 .return: 165 leave 166 ret 167 168 ; 169 ; cos(+/-0) = +1.0 170 ; 171 .zero_extra_pop: 172 ffreep st0 173 .zero: 174 ffreep st0 175 fld1 176 jmp .return_val 177 178 ; 179 ; Input is NaN, output it unmodified as far as we can (FLD changes SNaN 180 ; to QNaN when masked). 181 ; 182 .nan: 183 %ifdef RT_ARCH_AMD64 184 ffreep st0 185 %endif 186 jmp .return 187 188 ; 189 ; Local constants. 190 ; 191 ALIGNCODE(8) 192 ; About 2**-27. When fabs(input) is below this limit we can consider cos(input) ~= 1.0. 193 .s_r64TinyCosTo1: 194 dq 7.4505806e-9 195 196 ; The absolute limit for the range which FCOS is expected to produce reasonable results. 197 .s_r64FCosOkay: 198 dq 1.1780972450961724644225 ; 3*pi/8 199 200 .s_r64Two: 201 dq 2.0 70 202 ENDPROC RT_NOCRT(cos) 71 203 -
trunk/src/VBox/Runtime/common/math/sin.asm
r96060 r96240 25 25 ; 26 26 27 28 %define RT_ASM_WITH_SEH64 27 29 %include "iprt/asmdefs.mac" 30 %include "iprt/x86.mac" 31 28 32 29 33 BEGINCODE 30 34 31 35 ;; 32 ; Compute the sine of rd 33 ; @returns st(0)/xmm0 34 ; @param rd [xSP + xCB*2] / xmm0 36 ; Internal sine and cosine worker that calculates the sine of st0 returning 37 ; it in st0. 38 ; 39 ; When called by a sine function, fabs(st0) >= pi/2. 40 ; When called by a cosine function, fabs(original input value) >= 3pi/8. 41 ; 42 ; That the input isn't a tiny number close to zero, means that we can do a bit 43 ; cruder rounding when operating close to a pi/2 boundrary. The value in the 44 ; ecx register indicates the input precision and controls the crudeness of the 45 ; rounding. 46 ; 47 ; @returns st0 = sine 48 ; @param st0 A finite number to calucate sine of. 49 ; @param ecx Set to 0 if original input was a 32-bit float. 50 ; Set to 1 if original input was a 64-bit double. 51 ; set to 2 if original input was a 80-bit long double. 52 ; 53 BEGINPROC rtNoCrtMathSinCore 54 push xBP 55 SEH64_PUSH_xBP 56 mov xBP, xSP 57 SEH64_SET_FRAME_xBP 0 58 SEH64_END_PROLOGUE 59 60 ; 61 ; Load the pointer to the rounding crudeness factor into xDX. 62 ; 63 lea xDX, [.s_ar64NearZero xWrtRIP] 64 lea xDX, [xDX + xCX * xCB] 65 66 ; 67 ; Finite number. We want it in the range [0,2pi] and will preform 68 ; a remainder division if it isn't. 69 ; 70 fcom qword [.s_r64Max xWrtRIP] ; compares st0 and 2*pi 71 fnstsw ax 72 test ax, X86_FSW_C3 | X86_FSW_C0 | X86_FSW_C2 ; C3 := st0 == mem; C0 := st0 < mem; C2 := unordered (should be the case); 73 jz .reduce_st0 ; Jump if st0 > mem 74 75 fcom qword [.s_r64Min xWrtRIP] ; compares st0 and 0.0 76 fnstsw ax 77 test ax, X86_FSW_C3 | X86_FSW_C0 78 jnz .reduce_st0 ; Jump if st0 <= mem 79 80 ; 81 ; We get here if st0 is in the [0,2pi] range. 82 ; 83 ; Now, FSIN is documented to be reasonably accurate for the range 84 ; -3pi/4 to +3pi/4, so we have to make some more effort to calculate 85 ; in that range only. 86 ; 87 .in_range: 88 ; if (st0 < pi) 89 fldpi 90 fcom st1 ; compares st0 (pi) with st1 (the normalized value) 91 fnstsw ax 92 test ax, X86_FSW_C0 ; st1 > pi 93 jnz .larger_than_pi 94 test ax, X86_FSW_C3 95 jnz .equals_pi 96 97 ; 98 ; input in the range [0,pi[ 99 ; 100 .smaller_than_pi: 101 fdiv qword [.s_r64Two xWrtRIP] ; st0 = pi/2 102 103 ; if (st0 < pi/2) 104 fcom st1 ; compares st0 (pi/2) with st1 105 fnstsw ax 106 test ax, X86_FSW_C0 ; st1 > pi 107 jnz .between_half_pi_and_pi 108 test ax, X86_FSW_C3 109 jnz .equals_half_pi 110 111 ; 112 ; The value is between zero and half pi, including the zero value. 113 ; 114 ; This is in range where FSIN works reasonably reliably. So drop the 115 ; half pi in st0 and do the calculation. 116 ; 117 .between_zero_and_half_pi: 118 ; Check if we're so close to pi/2 that it makes no difference. 119 fsub st0, st1 ; st0 = pi/2 - st1 120 fcom qword [xDX] 121 fnstsw ax 122 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number. 123 jnz .equals_half_pi 124 ffreep st0 125 126 ; Check if we're so close to zero that it makes no difference given the 127 ; internal accuracy of the FPU. 128 fcom qword [xDX] 129 fnstsw ax 130 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number. 131 jnz .equals_zero_popped_one 132 133 ; Ok, calculate sine. 134 fsin 135 jmp .return 136 137 ; 138 ; The value is in the range ]pi/2,pi[ 139 ; 140 ; This is outside the comfortable FSIN range, but if we subtract PI and 141 ; move to the ]-pi/2,0[ range we just have to change the sign to get 142 ; the value we want. 143 ; 144 .between_half_pi_and_pi: 145 ; Check if we're so close to pi/2 that it makes no difference. 146 fsubr st0, st1 ; st0 = st1 - st0 147 fcom qword [xDX] 148 fnstsw ax 149 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number. 150 jnz .equals_half_pi 151 ffreep st0 152 153 ; Check if we're so close to pi that it makes no difference. 154 fldpi 155 fsub st0, st1 ; st0 = st0 - st1 156 fcom qword [xDX] 157 fnstsw ax 158 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number. 159 jnz .equals_pi 160 ffreep st0 161 162 ; Ok, transform the value and calculate sine. 163 fldpi 164 fsubp st1, st0 165 166 fsin 167 fchs 168 jmp .return 169 170 ; 171 ; input in the range ]pi,2pi[ 172 ; 173 .larger_than_pi: 174 fsub st1, st0 ; st1 -= pi 175 fdiv qword [.s_r64Two xWrtRIP] ; st0 = pi/2 176 177 ; if (st0 < pi/2) 178 fcom st1 ; compares st0 (pi/2) with reduced st1 179 fnstsw ax 180 test ax, X86_FSW_C0 ; st1 > pi 181 jnz .between_3_half_pi_and_2pi 182 test ax, X86_FSW_C3 183 jnz .equals_3_half_pi 184 185 ; 186 ; The value is in the the range: ]pi,3pi/2[ 187 ; 188 ; The actual st0 is in the range ]pi,pi/2[ where FSIN is performing okay 189 ; and we can get the desired result by changing the sign (-FSIN). 190 ; 191 .between_pi_and_3_half_pi: 192 ; Check if we're so close to pi/2 that it makes no difference. 193 fsub st0, st1 ; st0 = pi/2 - st1 194 fcom qword [xDX] 195 fnstsw ax 196 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number. 197 jnz .equals_3_half_pi 198 ffreep st0 199 200 ; Check if we're so close to zero that it makes no difference given the 201 ; internal accuracy of the FPU. 202 fcom qword [xDX] 203 fnstsw ax 204 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number. 205 jnz .equals_pi_popped 206 207 ; Ok, calculate sine and flip the sign. 208 fsin 209 fchs 210 jmp .return 211 212 ; 213 ; The value is in the last pi/2 of the range: ]3pi/2,2pi[ 214 ; 215 ; Since FSIN should work reasonably well for ]-pi/2,pi], we can just 216 ; subtract pi again (we subtracted pi at .larger_than_pi above) and 217 ; run FSIN on it. (st1 is currently in the range ]pi/2,pi[.) 218 ; 219 .between_3_half_pi_and_2pi: 220 ; Check if we're so close to pi/2 that it makes no difference. 221 fsubr st0, st1 ; st0 = st1 - st0 222 fcom qword [xDX] 223 fnstsw ax 224 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number. 225 jnz .equals_3_half_pi 226 ffreep st0 227 228 ; Check if we're so close to pi that it makes no difference. 229 fldpi 230 fsub st0, st1 ; st0 = st0 - st1 231 fcom qword [xDX] 232 fnstsw ax 233 test ax, X86_FSW_C0 | X86_FSW_C3 ; st0 <= very small positive number. 234 jnz .equals_2pi 235 ffreep st0 236 237 ; Ok, adjust input and calculate sine. 238 fldpi 239 fsubp st1, st0 240 fsin 241 jmp .return 242 243 ; 244 ; sin(0) = 0 245 ; sin(pi) = 0 246 ; 247 .equals_zero: 248 .equals_pi: 249 .equals_2pi: 250 ffreep st0 251 .equals_zero_popped_one: 252 .equals_pi_popped: 253 ffreep st0 254 fldz 255 jmp .return 256 257 ; 258 ; sin(pi/2) = 1 259 ; 260 .equals_half_pi: 261 ffreep st0 262 ffreep st0 263 fld1 264 jmp .return 265 266 ; 267 ; sin(3*pi/2) = -1 268 ; 269 .equals_3_half_pi: 270 ffreep st0 271 ffreep st0 272 fld1 273 fchs 274 jmp .return 275 276 ; 277 ; Return. 278 ; 279 .return: 280 leave 281 ret 282 283 ; 284 ; Reduce st0 by reminder division by PI*2. The result should be positive here. 285 ; 286 ;; @todo this is one of our weak spots (really any calculation involving PI is). 287 .reduce_st0: 288 fldpi 289 fadd st0, st0 290 fxch st1 ; st0=input (dividend) st1=2pi (divisor) 291 .again: 292 fprem1 293 fnstsw ax 294 test ah, (X86_FSW_C2 >> 8) ; C2 is set if partial result. 295 jnz .again ; Loop till C2 == 0 and we have a final result. 296 297 ; 298 ; Make sure the result is positive. 299 ; 300 fxam 301 fnstsw ax 302 test ax, X86_FSW_C1 ; The sign bit 303 jz .reduced_to_positive 304 305 fadd st0, st1 ; st0 += 2pi, which should make it positive 306 307 %ifdef RT_STRICT 308 fxam 309 fnstsw ax 310 test ax, X86_FSW_C1 311 jz .reduced_to_positive 312 int3 313 %endif 314 315 .reduced_to_positive: 316 fstp st1 ; Get rid of the 2pi value. 317 jmp .in_range 318 319 ALIGNCODE(8) 320 .s_r64Max: 321 dq +6.28318530717958647692 ; 2*pi 322 .s_r64Min: 323 dq 0.0 324 .s_r64Two: 325 dq 2.0 326 ;; 327 ; Close to 2/pi rounding limits for 32-bit, 64-bit and 80-bit floating point operations. 328 ; Given that the original input is at least +/-3pi/8 (1.178) and that precision of the 329 ; PI constant used during reduction/whatever, I think we can round to a whole pi/2 330 ; step when we get close enough. 331 ; 332 ; Look to RTFLOAT64U for the format details, but 52 is the shift for the exponent field 333 ; and 1023 is the exponent bias. Since the format uses an implied 1 in the mantissa, 334 ; we only have to set the exponent to get a valid number. 335 ; 336 .s_ar64NearZero: 337 dq (-18 + 1023) << 52 ; float / 32-bit / single precision input 338 dq (-40 + 1023) << 52 ; double / 64-bit / double precision input 339 dq (-52 + 1023) << 52 ; long double / 80-bit / extended precision input 340 ENDPROC rtNoCrtMathSinCore 341 342 343 ;; 344 ; Compute the sine of rd, measured in radians. 345 ; 346 ; @returns st(0) / xmm0 347 ; @param rd [rbp + xCB*2] / xmm0 348 ; 35 349 RT_NOCRT_BEGINPROC sin 36 push xBP 37 mov xBP, xSP 38 350 push xBP 351 SEH64_PUSH_xBP 352 mov xBP, xSP 353 SEH64_SET_FRAME_xBP 0 354 sub xSP, 20h 355 SEH64_ALLOCATE_STACK 20h 356 SEH64_END_PROLOGUE 357 358 %ifdef RT_OS_WINDOWS 359 ; 360 ; Make sure we use full precision and not the windows default of 53 bits. 361 ; 362 fnstcw [xBP - 20h] 363 mov ax, [xBP - 20h] 364 or ax, X86_FCW_PC_64 ; includes both bits, so no need to clear the mask. 365 mov [xBP - 1ch], ax 366 fldcw [xBP - 1ch] 367 %endif 368 369 ; 370 ; Load the input into st0. 371 ; 39 372 %ifdef RT_ARCH_AMD64 40 sub xSP, 10h 41 42 movsd [xSP], xmm0 43 fld qword [xSP] 373 movsd [xBP - 10h], xmm0 374 fld qword [xBP - 10h] 44 375 %else 45 fld qword [xBP + xCB*2] 46 %endif 47 fsin 48 fnstsw ax 49 test ah, 04h 50 jz .done 51 52 fldpi 53 fadd st0 54 fxch st1 55 .again: 56 fprem1 57 fnstsw ax 58 test ah, 04h 59 jnz .again 60 fstp st1 61 fsin 62 63 .done: 376 fld qword [xBP + xCB*2] 377 %endif 378 379 ; 380 ; We examin the input and weed out non-finit numbers first. 381 ; 382 fxam 383 fnstsw ax 384 and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 385 cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero) 386 je .finite 387 cmp ax, X86_FSW_C3 ; Zero 388 je .zero 389 cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals - treat them as zero. 390 je .zero 391 cmp ax, X86_FSW_C0 ; NaN - must handle it special, 392 je .nan 393 394 ; Pass infinities and unsupported inputs to fsin, assuming it does the right thing. 395 .do_sin: 396 fsin 397 jmp .return_val 398 399 ; 400 ; Finite number. 401 ; 402 .finite: 403 ; For very tiny numbers, 0 < abs(input) < 2**-25, we can return the 404 ; input value directly. 405 fld st0 ; duplicate st0 406 fabs ; make it an absolute (positive) value. 407 fld qword [.s_r64Tiny xWrtRIP] 408 fcomip st1 ; compare s_r64Tiny and fabs(input) 409 ja .return_tiny_number_as_is ; jump if fabs(input) is smaller 410 411 ; FSIN is documented to be reasonable for the range ]-3pi/4,3pi/4[, so 412 ; while we have fabs(input) loaded already, check for that here and 413 ; allow rtNoCrtMathSinCore to assume it won't see values very close to 414 ; zero, except by cos -> sin conversion where they won't be relevant to 415 ; any assumpttions about precision approximation. 416 fld qword [.s_r64FSinOkay xWrtRIP] 417 fcomip st1 418 ffreep st0 ; drop the fabs(input) value 419 ja .do_sin 420 421 ; 422 ; Call common sine/cos worker. 423 ; 424 mov ecx, 1 ; double 425 extern NAME(rtNoCrtMathSinCore) 426 call NAME(rtNoCrtMathSinCore) 427 428 ; 429 ; Run st0. 430 ; 431 .return_val: 64 432 %ifdef RT_ARCH_AMD64 65 fstp qword [xSP] 66 movsd xmm0, [xSP] 67 %endif 68 leave 69 ret 433 fstp qword [xBP - 10h] 434 movsd xmm0, [xBP - 10h] 435 %endif 436 %ifdef RT_OS_WINDOWS 437 fldcw [xBP - 20h] ; restore original 438 %endif 439 .return: 440 leave 441 ret 442 443 ; 444 ; As explained already, we can return tiny numbers directly too as the 445 ; output from sin(input) = input given our precision. 446 ; We can skip the st0 -> xmm0 translation here, so follow the same path 447 ; as .zero & .nan, after we've removed the fabs(input) value. 448 ; 449 .return_tiny_number_as_is: 450 ffreep st0 451 452 ; 453 ; sin(+/-0.0) = +/-0.0 (preserve the sign) 454 ; We can skip the st0 -> xmm0 translation here, so follow the .nan code path. 455 ; 456 .zero: 457 458 ; 459 ; Input is NaN, output it unmodified as far as we can (FLD changes SNaN 460 ; to QNaN when masked). 461 ; 462 .nan: 463 %ifdef RT_ARCH_AMD64 464 ffreep st0 465 %endif 466 jmp .return 467 468 ALIGNCODE(8) 469 ; Ca. 2**-26, absolute value. Inputs closer to zero than this can be 470 ; returns directly as the sin(input) value should be basically the same 471 ; given the precision we're working with and FSIN probably won't even 472 ; manage that. 473 ;; @todo experiment when FSIN gets better than this. 474 .s_r64Tiny: 475 dq 1.49011612e-8 476 ; The absolute limit of FSIN "good" range. 477 .s_r64FSinOkay: 478 dq 2.356194490192344928845 ; 3pi/4 479 ;dq 1.57079632679489661923 ; pi/2 - alternative. 480 70 481 ENDPROC RT_NOCRT(sin) 71 482 -
trunk/src/VBox/Runtime/testcase/Makefile.kmk
r96213 r96240 677 677 ../common/math/sin.asm \ 678 678 ../common/math/sinf.asm \ 679 ../common/math/sincore.asm \ 679 680 ../common/math/sqrt.asm \ 680 681 ../common/math/sqrtf.asm \ -
trunk/src/VBox/Runtime/testcase/tstRTNoCrt-2.cpp
r96224 r96240 51 51 #endif 52 52 53 /* Stuff we provide in our math, but UCRT apparently doesn't: */ 54 #ifndef M_E 55 # define M_E 2.7182818284590452354 /* e */ 56 #endif 57 #ifndef M_LOG2E 58 # define M_LOG2E 1.4426950408889634074 /* log 2e */ 59 #endif 60 #ifndef M_LOG10E 61 # define M_LOG10E 0.43429448190325182765 /* log 10e */ 62 #endif 63 #ifndef M_LN2 64 # define M_LN2 0.69314718055994530942 /* log e2 */ 65 #endif 66 #ifndef M_LN10 67 # define M_LN10 2.30258509299404568402 /* log e10 */ 68 #endif 69 #ifndef M_PI 70 # define M_PI 3.14159265358979323846 /* pi */ 71 #endif 72 #ifndef M_PI_2 73 # define M_PI_2 1.57079632679489661923 /* pi/2 */ 74 #endif 75 #ifndef M_PI_4 76 # define M_PI_4 0.78539816339744830962 /* pi/4 */ 77 #endif 78 #ifndef M_1_PI 79 # define M_1_PI 0.31830988618379067154 /* 1/pi */ 80 #endif 81 #ifndef M_2_PI 82 # define M_2_PI 0.63661977236758134308 /* 2/pi */ 83 #endif 84 #ifndef M_2_SQRTPI 85 # define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */ 86 #endif 87 #ifndef M_SQRT2 88 # define M_SQRT2 1.41421356237309504880 /* sqrt(2) */ 89 #endif 90 #ifndef M_SQRT1_2 91 # define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */ 92 #endif 93 53 94 54 95 /********************************************************************************************************************************* … … 280 321 } while (0) 281 322 323 #define CHECK_DBL_RANGE(a_Expr, a_rdExpect, a_rdPlusMin) do { \ 324 RTFLOAT64U uRet; \ 325 uRet.r = a_Expr; \ 326 RTFLOAT64U uExpectMin; \ 327 uExpectMin.r = (a_rdExpect) - (a_rdPlusMin); \ 328 RTFLOAT64U uExpectMax; \ 329 uExpectMax.r = (a_rdExpect) + (a_rdPlusMin); \ 330 if ( !(RTFLOAT64U_IS_NORMAL(&uRet) || RTFLOAT64U_IS_ZERO(&uRet))\ 331 || uRet.r < uExpectMin.r \ 332 || uRet.r > uExpectMax.r ) \ 333 { \ 334 RTStrFormatR64(g_szFloat[0], sizeof(g_szFloat[0]), &uRet, 0, 0, RTSTR_F_SPECIAL); \ 335 RTStrFormatR64(g_szFloat[1], sizeof(g_szFloat[1]), &uExpectMin, 0, 0, RTSTR_F_SPECIAL); \ 336 RTStrFormatR64(g_szFloat[2], sizeof(g_szFloat[2]), &uExpectMax, 0, 0, RTSTR_F_SPECIAL); \ 337 RTTestFailed(g_hTest, "line %u: %s -> %s, expected [%s,%s] (%s +/- %s)", \ 338 __LINE__, #a_Expr, g_szFloat[0], g_szFloat[1], #a_rdExpect, #a_rdPlusMin); \ 339 } \ 340 } while (0) 341 282 342 #define CHECK_DBL_SAME_RELAXED_NAN(a_Fn, a_Args) do { \ 283 343 RTFLOAT64U uNoCrtRet, uCrtRet; \ … … 361 421 *********************************************************************************************************************************/ 362 422 RTTEST g_hTest; 363 char g_szFloat[ 2][128];423 char g_szFloat[4][128]; 364 424 365 425 … … 2694 2754 RTTestSub(g_hTest, "atan[f]"); 2695 2755 2696 CHECK_DBL(RT_NOCRT(atan)( +1.0), + 0.78539816339744830962 /*+M_PI_4*/);2697 CHECK_DBL(RT_NOCRT(atan)( -1.0), - 0.78539816339744830962 /*-M_PI_4*/);2698 CHECK_DBL(RT_NOCRT(atan)( +INFINITY), + 1.57079632679489661923 /*+M_PI_2*/);2699 CHECK_DBL(RT_NOCRT(atan)( -INFINITY), - 1.57079632679489661923 /*-M_PI_2*/);2756 CHECK_DBL(RT_NOCRT(atan)( +1.0), +M_PI_4); 2757 CHECK_DBL(RT_NOCRT(atan)( -1.0), -M_PI_4); 2758 CHECK_DBL(RT_NOCRT(atan)( +INFINITY), +M_PI_2); 2759 CHECK_DBL(RT_NOCRT(atan)( -INFINITY), -M_PI_2); 2700 2760 CHECK_DBL_SAME( atan,( 1.0)); 2701 2761 CHECK_DBL_SAME( atan,( 1.5)); … … 2723 2783 CHECK_DBL_SAME( atan,(RTStrNanDouble("s", false))); 2724 2784 2725 CHECK_DBL(RT_NOCRT(atanf)( +1.0f), +0.78539816339744830962f /*+M_PI_4*/);2726 CHECK_DBL(RT_NOCRT(atanf)( -1.0f), -0.78539816339744830962f /*-M_PI_4*/);2727 CHECK_DBL(RT_NOCRT(atanf)( +INFINITY), +1.57079632679489661923f /*+M_PI_2*/);2728 CHECK_DBL(RT_NOCRT(atanf)( -INFINITY), -1.57079632679489661923f /*-M_PI_2*/);2785 CHECK_DBL(RT_NOCRT(atanf)( +1.0f), (float)+M_PI_4); 2786 CHECK_DBL(RT_NOCRT(atanf)( -1.0f), (float)-M_PI_4); 2787 CHECK_DBL(RT_NOCRT(atanf)( +INFINITY), (float)+M_PI_2); 2788 CHECK_DBL(RT_NOCRT(atanf)( -INFINITY), (float)-M_PI_2); 2729 2789 CHECK_DBL_SAME( atanf,( 1.0f)); 2730 2790 CHECK_DBL_SAME( atanf,( 1.5f)); … … 2748 2808 CHECK_DBL_SAME( atanf,(2.34960584706e+30f)); 2749 2809 CHECK_DBL_SAME( atanf,(2.34960584706e+30f)); 2750 CHECK_DBL_SAME( atanf,(RTStrNanDouble(NULL, true))); 2751 CHECK_DBL_SAME( atanf,(RTStrNanDouble("s", true))); 2752 CHECK_DBL_SAME( atanf,(RTStrNanDouble("s", false))); 2753 2810 CHECK_DBL_SAME( atanf,(RTStrNanFloat(NULL, true))); 2811 CHECK_DBL_SAME( atanf,(RTStrNanFloat("s", true))); 2812 CHECK_DBL_SAME( atanf,(RTStrNanFloat("s", false))); 2754 2813 } 2755 2814 2815 2816 void testATan2() 2817 { 2818 RTTestSub(g_hTest, "atan2[f]"); 2819 2820 CHECK_DBL(RT_NOCRT(atan2)( +1.0, 0.0), +M_PI_2); 2821 CHECK_DBL(RT_NOCRT(atan2)( -1.0, 0.0), -M_PI_2); 2822 CHECK_DBL(RT_NOCRT(atan2)( +1.0, +1.0), +M_PI_4); 2823 CHECK_DBL(RT_NOCRT(atan2)( -1.0, -1.0), -M_PI_2 - M_PI_4); 2824 CHECK_DBL_SAME( atan2,( +1.0, 0.0)); 2825 CHECK_DBL_SAME( atan2,( +1.0, -0.0)); 2826 CHECK_DBL_SAME( atan2,( -1.0, 0.0)); 2827 CHECK_DBL_SAME( atan2,( -1.0, -0.0)); 2828 CHECK_DBL_SAME( atan2,( +1.0, +1.0)); 2829 CHECK_DBL_SAME( atan2,( -1.0, +1.0)); 2830 CHECK_DBL_SAME( atan2,( +1.0, -1.0)); 2831 CHECK_DBL_SAME( atan2,( -1.0, -1.0)); 2832 CHECK_DBL_SAME( atan2,( 238.6634566, -999999.0)); 2833 CHECK_DBL_SAME( atan2,( -905698045.1, 490876.0)); 2834 CHECK_DBL_SAME( atan2,( 1.333334e-10, -1.9993e+200)); 2835 CHECK_DBL_SAME( atan2,( 1.333334e+168, -1.9993e+299)); 2836 CHECK_DBL_SAME( atan2,( +DBL_MAX, +DBL_MAX)); 2837 CHECK_DBL_SAME( atan2,( -DBL_MAX, +DBL_MAX)); 2838 CHECK_DBL_SAME( atan2,( +INFINITY, +INFINITY)); 2839 CHECK_DBL_SAME( atan2,( -INFINITY, +INFINITY)); 2840 CHECK_DBL_SAME( atan2,( -INFINITY, 42.242424)); 2841 CHECK_DBL_SAME( atan2,(RTStrNanDouble(NULL, true), RTStrNanDouble(NULL, true))); 2842 CHECK_DBL_SAME( atan2,(RTStrNanDouble(NULL, false), RTStrNanDouble(NULL, false))); 2843 CHECK_DBL_SAME( atan2,(RTStrNanDouble(NULL, false), RTStrNanDouble(NULL, true))); 2844 //CHECK_DBL_SAME( atan2,(RTStrNanDouble(NULL, true), RTStrNanDouble(NULL, false))); - UCRT returns -QNaN, we +QNaN 2845 CHECK_DBL_SAME( atan2,(RTStrNanDouble(NULL, true), RTStrNanDouble("s", false))); 2846 2847 CHECK_FLT(RT_NOCRT(atan2f)( +1.0f, 0.0f), (float)+M_PI_2); 2848 CHECK_FLT(RT_NOCRT(atan2f)( -1.0f, 0.0f), (float)-M_PI_2); 2849 CHECK_FLT(RT_NOCRT(atan2f)( +1.0f, +1.0f), (float)+M_PI_4); 2850 CHECK_FLT(RT_NOCRT(atan2f)( -1.0f, -1.0f), (float)(-M_PI_2 - M_PI_4)); 2851 CHECK_FLT_SAME( atan2f,( +1.0f, 0.0f)); 2852 CHECK_FLT_SAME( atan2f,( +1.0f, -0.0f)); 2853 CHECK_FLT_SAME( atan2f,( -1.0f, 0.0f)); 2854 CHECK_FLT_SAME( atan2f,( -1.0f, -0.0f)); 2855 CHECK_FLT_SAME( atan2f,( +1.0f, +1.0f)); 2856 CHECK_FLT_SAME( atan2f,( -1.0f, +1.0f)); 2857 CHECK_FLT_SAME( atan2f,( +1.0f, -1.0f)); 2858 CHECK_FLT_SAME( atan2f,( -1.0f, -1.0f)); 2859 CHECK_FLT_SAME( atan2f,( 238.6634566f, -999999.0f)); 2860 CHECK_FLT_SAME( atan2f,( -905698045.1f, 490876.0f)); 2861 CHECK_FLT_SAME( atan2f,( 1.333334e-10f, -1.9993e+20f)); 2862 CHECK_FLT_SAME( atan2f,( 1.333334e+35f, -1.9993e+29f)); 2863 CHECK_FLT_SAME( atan2f,( +FLT_MAX, +FLT_MAX)); 2864 CHECK_FLT_SAME( atan2f,( -FLT_MAX, +FLT_MAX)); 2865 CHECK_FLT_SAME( atan2f,( +INFINITY, +INFINITY)); 2866 CHECK_FLT_SAME( atan2f,( -INFINITY, +INFINITY)); 2867 CHECK_FLT_SAME( atan2f,( -INFINITY, 42.242424f)); 2868 CHECK_FLT_SAME( atan2f,(RTStrNanFloat(NULL, true), RTStrNanFloat(NULL, true))); 2869 CHECK_FLT_SAME( atan2f,(RTStrNanFloat(NULL, false), RTStrNanFloat(NULL, false))); 2870 CHECK_FLT_SAME( atan2f,(RTStrNanFloat(NULL, false), RTStrNanFloat(NULL, true))); 2871 //CHECK_FLT_SAME( atan2f,(RTStrNanFloat(NULL, true), RTStrNanFloat(NULL, false))); - UCRT returns -QNaN, we +QNaN 2872 CHECK_FLT_SAME( atan2f,(RTStrNanFloat(NULL, true), RTStrNanFloat("s", false))); 2873 } 2874 2875 2876 void testSin() 2877 { 2878 RTTestSub(g_hTest, "sin[f]"); 2879 2880 /* 2881 * Note! sin, cos and friends are complicated the results may differ between 2882 * implementations. The numbers below was computed using amd64 glibc 2883 * (2.27-3ubuntu1.4) sinl() and a %.33Lf printf. 2884 * 2885 * Our code is based on the x87 CPU and does not have the best 2886 * reduction code is inaccurate, so accuracy drops. Also, with the 2887 * input accuracy difference we must expect differences too. 2888 */ 2889 CHECK_DBL( RT_NOCRT(sin)( +0.0), +0.0); 2890 CHECK_DBL( RT_NOCRT(sin)( -0.0), -0.0); 2891 CHECK_DBL( RT_NOCRT(sin)( +M_PI), +0.0); 2892 CHECK_DBL( RT_NOCRT(sin)( -M_PI), +0.0); 2893 CHECK_DBL( RT_NOCRT(sin)( +M_PI_2), +1.0); 2894 CHECK_DBL( RT_NOCRT(sin)( -M_PI_2), -1.0); 2895 CHECK_DBL( RT_NOCRT(sin)( +M_PI_2 + M_PI*4), +1.0); 2896 CHECK_DBL( RT_NOCRT(sin)( -M_PI_2 - M_PI*4), -1.0); 2897 2898 CHECK_DBL( RT_NOCRT(sin)( +M_PI_2 + M_PI*2), +1.0); 2899 CHECK_DBL( RT_NOCRT(sin)( -M_PI_2 - M_PI*2), -1.0); 2900 CHECK_DBL( RT_NOCRT(sin)( +1.0), +0.84147098480789650488); 2901 CHECK_DBL( RT_NOCRT(sin)( +2.0), +0.90929742682568170942); 2902 CHECK_DBL( RT_NOCRT(sin)( +3.0), +0.14112000805986721352); 2903 CHECK_DBL( RT_NOCRT(sin)( +4.0), -0.75680249530792820245); 2904 CHECK_DBL( RT_NOCRT(sin)( +5.0), -0.95892427466313845397); 2905 CHECK_DBL( RT_NOCRT(sin)( +6.0), -0.27941549819892586015); 2906 CHECK_DBL( RT_NOCRT(sin)( +7.0), +0.65698659871878906102); 2907 CHECK_DBL( RT_NOCRT(sin)( +8.0), +0.98935824662338178737); 2908 CHECK_DBL( RT_NOCRT(sin)( +9.0), +0.41211848524175659358); 2909 CHECK_DBL( RT_NOCRT(sin)( +10.0), -0.54402111088936977445); 2910 CHECK_DBL( RT_NOCRT(sin)( +100.0), -0.50636564110975879061); 2911 CHECK_DBL( RT_NOCRT(sin)( +654.216812456), +0.69292681127157818022); 2912 CHECK_DBL( RT_NOCRT(sin)( 10.1010101010101010101010), -0.62585878258501614901); 2913 CHECK_DBL( RT_NOCRT(sin)( +25.2525252525252525252525), +0.11949778146891366915); 2914 CHECK_DBL( RT_NOCRT(sin)( +252.2525252525252525252525), +0.79868874455343841223); 2915 CHECK_DBL( RT_NOCRT(sin)( +2525.2525252525252525252525), -0.55467159842968405403); 2916 CHECK_DBL_RANGE(RT_NOCRT(sin)( +25252.2525252525252525252525), +0.13040325588994761130, 0.0000000000000010000); 2917 CHECK_DBL_RANGE(RT_NOCRT(sin)(+252525.2525252525252525252525), -0.77923047482990159818, 0.0000000000000100000); 2918 2919 CHECK_DBL( RT_NOCRT(sin)( -1.0), -0.84147098480789650488); 2920 CHECK_DBL( RT_NOCRT(sin)( -2.0), -0.90929742682568170942); 2921 CHECK_DBL( RT_NOCRT(sin)( -3.0), -0.14112000805986721352); 2922 CHECK_DBL( RT_NOCRT(sin)( -4.0), +0.75680249530792820245); 2923 CHECK_DBL( RT_NOCRT(sin)( -5.0), +0.95892427466313845397); 2924 CHECK_DBL( RT_NOCRT(sin)( -6.0), +0.27941549819892586015); 2925 CHECK_DBL( RT_NOCRT(sin)( -7.0), -0.65698659871878906102); 2926 CHECK_DBL( RT_NOCRT(sin)( -8.0), -0.98935824662338178737); 2927 CHECK_DBL( RT_NOCRT(sin)( -9.0), -0.41211848524175659358); 2928 CHECK_DBL( RT_NOCRT(sin)( -10.0), +0.54402111088936977445); 2929 CHECK_DBL( RT_NOCRT(sin)( -100.0), +0.50636564110975879061); 2930 CHECK_DBL( RT_NOCRT(sin)( -654.216812456), -0.69292681127157818022); 2931 CHECK_DBL( RT_NOCRT(sin)( -10.1010101010101010101010), +0.62585878258501614901); 2932 CHECK_DBL( RT_NOCRT(sin)( -25.2525252525252525252525), -0.11949778146891366915); 2933 CHECK_DBL( RT_NOCRT(sin)( -252.2525252525252525252525), -0.79868874455343841223); 2934 CHECK_DBL( RT_NOCRT(sin)( -2525.2525252525252525252525), +0.55467159842968405403); 2935 CHECK_DBL_RANGE(RT_NOCRT(sin)( -25252.2525252525252525252525), -0.13040325588994761130, 0.0000000000000010000); 2936 CHECK_DBL_RANGE(RT_NOCRT(sin)(-252525.2525252525252525252525), +0.77923047482990159818, 0.0000000000000100000); 2937 CHECK_DBL( RT_NOCRT(sin)( RTStrNanDouble("s", true)), RTStrNanDouble("s", true)); 2938 CHECK_DBL( RT_NOCRT(sin)(RTStrNanDouble("9999s", false)), RTStrNanDouble("9999s", false)); 2939 2940 CHECK_DBL_SAME( sin,( 1.0)); 2941 CHECK_DBL_SAME( sin,( 1.5)); 2942 CHECK_DBL_SAME( sin,( +0.0)); 2943 CHECK_DBL_SAME( sin,( +0.0)); 2944 CHECK_DBL_SAME( sin,( -0.0)); 2945 CHECK_DBL_SAME( sin,( -0.0)); 2946 CHECK_DBL_SAME( sin,( -10.0)); 2947 #if 0 /* UCRT returns tiny fractions for these in the 2**-53 range, we return 0.0 */ 2948 CHECK_DBL_SAME( sin,( +M_PI)); 2949 CHECK_DBL_SAME( sin,( -M_PI)); 2950 #endif 2951 CHECK_DBL_SAME( sin,( +M_PI_2)); 2952 CHECK_DBL_SAME( sin,( -M_PI_2)); 2953 CHECK_DBL_SAME( sin,( +INFINITY)); 2954 CHECK_DBL_SAME( sin,( -INFINITY)); 2955 CHECK_DBL_SAME( sin,(RTStrNanDouble(NULL, true))); 2956 #if 0 /*UCRT converts these to quiet ones, we check above */ 2957 //CHECK_DBL_SAME( sin,(RTStrNanDouble("s", true))); 2958 //CHECK_DBL_SAME( sin,(RTStrNanDouble("s", false))); 2959 #endif 2960 2961 2962 2963 } 2964 2965 2966 void testCos() 2967 { 2968 RTTestSub(g_hTest, "cos[f]"); 2969 2970 CHECK_DBL(RT_NOCRT(cos)( +0.0), 1.0); 2971 CHECK_DBL( cos( +0.0), 1.0); 2972 CHECK_DBL(RT_NOCRT(cos)( +M_PI), -1.0); 2973 CHECK_DBL( cos( +M_PI), -1.0); 2974 CHECK_DBL(RT_NOCRT(cos)( -M_PI), -1.0); 2975 CHECK_DBL( cos( -M_PI), -1.0); 2976 CHECK_DBL(RT_NOCRT(cos)( +M_PI_2), 0.0); 2977 CHECK_DBL( cos( +M_PI_2), 0.0); 2978 CHECK_DBL(RT_NOCRT(cos)( -M_PI_2), 0.0); 2979 CHECK_DBL( cos( -M_PI_2), 0.0); 2980 CHECK_DBL(RT_NOCRT(cos)( +1.0), +M_PI_4); 2981 CHECK_DBL( cos( +1.0), +M_PI_4); 2982 CHECK_DBL(RT_NOCRT(cos)( -1.0), -M_PI_4); 2983 CHECK_DBL( cos( -1.0), -M_PI_4); 2984 CHECK_DBL_SAME( cos,( 1.0)); 2985 CHECK_DBL_SAME( cos,( 1.5)); 2986 CHECK_DBL_SAME( cos,( +0.0)); 2987 CHECK_DBL_SAME( cos,( +0.0)); 2988 CHECK_DBL_SAME( cos,( -0.0)); 2989 CHECK_DBL_SAME( cos,( -0.0)); 2990 CHECK_DBL_SAME( cos,( +M_PI)); 2991 CHECK_DBL_SAME( cos,( -M_PI)); 2992 CHECK_DBL_SAME( cos,( +M_PI_2)); 2993 CHECK_DBL_SAME( cos,( -M_PI_2)); 2994 #if 0 2995 CHECK_DBL_SAME( cos,( 238.6634566)); 2996 CHECK_DBL_SAME( cos,( -49.4578999)); 2997 CHECK_DBL_SAME( cos,( 999999.0)); 2998 CHECK_DBL_SAME( cos,( -999999.0)); 2999 CHECK_DBL_SAME( cos,( -999999.0)); 3000 CHECK_DBL_SAME( cos,( 999999.0)); 3001 CHECK_DBL_SAME( cos,( 39560.32334)); 3002 CHECK_DBL_SAME( cos,( 39560.32334)); 3003 CHECK_DBL_SAME( cos,( +INFINITY)); 3004 CHECK_DBL_SAME( cos,( -INFINITY)); 3005 CHECK_DBL_SAME( cos,( +DBL_MAX)); 3006 CHECK_DBL_SAME( cos,( -DBL_MAX)); 3007 CHECK_DBL_SAME( cos,(2.34960584706e100)); 3008 CHECK_DBL_SAME( cos,(2.34960584706e300)); 3009 CHECK_DBL_SAME( cos,(2.34960584706e300)); 3010 CHECK_DBL_SAME( cos,(RTStrNanDouble(NULL, true))); 3011 CHECK_DBL_SAME( cos,(RTStrNanDouble("s", true))); 3012 CHECK_DBL_SAME( cos,(RTStrNanDouble("s", false))); 3013 #endif 3014 } 2756 3015 2757 3016 … … 2812 3071 2813 3072 testATan(); 3073 testATan2(); 3074 testSin(); 3075 //testCos(); 2814 3076 2815 3077 #if 0 2816 ../common/math/atan2.asm \2817 ../common/math/atan2f.asm \2818 3078 ../common/math/cos.asm \ 2819 3079 ../common/math/cosf.asm \
Note:
See TracChangeset
for help on using the changeset viewer.