Changeset 96273 in vbox
- Timestamp:
- Aug 17, 2022 9:01:33 PM (2 years ago)
- Location:
- trunk/src/VBox/Runtime
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/Runtime/common/math/tan.asm
r96060 r96273 25 25 ; 26 26 27 28 %define RT_ASM_WITH_SEH64 27 29 %include "iprt/asmdefs.mac" 30 %include "iprt/x86.mac" 31 28 32 29 33 BEGINCODE … … 34 38 ; @param rd [xSP + xCB*2] / xmm0 35 39 RT_NOCRT_BEGINPROC tan 36 push xBP 37 mov xBP, xSP 40 push xBP 41 SEH64_PUSH_xBP 42 mov xBP, xSP 43 SEH64_SET_FRAME_xBP 0 44 sub xSP, 20h 45 SEH64_ALLOCATE_STACK 20h 46 SEH64_END_PROLOGUE 38 47 48 %ifdef RT_OS_WINDOWS 49 ; 50 ; Make sure we use full precision and not the windows default of 53 bits. 51 ; 52 fnstcw [xBP - 20h] 53 mov ax, [xBP - 20h] 54 or ax, X86_FCW_PC_64 ; includes both bits, so no need to clear the mask. 55 mov [xBP - 1ch], ax 56 fldcw [xBP - 1ch] 57 %endif 58 59 ; 60 ; Load the input into st0. 61 ; 39 62 %ifdef RT_ARCH_AMD64 40 sub xSP, 10h 63 movsd [xBP - 10h], xmm0 64 fld qword [xBP - 10h] 65 %else 66 fld qword [xBP + xCB*2] 67 %endif 41 68 42 movsd [xSP], xmm0 43 fld qword [xSP] 44 %else 45 fld qword [xBP + xCB*2] 69 ; 70 ; Calculate the tangent. 71 ; 72 fptan 73 fnstsw ax 74 test ah, (X86_FSW_C2 >> 8) ; C2 is set if the input was out of range. 75 jz .return_val 76 77 ; 78 ; Input was out of range, perform reduction to +/-2pi. 79 ; 80 fldpi 81 fadd st0 82 fxch st1 83 .again: 84 fprem1 85 fnstsw ax 86 test ah, (X86_FSW_C2 >> 8) ; C2 is set if partial result. 87 jnz .again ; Loop till C2 == 0 and we have a final result. 88 89 fstp st1 90 91 fptan 92 93 ; 94 ; Run st0. 95 ; 96 .return_val: 97 ffreep st0 ; ignore the 1.0 fptan pushed 98 %ifdef RT_ARCH_AMD64 99 fstp qword [xBP - 10h] 100 movsd xmm0, [xBP - 10h] 46 101 %endif 47 fptan 48 fnstsw ax 49 test ah, 04h ; check for C2 50 jz .done 51 52 fldpi 53 fadd st0 54 fxch st1 55 .again: 56 fprem1 57 fnstsw ax 58 test ah, 04h 59 jnz .again 60 fstp st1 61 fptan 62 63 .done: 64 fstp st0 65 %ifdef RT_ARCH_AMD64 66 fstp qword [xSP] 67 movsd xmm0, [xSP] 102 %ifdef RT_OS_WINDOWS 103 fldcw [xBP - 20h] ; restore original 68 104 %endif 69 leave 70 ret 105 .return: 106 leave 107 ret 71 108 ENDPROC RT_NOCRT(tan) 72 109 -
trunk/src/VBox/Runtime/common/math/tanf.asm
r96060 r96273 25 25 ; 26 26 27 28 %define RT_ASM_WITH_SEH64 27 29 %include "iprt/asmdefs.mac" 30 %include "iprt/x86.mac" 31 28 32 29 33 BEGINCODE 30 34 31 35 ;; 32 ; Compute the sine of r 3236 ; Compute the sine of rf 33 37 ; @returns st(0) / xmm0 34 ; @param r 32[xSP + xCB*2] / xmm038 ; @param rf [xSP + xCB*2] / xmm0 35 39 RT_NOCRT_BEGINPROC tanf 36 push xBP 37 mov xBP, xSP 40 push xBP 41 SEH64_PUSH_xBP 42 mov xBP, xSP 43 SEH64_SET_FRAME_xBP 0 44 sub xSP, 20h 45 SEH64_ALLOCATE_STACK 20h 46 SEH64_END_PROLOGUE 38 47 48 %ifdef RT_OS_WINDOWS 49 ; 50 ; Make sure we use full precision and not the windows default of 53 bits. 51 ; 52 fnstcw [xBP - 20h] 53 mov ax, [xBP - 20h] 54 or ax, X86_FCW_PC_64 ; includes both bits, so no need to clear the mask. 55 mov [xBP - 1ch], ax 56 fldcw [xBP - 1ch] 57 %endif 58 59 ; 60 ; Load the input into st0. 61 ; 39 62 %ifdef RT_ARCH_AMD64 40 sub xSP, 10h 63 movss [xBP - 10h], xmm0 64 fld dword [xBP - 10h] 65 %else 66 fld dword [xBP + xCB*2] 67 %endif 41 68 42 movss [xSP], xmm0 43 fld dword [xSP] 44 %else 45 fld dword [xBP + xCB*2] 69 ; 70 ; Calculate the tangent. 71 ; 72 fptan 73 fnstsw ax 74 test ah, (X86_FSW_C2 >> 8) ; C2 is set if the input was out of range. 75 jz .return_val 76 77 ; 78 ; Input was out of range, perform reduction to +/-2pi. 79 ; 80 fldpi 81 fadd st0 82 fxch st1 83 .again: 84 fprem1 85 fnstsw ax 86 test ah, (X86_FSW_C2 >> 8) ; C2 is set if partial result. 87 jnz .again ; Loop till C2 == 0 and we have a final result. 88 89 fstp st1 90 91 fptan 92 93 ; 94 ; Run st0. 
95 ; 96 .return_val: 97 ffreep st0 ; ignore the 1.0 fptan pushed 98 %ifdef RT_ARCH_AMD64 99 fstp dword [xBP - 10h] 100 movss xmm0, [xBP - 10h] 46 101 %endif 47 fptan 48 fnstsw ax 49 test ah, 04h ; check for C2 50 jz .done 51 52 fldpi 53 fadd st0 54 fxch st1 55 .again: 56 fprem1 57 fnstsw ax 58 test ah, 04h 59 jnz .again 60 fstp st1 61 fptan 62 63 .done: 64 fstp st0 65 %ifdef RT_ARCH_AMD64 66 fstp dword [xSP] 67 movss xmm0, [xSP] 102 %ifdef RT_OS_WINDOWS 103 fldcw [xBP - 20h] ; restore original 68 104 %endif 69 leave 70 ret 105 .return: 106 leave 107 ret 71 108 ENDPROC RT_NOCRT(tanf) 72 109 -
trunk/src/VBox/Runtime/testcase/tstRTNoCrt-2.cpp
r96242 r96273 398 398 RTTestFailed(g_hTest, "line %u: %s%s: noCRT => %s; CRT => %s", \ 399 399 __LINE__, #a_Fn, #a_Args, g_szFloat[0], g_szFloat[1]); \ 400 } \ 401 } while (0) 402 403 #define CHECK_FLT_RANGE(a_Expr, a_rfExpect, a_rfPlusMin) do { \ 404 RTFLOAT32U uRet; \ 405 uRet.r = a_Expr; \ 406 RTFLOAT32U uExpectMin; \ 407 uExpectMin.r = (a_rfExpect) - (a_rfPlusMin); \ 408 RTFLOAT32U uExpectMax; \ 409 uExpectMax.r = (a_rfExpect) + (a_rfPlusMin); \ 410 if ( !(RTFLOAT32U_IS_NORMAL(&uRet) || RTFLOAT32U_IS_ZERO(&uRet))\ 411 || uRet.r < uExpectMin.r \ 412 || uRet.r > uExpectMax.r ) \ 413 { \ 414 RTStrFormatR32(g_szFloat[0], sizeof(g_szFloat[0]), &uRet, 0, 0, RTSTR_F_SPECIAL); \ 415 RTStrFormatR32(g_szFloat[1], sizeof(g_szFloat[1]), &uExpectMin, 0, 0, RTSTR_F_SPECIAL); \ 416 RTStrFormatR32(g_szFloat[2], sizeof(g_szFloat[2]), &uExpectMax, 0, 0, RTSTR_F_SPECIAL); \ 417 RTTestFailed(g_hTest, "line %u: %s -> %s, expected [%s,%s] (%s +/- %s)", \ 418 __LINE__, #a_Expr, g_szFloat[0], g_szFloat[1], #a_rfExpect, #a_rfPlusMin); \ 400 419 } \ 401 420 } while (0) … … 3180 3199 3181 3200 3201 void testTan() 3202 { 3203 RTTestSub(g_hTest, "tan[f]"); 3204 3205 /* See comment in testSin regarding testing and accuracy. Note that tan 3206 and tanf have received no extra attention yet and are solely based on 3207 the FPU capabilities. 
*/ 3208 //lvbe /mnt/e/misc/float/tan -d +1.0 +2.0 +3.0 +4.0 +5.0 +6.0 +7.0 +8.0 +9.0 +10.0 +100.0 +654.216812456 +10.10101010101010 +25.25252525252525 +252.25252525252525 +2525.25252525252525 +25252.25252525252525 +252525.25252525252525 +3.14 +1.57 +2.355 +1.1775 3209 CHECK_DBL( RT_NOCRT(tan)( +0.0), +0.0); 3210 CHECK_DBL( RT_NOCRT(tan)( -0.0), -0.0); 3211 CHECK_DBL( tan( -0.0), -0.0); 3212 CHECK_DBL_RANGE(RT_NOCRT(tan)( +M_PI), +0.0, 0.0000000000000100000); 3213 CHECK_DBL_RANGE(RT_NOCRT(tan)( -M_PI), +0.0, 0.0000000000000100000); 3214 CHECK_DBL( RT_NOCRT(tan)( +1.0), +1.55740772465490229237); 3215 CHECK_DBL( RT_NOCRT(tan)( +2.0), -2.18503986326151888875); 3216 CHECK_DBL( RT_NOCRT(tan)( +3.0), -0.14254654307427780391); 3217 CHECK_DBL( RT_NOCRT(tan)( +4.0), +1.15782128234957748525); 3218 CHECK_DBL( RT_NOCRT(tan)( +5.0), -3.38051500624658585181); 3219 CHECK_DBL( RT_NOCRT(tan)( +6.0), -0.29100619138474914660); 3220 CHECK_DBL( RT_NOCRT(tan)( +7.0), +0.87144798272431878150); 3221 CHECK_DBL( RT_NOCRT(tan)( +8.0), -6.79971145522037900832); 3222 CHECK_DBL( RT_NOCRT(tan)( +9.0), -0.45231565944180984751); 3223 CHECK_DBL( RT_NOCRT(tan)( +10.0), +0.64836082745908663050); 3224 CHECK_DBL( RT_NOCRT(tan)( +100.0), -0.58721391515692911156); 3225 CHECK_DBL( RT_NOCRT(tan)( +654.216812456), +0.96105296910208881656); 3226 CHECK_DBL( RT_NOCRT(tan)( +10.10101010101010), +0.80244848750680519700); 3227 CHECK_DBL( RT_NOCRT(tan)( +25.25252525252525), +0.12036022656173953060); 3228 CHECK_DBL( RT_NOCRT(tan)( +252.25252525252525), +1.32728909752762014307); 3229 CHECK_DBL( RT_NOCRT(tan)( +2525.25252525252525), -0.66661702242341180913); 3230 CHECK_DBL_RANGE(RT_NOCRT(tan)( +25252.25252525252525), +0.13152635436679746550, 0.0000000000000010000); 3231 CHECK_DBL_RANGE(RT_NOCRT(tan)( +252525.25252525252525), +1.24331239382105529501, 0.0000000000000100000); 3232 CHECK_DBL( RT_NOCRT(tan)( +3.14), -0.00159265493640722302); 3233 CHECK_DBL( RT_NOCRT(tan)( +1.57), +1255.76559150078969651076); 3234 
CHECK_DBL( RT_NOCRT(tan)( +2.355), -1.00239183854994351464); 3235 CHECK_DBL( RT_NOCRT(tan)( +1.1775), +2.41014118913622787943); 3236 3237 CHECK_DBL( RT_NOCRT(tan)( -1.0), -1.55740772465490229237); 3238 CHECK_DBL( RT_NOCRT(tan)( -2.0), +2.18503986326151888875); 3239 CHECK_DBL( RT_NOCRT(tan)( -3.0), +0.14254654307427780391); 3240 CHECK_DBL( RT_NOCRT(tan)( -4.0), -1.15782128234957748525); 3241 CHECK_DBL( RT_NOCRT(tan)( -5.0), +3.38051500624658585181); 3242 CHECK_DBL( RT_NOCRT(tan)( -6.0), +0.29100619138474914660); 3243 CHECK_DBL( RT_NOCRT(tan)( -7.0), -0.87144798272431878150); 3244 CHECK_DBL( RT_NOCRT(tan)( -8.0), +6.79971145522037900832); 3245 CHECK_DBL( RT_NOCRT(tan)( -9.0), +0.45231565944180984751); 3246 CHECK_DBL( RT_NOCRT(tan)( -10.0), -0.64836082745908663050); 3247 CHECK_DBL( RT_NOCRT(tan)( -100.0), +0.58721391515692911156); 3248 CHECK_DBL( RT_NOCRT(tan)( -654.216812456), -0.96105296910208881656); 3249 CHECK_DBL( RT_NOCRT(tan)( -10.10101010101010), -0.80244848750680519700); 3250 CHECK_DBL( RT_NOCRT(tan)( -25.25252525252525), -0.12036022656173953060); 3251 CHECK_DBL( RT_NOCRT(tan)( -252.25252525252525), -1.32728909752762014307); 3252 CHECK_DBL( RT_NOCRT(tan)( -2525.25252525252525), +0.66661702242341180913); 3253 CHECK_DBL_RANGE(RT_NOCRT(tan)( -25252.25252525252525), -0.13152635436679746550, 0.0000000000000010000); 3254 CHECK_DBL_RANGE(RT_NOCRT(tan)( -252525.25252525252525), -1.24331239382105529501, 0.0000000000000100000); 3255 CHECK_DBL( RT_NOCRT(tan)( -3.14), +0.00159265493640722302); 3256 CHECK_DBL( RT_NOCRT(tan)( RTStrNanDouble(NULL, true)), RTStrNanDouble(NULL, true)); 3257 CHECK_DBL( RT_NOCRT(tan)( RTStrNanDouble("4940", false)), RTStrNanDouble("4940", false)); 3258 //CHECK_DBL( RT_NOCRT(tan)( RTStrNanDouble("494s", false)), RTStrNanDouble("494s", false)); //- not preserved 3259 CHECK_DBL_SAME(tan,( +0.0)); 3260 CHECK_DBL_SAME(tan,( -0.0)); 3261 CHECK_DBL_SAME(tan,( +1.0)); 3262 CHECK_DBL_SAME(tan,( -1.0)); 3263 #if 0 /* the FPU reduction isn't accurate 
enough, don't want to spend time on this now. */ 3264 CHECK_DBL_SAME(tan,( +M_PI)); 3265 CHECK_DBL_SAME(tan,( -M_PI)); 3266 #endif 3267 CHECK_DBL_SAME(tan,( -6.0)); 3268 CHECK_DBL_SAME(tan,( -6.333)); 3269 CHECK_DBL_SAME(tan,( +6.666)); 3270 CHECK_DBL_SAME(tan,( 246.36775)); 3271 CHECK_DBL_SAME(tan,( +INFINITY)); 3272 CHECK_DBL_SAME(tan,( -INFINITY)); 3273 CHECK_DBL_SAME(tan,(RTStrNanDouble(NULL, true))); 3274 CHECK_DBL_SAME(tan,(RTStrNanDouble("s", true))); 3275 3276 3277 //lvbe /mnt/e/misc/float/tan -f +1.0 +2.0 +3.0 +4.0 +5.0 +6.0 +7.0 +8.0 +9.0 +10.0 +100.0 +654.216812456 +10.10101010101010 +25.25252525252525 +252.25252525252525 +2525.25252525252525 +25252.25252525252525 +252525.25252525252525 +3.14 +1.57 +2.355 +1.1775 3278 //lvbe /mnt/e/misc/float/tan -f -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0 -9.0 -10.0 -100.0 -654.216812456 -10.10101010101010 -25.25252525252525 -252.25252525252525 -2525.25252525252525 -25252.25252525252525 -252525.25252525252525 -3.14 -1.57 -2.355 -1.1775 3279 CHECK_FLT( RT_NOCRT(tanf)( +0.0f), +0.0f); 3280 CHECK_FLT( RT_NOCRT(tanf)( -0.0f), -0.0f); 3281 CHECK_FLT_RANGE(RT_NOCRT(tanf)( +(float)M_PI), +0.0f, 0.000000100000000f); 3282 CHECK_FLT_RANGE(RT_NOCRT(tanf)( -(float)M_PI), +0.0f, 0.000000100000000f); 3283 CHECK_FLT( RT_NOCRT(tanf)( +1.0f), +1.557407736778259f); 3284 CHECK_FLT( RT_NOCRT(tanf)( +2.0f), -2.185039758682251f); 3285 CHECK_FLT( RT_NOCRT(tanf)( +3.0f), -0.142546549439430f); 3286 CHECK_FLT( RT_NOCRT(tanf)( +4.0f), +1.157821297645569f); 3287 CHECK_FLT( RT_NOCRT(tanf)( +5.0f), -3.380515098571777f); 3288 CHECK_FLT( RT_NOCRT(tanf)( +6.0f), -0.291006177663803f); 3289 CHECK_FLT( RT_NOCRT(tanf)( +7.0f), +0.871447980403900f); 3290 CHECK_FLT( RT_NOCRT(tanf)( +8.0f), -6.799711227416992f); 3291 CHECK_FLT( RT_NOCRT(tanf)( +9.0f), -0.452315658330917f); 3292 CHECK_FLT( RT_NOCRT(tanf)( +10.0f), +0.648360848426819f); 3293 CHECK_FLT( RT_NOCRT(tanf)( +100.0f), -0.587213933467865f); 3294 CHECK_FLT( RT_NOCRT(tanf)( +654.216812456f), 
+0.961022973060608f); 3295 CHECK_FLT( RT_NOCRT(tanf)( +10.10101010101010f), +0.802448868751526f); 3296 CHECK_FLT( RT_NOCRT(tanf)( +25.25252525252525f), +0.120360307395458f); 3297 CHECK_FLT( RT_NOCRT(tanf)( +252.25252525252525f), +1.327268242835999f); 3298 CHECK_FLT( RT_NOCRT(tanf)( +2525.25252525252525f), -0.666738152503967f); 3299 CHECK_FLT( RT_NOCRT(tanf)( +25252.25252525252525f), +0.130944371223450f); 3300 CHECK_FLT( RT_NOCRT(tanf)( +252525.25252525252525f), +1.236903667449951f); 3301 CHECK_FLT( RT_NOCRT(tanf)( +3.14f), -0.001592550077476f); 3302 CHECK_FLT( RT_NOCRT(tanf)( +1.57f), +1255.848266601562500f); 3303 CHECK_FLT( RT_NOCRT(tanf)( +2.355f), -1.002391815185547f); 3304 CHECK_FLT( RT_NOCRT(tanf)( +1.1775f), +2.410141229629517f); 3305 CHECK_FLT( RT_NOCRT(tanf)( -1.0f), -1.557407736778259f); 3306 CHECK_FLT( RT_NOCRT(tanf)( -2.0f), +2.185039758682251f); 3307 CHECK_FLT( RT_NOCRT(tanf)( -3.0f), +0.142546549439430f); 3308 CHECK_FLT( RT_NOCRT(tanf)( -4.0f), -1.157821297645569f); 3309 CHECK_FLT( RT_NOCRT(tanf)( -5.0f), +3.380515098571777f); 3310 CHECK_FLT( RT_NOCRT(tanf)( -6.0f), +0.291006177663803f); 3311 CHECK_FLT( RT_NOCRT(tanf)( -7.0f), -0.871447980403900f); 3312 CHECK_FLT( RT_NOCRT(tanf)( -8.0f), +6.799711227416992f); 3313 CHECK_FLT( RT_NOCRT(tanf)( -9.0f), +0.452315658330917f); 3314 CHECK_FLT( RT_NOCRT(tanf)( -10.0f), -0.648360848426819f); 3315 CHECK_FLT( RT_NOCRT(tanf)( -100.0f), +0.587213933467865f); 3316 CHECK_FLT( RT_NOCRT(tanf)( -654.216812456f), -0.961022973060608f); 3317 CHECK_FLT( RT_NOCRT(tanf)( -10.10101010101010f), -0.802448868751526f); 3318 CHECK_FLT( RT_NOCRT(tanf)( -25.25252525252525f), -0.120360307395458f); 3319 CHECK_FLT( RT_NOCRT(tanf)( -252.25252525252525f), -1.327268242835999f); 3320 CHECK_FLT( RT_NOCRT(tanf)( -2525.25252525252525f), +0.666738152503967f); 3321 CHECK_FLT( RT_NOCRT(tanf)( -25252.25252525252525f), -0.130944371223450f); 3322 CHECK_FLT( RT_NOCRT(tanf)( -252525.25252525252525f), -1.236903667449951f); 3323 CHECK_FLT( 
RT_NOCRT(tanf)( -3.14f), +0.001592550077476f); 3324 CHECK_FLT( RT_NOCRT(tanf)( -1.57f), -1255.848266601562500f); 3325 CHECK_FLT( RT_NOCRT(tanf)( -2.355f), +1.002391815185547f); 3326 CHECK_FLT( RT_NOCRT(tanf)( -1.1775f), -2.410141229629517f); 3327 CHECK_FLT( RT_NOCRT(tanf)( RTStrNanFloat(NULL, true)), RTStrNanFloat(NULL, true)); 3328 CHECK_FLT( RT_NOCRT(tanf)( RTStrNanFloat("4940", false)), RTStrNanFloat("4940", false)); 3329 //CHECK_FLT( RT_NOCRT(tanf)( RTStrNanFloat("494s", false)), RTStrNanFloat("494s", false)); - not preserved 3330 3331 CHECK_FLT_SAME(tanf,( +0.0f)); 3332 CHECK_FLT_SAME(tanf,( -0.0f)); 3333 CHECK_FLT_SAME(tanf,( +1.0f)); 3334 CHECK_FLT_SAME(tanf,( -1.0f)); 3335 CHECK_FLT_SAME(tanf,( -6.0f)); 3336 CHECK_FLT_SAME(tanf,( -6.333f)); 3337 CHECK_FLT_SAME(tanf,( +6.666f)); 3338 CHECK_FLT_SAME(tanf,( 246.36775f)); 3339 3340 CHECK_FLT_SAME(tanf,( +(float)INFINITY)); 3341 CHECK_FLT_SAME(tanf,( -(float)INFINITY)); 3342 CHECK_FLT_SAME(tanf,(RTStrNanFloat(NULL, true))); 3343 CHECK_FLT_SAME(tanf,(RTStrNanFloat("s", true))); 3344 } 3345 3182 3346 3183 3347 int main() … … 3239 3403 testSin(); 3240 3404 testCos(); 3241 3242 #if 0 3243 ../common/math/cos.asm \ 3244 ../common/math/cosf.asm \ 3245 ../common/math/cosl.asm \ 3246 ../common/math/sin.asm \ 3247 ../common/math/sinf.asm \ 3248 ../common/math/tan.asm \ 3249 ../common/math/tanf.asm \ 3250 3251 #endif 3405 testTan(); 3252 3406 3253 3407 return RTTestSummaryAndDestroy(g_hTest);
Note:
See TracChangeset
for help on using the changeset viewer.