VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/math/sinf.asm@ 96282

Last change on this file since 96282 was 96242, checked in by vboxsync, 2 years ago

IPRT/nocrt: Adapted the reworked sin and cos code for sinf and cosf; fixed a few cos bugs and added tests for cos. bugref:10261

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 5.4 KB
Line 
1; $Id: sinf.asm 96242 2022-08-17 01:59:06Z vboxsync $
2;; @file
3; IPRT - No-CRT sinf - AMD64 & X86.
4;
5
6;
7; Copyright (C) 2006-2022 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17; The contents of this file may alternatively be used under the terms
18; of the Common Development and Distribution License Version 1.0
19; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20; VirtualBox OSE distribution, in which case the provisions of the
21; CDDL are applicable instead of those of the GPL.
22;
23; You may elect to license modified versions of this file under the
24; terms and conditions of either the GPL or the CDDL or both.
25;
26
27
28%define RT_ASM_WITH_SEH64
29%include "iprt/asmdefs.mac"
30%include "iprt/x86.mac"
31
32
33BEGINCODE
34
35
;;
; Compute the sine of rf, measured in radians.
;
; @returns  st(0) on x86 / xmm0 on AMD64
; @param    rf      [xBP + xCB*2] on x86 / xmm0 on AMD64
;
; Stack frame usage (20h bytes below xBP):
;       [xBP - 20h]  word   Original FPU control word (Windows only).
;       [xBP - 1ch]  word   FPU control word with 64-bit precision (Windows only).
;       [xBP - 10h]  dword  Scratch for moving the value between xmm0 and st0 (AMD64 only).
;
RT_NOCRT_BEGINPROC sinf
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

%ifdef RT_OS_WINDOWS
        ;
        ; Make sure we use full precision and not the windows default of 53 bits.
        ; The original control word is kept at [xBP - 20h] and is restored on
        ; every exit path (see .return).
        ;
        fnstcw  [xBP - 20h]
        mov     ax, [xBP - 20h]
        or      ax, X86_FCW_PC_64       ; includes both bits, so no need to clear the mask.
        mov     [xBP - 1ch], ax
        fldcw   [xBP - 1ch]
%endif

        ;
        ; Load the input into st0.  On AMD64 xmm0 keeps holding the input too,
        ; which the .zero/.nan/.return_tiny_number_as_is paths rely on.
        ;
%ifdef RT_ARCH_AMD64
        movss   [xBP - 10h], xmm0
        fld     dword [xBP - 10h]
%else
        fld     dword [xBP + xCB*2]
%endif

        ;
        ; We examine the input and weed out non-finite numbers first.
        ;
        fxam
        fnstsw  ax
        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        cmp     ax, X86_FSW_C2              ; Normal finite number (excluding zero)
        je      .finite
        cmp     ax, X86_FSW_C3              ; Zero
        je      .zero
        cmp     ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals - treat them as zero.
        je      .zero
        cmp     ax, X86_FSW_C0              ; NaN - must be handled specially.
        je      .nan

        ; Pass infinities and unsupported inputs to fsin, assuming it does the right thing.
.do_sin:
        fsin
        jmp     .return_val

        ;
        ; Finite number.
        ;
.finite:
        ; For very tiny numbers, 0 < abs(input) < 2**-26, we can return the
        ; input value directly.
        fld     st0                     ; duplicate st0
        fabs                            ; make it an absolute (positive) value.
        fld     qword [.s_r64Tiny xWrtRIP]
        fcomip  st1                     ; compare s_r64Tiny and fabs(input); pops s_r64Tiny
        ja      .return_tiny_number_as_is ; jump if fabs(input) is smaller

        ; FSIN is documented to be reasonable for the range ]-3pi/4,3pi/4[, so
        ; while we have fabs(input) loaded already, check for that here and
        ; allow rtNoCrtMathSinCore to assume it won't see values very close to
        ; zero, except by cos -> sin conversion where they won't be relevant to
        ; any assumptions about precision approximation.
        fld     qword [.s_r64FSinOkay xWrtRIP]
        fcomip  st1                     ; compare s_r64FSinOkay and fabs(input); pops s_r64FSinOkay
        ffreep  st0                     ; drop the fabs(input) value (EFLAGS stay intact for the ja)
        ja      .do_sin                 ; jump if fabs(input) is inside the FSIN "good" range

        ;
        ; Call common sine/cos worker.  Input is in st0, ecx selects float.
        ;
        ; NOTE(review): the saved control word at [xBP - 20h] sits at the very
        ; bottom of this frame, i.e. where a Windows x64 callee would find its
        ; home area.  Presumably rtNoCrtMathSinCore never writes there - confirm.
        ;
        mov     ecx, 0                  ; float
        extern  NAME(rtNoCrtMathSinCore)
        call    NAME(rtNoCrtMathSinCore)

        ;
        ; Return st0.
        ;
.return_val:
%ifdef RT_ARCH_AMD64
        fstp    dword [xBP - 10h]       ; st0 -> xmm0 via the stack scratch slot
        movss   xmm0, [xBP - 10h]
%endif
        ;
        ; Common exit.  All paths must come through here so the caller's FPU
        ; control word is restored on Windows, where it was changed on entry.
        ;
.return:
%ifdef RT_OS_WINDOWS
        fldcw   [xBP - 20h]             ; restore original
%endif
        leave
        ret

        ;
        ; As explained already, we can return tiny numbers directly too as the
        ; output from sinf(input) = input given our precision.
        ; We can skip the st0 -> xmm0 translation here, so follow the same path
        ; as .zero & .nan, after we've removed the fabs(input) value.
        ;
.return_tiny_number_as_is:
        ffreep  st0

        ;
        ; sinf(+/-0.0) = +/-0.0 (preserve the sign)
        ; We can skip the st0 -> xmm0 translation here, so follow the .nan code path.
        ;
.zero:

        ;
        ; Input is NaN, output it unmodified as far as we can (FLD changes SNaN
        ; to QNaN when masked).
        ;
.nan:
%ifdef RT_ARCH_AMD64
        ffreep  st0                     ; xmm0 still holds the input; just empty the FPU stack.
%endif
        jmp     .return

ALIGNCODE(8)
        ; Ca. 2**-26, absolute value.  Inputs closer to zero than this can be
        ; returned directly as the sinf(input) value should be basically the same
        ; given the precision we're working with and FSIN probably won't even
        ; manage that.
        ;; @todo experiment when FSIN gets better than this.
.s_r64Tiny:
        dq      1.49011612e-8
        ; The absolute limit of FSIN "good" range.
.s_r64FSinOkay:
        dq      2.356194490192344928845 ; 3pi/4
        ;dq      1.57079632679489661923 ; pi/2 - alternative.

ENDPROC   RT_NOCRT(sinf)
175
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette