sinf.asm@ 96282

Last change on this file since 96282 was 96242, checked in by vboxsync, 2 years ago
IPRT/nocrt: Adapted the reworked sin and cos code for sinf and cosf; fixed a few cos bugs and added tests for cos. bugref:10261
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 5.4 KB

Line
1	; $Id: sinf.asm 96242 2022-08-17 01:59:06Z vboxsync $
2	;; @file
3	; IPRT - No-CRT sinf - AMD64 & X86.
4	;
5
6	;
7	; Copyright (C) 2006-2022 Oracle Corporation
8	;
9	; This file is part of VirtualBox Open Source Edition (OSE), as
10	; available from http://www.virtualbox.org. This file is free software;
11	; you can redistribute it and/or modify it under the terms of the GNU
12	; General Public License (GPL) as published by the Free Software
13	; Foundation, in version 2 as it comes in the "COPYING" file of the
14	; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	;
17	; The contents of this file may alternatively be used under the terms
18	; of the Common Development and Distribution License Version 1.0
19	; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20	; VirtualBox OSE distribution, in which case the provisions of the
21	; CDDL are applicable instead of those of the GPL.
22	;
23	; You may elect to license modified versions of this file under the
24	; terms and conditions of either the GPL or the CDDL or both.
25	;
26
27
28	%define RT_ASM_WITH_SEH64
29	%include "iprt/asmdefs.mac"
30	%include "iprt/x86.mac"
31
32
33	BEGINCODE
34
35
;;
; Compute the sine of rf, measured in radians.
;
; Special inputs (zero, denormal, NaN) and tiny inputs are returned as-is,
; inputs with abs(rf) < 3pi/4 as well as infinities and unsupported encodings
; are handed to FSIN, and all other finite inputs go to the common
; rtNoCrtMathSinCore worker.
;
; @returns  st(0) / xmm0
; @param    rf      [rbp + xCB*2] / xmm0
;
; Stack frame usage (relative to xBP):
;       [xBP - 10h]  dword scratch for moving the value between xmm0 and st0 (AMD64 only).
;       [xBP - 1ch]  word, the modified FPU control word (Windows only).
;       [xBP - 20h]  word, the caller's original FPU control word (Windows only).
;
RT_NOCRT_BEGINPROC sinf
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

%ifdef RT_OS_WINDOWS
        ;
        ; Make sure we use full precision and not the windows default of 53 bits.
        ; The original control word is kept at [xBP - 20h] and is restored on
        ; every exit path (see .return).
        ;
        fnstcw  [xBP - 20h]
        mov     ax, [xBP - 20h]
        or      ax, X86_FCW_PC_64       ; includes both bits, so no need to clear the mask.
        mov     [xBP - 1ch], ax
        fldcw   [xBP - 1ch]
%endif

        ;
        ; Load the input into st0.
        ;
%ifdef RT_ARCH_AMD64
        movss   [xBP - 10h], xmm0       ; xmm0 itself is left untouched and still holds the input.
        fld     dword [xBP - 10h]
%else
        fld     dword [xBP + xCB*2]
%endif

        ;
        ; We examine the input and weed out non-finite numbers first.
        ;
        fxam
        fnstsw  ax
        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        cmp     ax, X86_FSW_C2              ; Normal finite number (excluding zero)
        je      .finite
        cmp     ax, X86_FSW_C3              ; Zero
        je      .zero
        cmp     ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals - treat them as zero.
        je      .zero
        cmp     ax, X86_FSW_C0              ; NaN - must handle it special,
        je      .nan

        ; Pass infinities and unsupported inputs to fsin, assuming it does the right thing.
.do_sin:
        fsin
        jmp     .return_val

        ;
        ; Finite number.
        ;
.finite:
        ; For very tiny numbers, 0 < abs(input) < 2**-26, we can return the
        ; input value directly.
        fld     st0                         ; duplicate st0
        fabs                                ; make it an absolute (positive) value.
        fld     qword [.s_r64Tiny xWrtRIP]
        fcomip  st1                         ; compare s_r64Tiny and fabs(input); pops s_r64Tiny.
        ja      .return_tiny_number_as_is   ; jump if fabs(input) is smaller

        ; FSIN is documented to be reasonable for the range ]-3pi/4,3pi/4[, so
        ; while we have fabs(input) loaded already, check for that here and
        ; allow rtNoCrtMathSinCore to assume it won't see values very close to
        ; zero, except by cos -> sin conversion where they won't be relevant to
        ; any assumptions about precision approximation.
        fld     qword [.s_r64FSinOkay xWrtRIP]
        fcomip  st1                         ; sets EFLAGS from s_r64FSinOkay vs fabs(input).
        ffreep  st0                         ; drop the fabs(input) value (EFLAGS are kept for the ja).
        ja      .do_sin                     ; jump if fabs(input) is below the limit.

        ;
        ; Call common sine/cos worker.  The input is in st0 and the code
        ; below expects the result back in st0.
        ;
        mov     ecx, 0                      ; float
        extern  NAME(rtNoCrtMathSinCore)
        call    NAME(rtNoCrtMathSinCore)

        ;
        ; Return st0.
        ;
.return_val:
%ifdef RT_ARCH_AMD64
        fstp    dword [xBP - 10h]
        movss   xmm0, [xBP - 10h]
%endif
        ;
        ; Common exit.  The paths that return the input unchanged enter here
        ; as well, so the FPU control word changed in the prologue is always
        ; put back before returning to the caller.
        ;
.return:
%ifdef RT_OS_WINDOWS
        fldcw   [xBP - 20h]                 ; restore original
%endif
        leave
        ret

        ;
        ; As explained already, we can return tiny numbers directly too as the
        ; output from sinf(input) = input given our precision.
        ; We can skip the st0 -> xmm0 translation here, so follow the same path
        ; as .zero & .nan, after we've removed the fabs(input) value.
        ;
.return_tiny_number_as_is:
        ffreep  st0

        ;
        ; sinf(+/-0.0) = +/-0.0 (preserve the sign)
        ; We can skip the st0 -> xmm0 translation here, so follow the .nan code path.
        ;
.zero:

        ;
        ; Input is NaN, output it unmodified as far as we can (FLD changes SNaN
        ; to QNaN when masked).
        ;
.nan:
%ifdef RT_ARCH_AMD64
        ffreep  st0                         ; xmm0 still holds the input, so just empty the FPU stack.
%endif
        jmp     .return

ALIGNCODE(8)
        ; Ca. 2**-26, absolute value.  Inputs closer to zero than this can be
        ; returned directly as the sinf(input) value should be basically the same
        ; given the precision we're working with and FSIN probably won't even
        ; manage that.
        ;; @todo experiment when FSIN gets better than this.
.s_r64Tiny:
        dq      1.49011612e-8
        ; The absolute limit of FSIN "good" range.
.s_r64FSinOkay:
        dq      2.356194490192344928845     ; 3pi/4
        ;dq      1.57079632679489661923     ; pi/2 - alternative.

ENDPROC   RT_NOCRT(sinf)
175

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Runtime/common/math/sinf.asm@ 96282

Download in other formats: