VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/math/cos.asm@ 96240

Last change on this file since 96240 was 96240, checked in by vboxsync, 2 years ago

IPRT/nocrt: Reworking the sin and cos code to take into account which ranges FCOS and FSIN instructions are expected to deliver reasonable results and handle extreme values better. bugref:10261

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 5.8 KB
Line 
1; $Id: cos.asm 96240 2022-08-17 00:59:31Z vboxsync $
2;; @file
3; IPRT - No-CRT cos - AMD64 & X86.
4;
5
6;
7; Copyright (C) 2006-2022 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17; The contents of this file may alternatively be used under the terms
18; of the Common Development and Distribution License Version 1.0
19; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20; VirtualBox OSE distribution, in which case the provisions of the
21; CDDL are applicable instead of those of the GPL.
22;
23; You may elect to license modified versions of this file under the
24; terms and conditions of either the GPL or the CDDL or both.
25;
26
27
28%define RT_ASM_WITH_SEH64
29%include "iprt/asmdefs.mac"
30%include "iprt/x86.mac"
31
32
33BEGINCODE
34
35;;
36; Compute the cosine of rd, measured in radians.
37;
38; @returns st(0) / xmm0
39; @param rd [rbp + xCB*2] / xmm0
40;
41RT_NOCRT_BEGINPROC cos
42 push xBP
43 SEH64_PUSH_xBP
44 mov xBP, xSP
45 SEH64_SET_FRAME_xBP 0
46 sub xSP, 20h
47 SEH64_ALLOCATE_STACK 20h
48 SEH64_END_PROLOGUE
49
50%ifdef RT_OS_WINDOWS
51 ;
52 ; Make sure we use full precision and not the windows default of 53 bits.
53 ;
54;; @todo not sure if this makes any difference...
55 fnstcw [xBP - 20h]
56 mov ax, [xBP - 20h]
57 or ax, X86_FCW_PC_64 ; includes both bits, so no need to clear the mask.
58 mov [xBP - 1ch], ax
59 fldcw [xBP - 1ch]
60%endif
61
62 ;
63 ; Load the input into st0.
64 ;
65%ifdef RT_ARCH_AMD64
66 movsd [xBP - 10h], xmm0
67 fld qword [xBP - 10h]
68%else
69 fld qword [xBP + xCB*2]
70%endif
71
72 ;
73 ; The FCOS instruction has a very narrow range (-3pi/8 to 3pi/8) where it
74 ; works reliably, so outside that we'll use the FSIN instruction instead
75 ; as it has a larger good range (-5pi/4 to 1pi/4 for cosine).
76 ; Input conversion follows: cos(x) = sin(x + pi/2)
77 ;
78 ; We examin the input and weed out non-finit numbers first.
79 ;
80
81 ; We only do the range check on normal finite numbers.
82 fxam
83 fnstsw ax
84 and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
85 cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero)
86 je .finite
87 cmp ax, X86_FSW_C3 ; Zero
88 je .zero
89 cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals - treat them as zero.
90 je .zero
91 cmp ax, X86_FSW_C0 ; NaN - must handle it special,
92 je .nan
93
94 ; Pass infinities and unsupported inputs to fcos, assuming it does the right thing.
95 ; We also jump here if we get a finite number in the "good" range, see below.
96.do_fcos:
97 fcos
98 jmp .return_val
99
100 ;
101 ; Finite number.
102 ;
103 ; First check if it's a very tiny number where we can simply return 1.
104 ; Next check if it's in the range where FCOS is reasonable, otherwise
105 ; go to FSIN to do the work.
106 ;
107.finite:
108 fld st0
109 fabs
110 fld qword [.s_r64TinyCosTo1 xWrtRIP]
111 fcomip st1
112 jbe .zero_extra_pop
113
114.not_that_tiny_input:
115 fld qword [.s_r64FCosOkay xWrtRIP]
116 fcomip st1
117 ffreep st0 ; pop fabs(input)
118 ja .do_fcos ; jmp if fabs(input) < .s_r64FCosOkay
119
120 ;
121 ; If we have a positive number we subtract 3pi/2, for negative we add pi/2.
122 ; We still have the FXAM result in AX.
123 ;
124.outside_fcos_range:
125 test ax, X86_FSW_C1 ; The sign bit.
126 jnz .adjust_negative_to_sine
127
128 ; Calc -3pi/2 using FPU-internal pi constant.
129 fldpi
130 fadd st0, st0 ; st0=2pi
131 fldpi
132 fdiv qword [.s_r64Two xWrtRIP] ; st1=2pi; st0=pi/2
133 fsubp st1, st0 ; st0=3pi/2
134 fchs ; st0=-3pi/2
135 jmp .make_sine_adjustment
136
137.adjust_negative_to_sine:
138 ; Calc +pi/2.
139 fldpi
140 fdiv qword [.s_r64Two xWrtRIP] ; st1=2pi; st0=pi/2
141
142.make_sine_adjustment:
143 faddp st1, st0
144
145 ;
146 ; Call internal sine worker to calculate st0=sin(st0)
147 ;
148.do_sine:
149 mov ecx, 1 ; double
150 extern NAME(rtNoCrtMathSinCore)
151 call NAME(rtNoCrtMathSinCore)
152
153 ;
154 ; Return st0.
155 ;
156.return_val:
157%ifdef RT_ARCH_AMD64
158 fstp qword [xBP - 10h]
159 movsd xmm0, [xBP - 10h]
160%endif
161%ifdef RT_OS_WINDOWS
162 fldcw [xBP - 20h] ; restore original
163%endif
164.return:
165 leave
166 ret
167
168 ;
169 ; cos(+/-0) = +1.0
170 ;
171.zero_extra_pop:
172 ffreep st0
173.zero:
174 ffreep st0
175 fld1
176 jmp .return_val
177
178 ;
179 ; Input is NaN, output it unmodified as far as we can (FLD changes SNaN
180 ; to QNaN when masked).
181 ;
182.nan:
183%ifdef RT_ARCH_AMD64
184 ffreep st0
185%endif
186 jmp .return
187
188 ;
189 ; Local constants.
190 ;
191ALIGNCODE(8)
192 ; About 2**-27. When fabs(input) is below this limit we can consider cos(input) ~= 1.0.
193.s_r64TinyCosTo1:
194 dq 7.4505806e-9
195
196 ; The absolute limit for the range which FCOS is expected to produce reasonable results.
197.s_r64FCosOkay:
198 dq 1.1780972450961724644225 ; 3*pi/8
199
200.s_r64Two:
201 dq 2.0
202ENDPROC RT_NOCRT(cos)
203
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette