VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/math/log2f.asm@ 96282

Last change on this file since 96282 was 96279, checked in by vboxsync, 2 years ago

IPRT/nocrt: Implemented log2f and added testing of log2 and log2f. bugref:10261

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 5.5 KB
Line 
1; $Id: log2f.asm 96279 2022-08-18 00:07:28Z vboxsync $
2;; @file
3; IPRT - No-CRT log2f - AMD64 & X86.
4;
5
6;
7; Copyright (C) 2006-2022 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17; The contents of this file may alternatively be used under the terms
18; of the Common Development and Distribution License Version 1.0
19; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20; VirtualBox OSE distribution, in which case the provisions of the
21; CDDL are applicable instead of those of the GPL.
22;
23; You may elect to license modified versions of this file under the
24; terms and conditions of either the GPL or the CDDL or both.
25;
26
27
28%define RT_ASM_WITH_SEH64
29%include "iprt/asmdefs.mac"
30%include "iprt/x86.mac"
31
32
33BEGINCODE
34
35extern NAME(RT_NOCRT(feraiseexcept))
36
;;
; Compute the base-2 logarithm of a single precision value (log2f) using the
; x87 FPU.
;
; Special cases as implemented below:
;       - +1.0:               returns +0.0.
;       - +/-0.0:             returns -Inf and raises X86_FSW_ZE (divide by zero).
;       - negative and -Inf:  returns NaN and raises X86_FSW_IE (invalid operation).
;       - +Inf and NaN:       returns the input value unmodified.
;
; The x87 register stack is balanced on every exit path: empty on AMD64 (the
; result is in xmm0) and holding only the result in st0 on x86.
;
; @returns  st(0) / xmm0
; @param    rf      [xSP + xCB*2] / xmm0
RT_NOCRT_BEGINPROC log2f
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 20h                ; Scratch dword at [xBP - 10h]; outgoing argument slot at [xSP] on x86.
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

        ;
        ; Load the input into st0.
        ;
%ifdef RT_ARCH_AMD64
        movss   [xBP - 10h], xmm0       ; xmm0 -> x87 must go via memory.
        fld     dword [xBP - 10h]
%else
        fld     dword [xBP + xCB*2]
%endif

        ;
        ; Weed out non-normal values.
        ;
        fxam
        fnstsw  ax
        mov     cx, ax                  ; Keep the unmasked status word; C1 (sign) is tested later.
        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        cmp     ax, X86_FSW_C2          ; Normal finite number (excluding zero)
        je      .finite
        cmp     ax, X86_FSW_C3          ; Zero
        je      .zero
        cmp     ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals
        je      .finite
        cmp     ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity.
        je      .inf
        jmp     .nan

.finite:
        ; Negative number?
        test    cx, X86_FSW_C1
        jnz     .negative

        ; Is it +1.0?
        fld1
        fcomip  st1                     ; Compares 1.0 with the input and pops the 1.0 again.
        jz      .plus_one

        ;
        ; The fyl2xp1 instruction (ST1=ST1*log2(ST0+1.0), popping ST0) has a
        ; valid ST0 range of +/-(1 - sqrt(0.5)) (approx 0.29289321881), i.e.
        ; it only works close to zero.  We try use it if we can.
        ;
.above_one:
        ; For both fyl2x and fyl2xp1 we need st1=1.0.
        fld1
        fxch    st0, st1                ; -> st0=input; st1=1.0

        ; Check if the input is within the fyl2xp1 range.
        fld     qword [.s_r64AbsFyL2xP1InputMax xWrtRIP]
        fcomip  st0, st1                ; max <= input?
        jbe     .cannot_use_fyl2xp1

        fld     qword [.s_r64AbsFyL2xP1InputMin xWrtRIP]
        fcomip  st0, st1                ; min >= input?
        jae     .cannot_use_fyl2xp1

        ; Do the calculation.
.use_fyl2xp1:
        fsub    st0, st1                ; -> st0=input-1; st1=1.0
        fyl2xp1                         ; -> st0=1.0*log2(st0+1.0)
        jmp     .return_val

.cannot_use_fyl2xp1:
        fyl2x                           ; -> st0=1.0*log2(st0)

        ;
        ; Return st0.  On AMD64 the value is moved to xmm0 via the stack,
        ; which also pops it off the FPU stack.
        ;
.return_val:
%ifdef RT_ARCH_AMD64
        fstp    dword [xBP - 10h]
        movss   xmm0, [xBP - 10h]
%endif
.return:
        leave
        ret


        ;
        ; +1.0: Return +0.0.
        ;
.plus_one:
        ffreep  st0                     ; Drop the input.
        fldz
        jmp     .return_val

        ;
        ; Negative numbers: Return NaN and raise invalid operation.
        ;
.negative:
.minus_inf:
        ; Drop the input so the FPU stack is balanced on return.  Without this
        ; the input stays on the stack: one leaked register per call on AMD64,
        ; and two live registers (NaN + input) on return for x86.
        ffreep  st0

        ; Raise invalid operation
%ifdef RT_ARCH_X86
        mov     dword [xSP], X86_FSW_IE
%elifdef ASM_CALL64_GCC
        mov     edi, X86_FSW_IE
%elifdef ASM_CALL64_MSC
        mov     ecx, X86_FSW_IE
%else
 %error calling conv.
%endif
        call    NAME(RT_NOCRT(feraiseexcept))

        ; Load NaN
%ifdef RT_ARCH_AMD64
        movss   xmm0, [.s_r32NaN xWrtRIP]
%else
        fld     dword [.s_r32NaN xWrtRIP]
%endif
        jmp     .return

        ;
        ; +/-0.0: Return -Inf and raise divide by zero error.
        ;
.zero:
        ffreep  st0                     ; Drop the input.

        ; Raise div/0
%ifdef RT_ARCH_X86
        mov     dword [xSP], X86_FSW_ZE
%elifdef ASM_CALL64_GCC
        mov     edi, X86_FSW_ZE
%elifdef ASM_CALL64_MSC
        mov     ecx, X86_FSW_ZE
%else
 %error calling conv.
%endif
        call    NAME(RT_NOCRT(feraiseexcept))

        ; Load -Inf
%ifdef RT_ARCH_AMD64
        movss   xmm0, [.s_r32MinusInf xWrtRIP]
%else
        fld     dword [.s_r32MinusInf xWrtRIP]
%endif
        jmp     .return

        ;
        ; -Inf: Same as other negative numbers
        ; +Inf: Return the input (+Inf) as is, by falling thru to the NaN path.
        ;
.inf:
        test    cx, X86_FSW_C1          ; sign bit
        jnz     .minus_inf

        ;
        ; NaN: Return the input NaN value as is, if we can.
        ;
        ; On AMD64 xmm0 still holds the unmodified input, so we only need to
        ; drop the copy on the FPU stack.  On x86 the input is already in st0.
        ;
.nan:
%ifdef RT_ARCH_AMD64
        ffreep  st0
%endif
        jmp     .return

ALIGNCODE(8)
        ;; The fyl2xp1 instruction only works between +/-(1 - sqrt(0.5)).
        ; These two variables are that range + 1.0, so we can compare directly
        ; with the input w/o any extra fsub and fabs work.
.s_r64AbsFyL2xP1InputMin:
        dq      0.708                   ; -0.292 + 1.0
.s_r64AbsFyL2xP1InputMax:
        dq      1.292                   ; +0.292 + 1.0
.s_r32MinusInf:
        dd      RTFLOAT32U_INF_MINUS
.s_r32NaN:
        dd      RTFLOAT32U_QNAN_MINUS
ENDPROC   RT_NOCRT(log2f)
217
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette