log2f.asm@ 96282

Last change on this file since 96282 was 96279, checked in by vboxsync, 2 years ago
IPRT/nocrt: Implemented log2f and added testing of log2 and log2f. bugref:10261
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 5.5 KB

Line
1	; $Id: log2f.asm 96279 2022-08-18 00:07:28Z vboxsync $
2	;; @file
3	; IPRT - No-CRT log2f - AMD64 & X86.
4	;
5
6	;
7	; Copyright (C) 2006-2022 Oracle Corporation
8	;
9	; This file is part of VirtualBox Open Source Edition (OSE), as
10	; available from http://www.virtualbox.org. This file is free software;
11	; you can redistribute it and/or modify it under the terms of the GNU
12	; General Public License (GPL) as published by the Free Software
13	; Foundation, in version 2 as it comes in the "COPYING" file of the
14	; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	;
17	; The contents of this file may alternatively be used under the terms
18	; of the Common Development and Distribution License Version 1.0
19	; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20	; VirtualBox OSE distribution, in which case the provisions of the
21	; CDDL are applicable instead of those of the GPL.
22	;
23	; You may elect to license modified versions of this file under the
24	; terms and conditions of either the GPL or the CDDL or both.
25	;
26
27
28	%define RT_ASM_WITH_SEH64
29	%include "iprt/asmdefs.mac"
30	%include "iprt/x86.mac"
31
32
33	BEGINCODE
34
35	extern NAME(RT_NOCRT(feraiseexcept))
36
;;
; Compute the base-2 logarithm of a single precision floating point value.
;
; The calculation is done on the x87 FPU with fyl2xp1 (inputs close to 1.0)
; or fyl2x (everything else).  On AMD64 the value is moved between xmm0 and
; the FPU via a stack temporary at [xBP - 10h].
;
; Special inputs:
;   - +1.0:                 returns +0.0.
;   - +/-0.0:               returns -Inf, raises X86_FSW_ZE via feraiseexcept.
;   - negative (incl -Inf): returns NaN,  raises X86_FSW_IE via feraiseexcept.
;   - +Inf and NaN:         the input is returned as-is, if we can.
;
; The x87 register stack is left balanced on every exit path: empty on
; AMD64, only st0 (the return value) occupied on X86.
;
; @returns  st(0) / xmm0
; @param    rf      [xSP + xCB*2] / xmm0
;
RT_NOCRT_BEGINPROC log2f
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 20h                    ; Scratch at [xBP - 10h]; outgoing x86 argument slot at [xSP].
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

        ;
        ; Load the input into st0.
        ;
%ifdef RT_ARCH_AMD64
        movss   [xBP - 10h], xmm0
        fld     dword [xBP - 10h]
%else
        fld     dword [xBP + xCB*2]
%endif

        ;
        ; Weed out non-normal values.
        ;
        fxam
        fnstsw  ax
        mov     cx, ax                      ; Keep the unmasked status word, C1 (sign) is tested later.
        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        cmp     ax, X86_FSW_C2              ; Normal finite number (excluding zero)
        je      .finite
        cmp     ax, X86_FSW_C3              ; Zero
        je      .zero
        cmp     ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals
        je      .finite
        cmp     ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity.
        je      .inf
        jmp     .nan

.finite:
        ; Negative number?
        test    cx, X86_FSW_C1
        jnz     .negative

        ; Is it +1.0?
        fld1
        fcomip  st1                         ; Compares 1.0 with the input and pops the 1.0 again.
        jz      .plus_one

        ;
        ; The fyl2xp1 instruction (ST1=ST1*log2(ST0+1.0), popping ST0) has a
        ; valid ST0 range of 1-sqrt(0.5) (approx 0.29289321881) on both
        ; sides of zero.  We try to use it if we can.
        ;
        ; Note: despite the label name, this point is reached for every
        ;       positive finite input other than +1.0.
        ;
.above_one:
        ; For both fyl2x and fyl2xp1 we need st1=1.0.
        fld1
        fxch    st0, st1                    ; -> st0=input; st1=1.0

        ; Check if the input is within the fyl2xp1 range.
        fld     qword [.s_r64AbsFyL2xP1InputMax xWrtRIP]
        fcomip  st0, st1                    ; max <= input -> out of range.
        jbe     .cannot_use_fyl2xp1

        fld     qword [.s_r64AbsFyL2xP1InputMin xWrtRIP]
        fcomip  st0, st1                    ; min >= input -> out of range.
        jae     .cannot_use_fyl2xp1

        ; Do the calculation.
.use_fyl2xp1:
        fsub    st0, st1                    ; -> st0=input-1; st1=1.0
        fyl2xp1                             ; -> st0=1.0*log2(st0+1.0)
        jmp     .return_val

.cannot_use_fyl2xp1:
        fyl2x                               ; -> st0=1.0*log2(st0)

        ;
        ; Return st0.
        ;
.return_val:
%ifdef RT_ARCH_AMD64
        fstp    dword [xBP - 10h]
        movss   xmm0, [xBP - 10h]
%endif
.return:
        leave
        ret


        ;
        ; +1.0: Return +0.0.
        ;
.plus_one:
        ffreep  st0
        fldz
        jmp     .return_val

        ;
        ; Negative numbers: Return NaN and raise invalid operation.
        ;
.negative:
.minus_inf:
        ; Drop the input, otherwise it is left behind on the FPU register
        ; stack when we return (the .zero path does the same).
        ffreep  st0

        ; Raise invalid operation
%ifdef RT_ARCH_X86
        mov     dword [xSP], X86_FSW_IE
%elifdef ASM_CALL64_GCC
        mov     edi, X86_FSW_IE
%elifdef ASM_CALL64_MSC
        mov     ecx, X86_FSW_IE
%else
 %error calling conv.
%endif
        call    NAME(RT_NOCRT(feraiseexcept))

        ; Load NaN
%ifdef RT_ARCH_AMD64
        movss   xmm0, [.s_r32NaN xWrtRIP]
%else
        fld     dword [.s_r32NaN xWrtRIP]
%endif
        jmp     .return

        ;
        ; +/-0.0: Return -Inf and raise divide by zero error.
        ;
.zero:
        ffreep  st0

        ; Raise div/0
%ifdef RT_ARCH_X86
        mov     dword [xSP], X86_FSW_ZE
%elifdef ASM_CALL64_GCC
        mov     edi, X86_FSW_ZE
%elifdef ASM_CALL64_MSC
        mov     ecx, X86_FSW_ZE
%else
 %error calling conv.
%endif
        call    NAME(RT_NOCRT(feraiseexcept))

        ; Load -Inf
%ifdef RT_ARCH_AMD64
        movss   xmm0, [.s_r32MinusInf xWrtRIP]
%else
        fld     dword [.s_r32MinusInf xWrtRIP]
%endif
        jmp     .return

        ;
        ; -Inf: Same as other negative numbers
        ; +Inf: return +Inf. Join path with NaN.
        ;
.inf:
        test    cx, X86_FSW_C1              ; sign bit
        jnz     .minus_inf

        ;
        ; NaN: Return the input NaN value as is, if we can.
        ;
        ; AMD64: xmm0 still holds the caller's value, so only the FPU copy
        ;        needs dropping.  X86: the value loaded into st0 is returned.
        ;
.nan:
%ifdef RT_ARCH_AMD64
        ffreep  st0
%endif
        jmp     .return

ALIGNCODE(8)
        ;; The fyl2xp1 instruction only works between +/-(1-sqrt(0.5)).
        ; These two variables are that range + 1.0, so we can compare directly
        ; with the input w/o any extra fsub and fabs work.
.s_r64AbsFyL2xP1InputMin:
        dq      0.708                       ; -0.292 + 1.0
.s_r64AbsFyL2xP1InputMax:
        dq      1.292                       ; +0.292 + 1.0
.s_r32MinusInf:
        dd      RTFLOAT32U_INF_MINUS
.s_r32NaN:
        dd      RTFLOAT32U_QNAN_MINUS
ENDPROC   RT_NOCRT(log2f)
217

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Runtime/common/math/log2f.asm@ 96282

Download in other formats: