; $Id: expf.asm 96282 2022-08-18 02:52:12Z vboxsync $
;; @file
; IPRT - No-CRT expf - AMD64 & X86.
;

;
; Copyright (C) 2006-2022 Oracle Corporation
;
; This file is part of VirtualBox Open Source Edition (OSE), as
; available from http://www.virtualbox.org. This file is free software;
; you can redistribute it and/or modify it under the terms of the GNU
; General Public License (GPL) as published by the Free Software
; Foundation, in version 2 as it comes in the "COPYING" file of the
; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
; VirtualBox OSE distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;


; Defined ahead of the includes so the macro files see it.
; NOTE(review): presumably this switches on the SEH64_* unwind annotations
; used in the prologue below - confirm in iprt/asmdefs.mac.
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"
%include "iprt/x86.mac"


BEGINCODE

; NOTE(review): declared here, but not referenced by the code visible in this file.
extern NAME(RT_NOCRT(feraiseexcept))

;;
; Computes e (2.7182818...) raised to the power of rf, single precision.
;
; The work is done on the x87 FPU: e**rf is rewritten as 2**(rf * log2(e)),
; and that base-2 exponent is split into an integer part (applied with
; fscale) and a fractional part (fed to f2xm1).
;
; Special inputs, as classified by fxam:
;   - +/-0.0:                returns +1.0.
;   - -Inf:                  returns +0.0.
;   - +Inf, NaN and every
;     other fxam class:      the input is handed back untouched (xmm0 is left
;                            alone on AMD64, the loaded value stays in st0 on x86).
;
; @returns  st(0) on x86 / xmm0 on AMD64.
; @param    rf      32-bit float: [xBP + xCB*2] on x86 (once the frame is set up) / xmm0 on AMD64.
; @uses     ax, cx, eflags, the x87 register stack and status word.
;
RT_NOCRT_BEGINPROC expf
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 20h                    ; Locals; only the dword at [xBP - 10h] is used.
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

        ;
        ; Get the argument onto the FPU stack: st0 = rf.
        ;
%ifdef RT_ARCH_AMD64
        movss   [xBP - 10h], xmm0           ; xmm0 -> st0 goes via the stack slot.
        fld     dword [xBP - 10h]
%else
        fld     dword [xBP + xCB*2]
%endif

        ;
        ; Classify the input.  AX ends up holding just the C3/C2/C0 class bits,
        ; while CX keeps the full status word because C1 (the sign) is needed
        ; when sorting out infinities.
        ;
        fxam
        fnstsw  ax
        mov     cx, ax
        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        cmp     ax, X86_FSW_C2                  ; Normal finite, non-zero number.
        je      .compute
        cmp     ax, X86_FSW_C3 | X86_FSW_C2     ; Denormal, computed the same way.
        je      .compute
        cmp     ax, X86_FSW_C3                  ; +/-0.0
        je      .input_zero
        cmp     ax, X86_FSW_C0 | X86_FSW_C2     ; +/-Inf
        je      .input_inf
        jmp     .passthru                       ; NaN and whatever else is left.

.compute:
        ;
        ; e**rf == 2**(rf * log2(e)), so switch the exponent over to base 2.
        ;
        fldl2e                              ; st0 = log2(e), st1 = rf
        fmulp                               ; st0 = rf * log2(e)

        ;
        ; Separate the base-2 exponent into an integer and a fraction, since
        ; f2xm1 only accepts arguments in the -1.0..+1.0 range.
        ;
        fld     st0                         ; st0 = st1 = rf * log2(e)
        frndint                             ; st0 = integer part (per the current FPU rounding mode)
        fsub    st1, st0                    ; st1 = fraction
        fxch                                ; st0 = fraction, st1 = integer part

        ; Fraction: 2**fraction, obtained as (2**fraction - 1.0) + 1.0.
        f2xm1
        fld1
        faddp                               ; st0 = 2**fraction, st1 = integer part

        ; Integer: scale by 2**integer and drop the integer from the stack.
        fscale
        fstp    st1                         ; st0 = result, nothing else on the stack.

        ;
        ; Hand back st0.  AMD64 wants it in xmm0, so pop it through the stack
        ; slot; x86 returns it right where it is.
        ;
.store_result:
%ifdef RT_ARCH_AMD64
        fstp    dword [xBP - 10h]
        movss   xmm0, [xBP - 10h]
%endif
.epilogue:
        leave
        ret

        ;
        ; +/-0.0 in: +1.0 out.
        ;
.input_zero:
        ffreep  st0
        fld1
        jmp     .store_result

        ;
        ; Infinity in: C1 of the saved status word carries the sign.
        ; -Inf gives +0.0, whereas +Inf is returned as is, just like a NaN.
        ;
.input_inf:
        test    cx, X86_FSW_C1
        jnz     .input_minus_inf
        ; +Inf: fall through.

        ;
        ; Return the input value unmodified.  On AMD64 it is still sitting in
        ; xmm0, so only the FPU copy needs discarding; on x86 the copy in st0
        ; is the return value.
        ;
.passthru:
%ifdef RT_ARCH_AMD64
        ffreep  st0
%endif
        jmp     .epilogue

.input_minus_inf:
        ffreep  st0
        fldz
        jmp     .store_result
ENDPROC   RT_NOCRT(expf)
