VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/math/fma-asm.asm@ 96240

Last change on this file since 96240 was 96108, checked in by vboxsync, 2 years ago

IPRT/nocrt: fma and fmaf. bugref:10261

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 2.6 KB
Line 
1; $Id: fma-asm.asm 96108 2022-08-08 11:16:23Z vboxsync $
2;; @file
3; IPRT - No-CRT fma alternatives - AMD64 & X86.
4;
5
6;
7; Copyright (C) 2006-2022 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17; The contents of this file may alternatively be used under the terms
18; of the Common Development and Distribution License Version 1.0
19; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20; VirtualBox OSE distribution, in which case the provisions of the
21; CDDL are applicable instead of those of the GPL.
22;
23; You may elect to license modified versions of this file under the
24; terms and conditions of either the GPL or the CDDL or both.
25;
26
27%define RT_ASM_WITH_SEH64
28%include "iprt/asmdefs.mac"
29
30BEGINCODE
31
;;
; Fused multiply-add of three doubles, FMA3 ("intel") flavour:
;       result = rdFactor1 * rdFactor2 + rdAddend
; evaluated by a single vfmadd132sd instruction.
;
; No CPU feature detection is done in this routine.
; NOTE(review): presumably the caller only dispatches here once FMA3 support
;               has been established - confirm at the call site.
;
; @returns  xmm0.  RT_ARCH_X86 builds additionally load the value into st(0).
; @param    rdFactor1   xmm0;  RT_ARCH_X86 builds: qword [xBP + xCB*2 + 00h].
; @param    rdFactor2   xmm1;  RT_ARCH_X86 builds: qword [xBP + xCB*2 + 08h].
; @param    rdAddend    xmm2;  RT_ARCH_X86 builds: qword [xBP + xCB*2 + 10h].
;
BEGINPROC rtNoCrtMathFma3
        ; Frame setup.  The SEH64_* macros come from iprt/asmdefs.mac and must
        ; stay interleaved with the instructions they annotate.
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

%ifdef RT_ARCH_X86
        ; Pull the three operands off the stack into the registers the
        ; shared instruction below expects.
        movsd   xmm0, qword [xBP + xCB*2]           ; rdFactor1
        movsd   xmm1, qword [xBP + xCB*2 + 08h]     ; rdFactor2
        movsd   xmm2, qword [xBP + xCB*2 + 10h]     ; rdAddend
%endif

        ; "132" ordering: operand 1 is multiplied by operand 3, then operand 2
        ; is added, i.e. xmm0 = xmm0 * xmm1 + xmm2.
        vfmadd132sd xmm0, xmm2, xmm1

%ifdef RT_ARCH_X86
        ; Move the result over to the x87 stack via a temporary stack slot.
        ; The slot needs no explicit release: 'leave' reloads xSP from xBP.
        sub     xSP, 10h
        movsd   [xSP], xmm0
        fld     qword [xSP]
%endif
        leave
        ret
ENDPROC   rtNoCrtMathFma3
62
63
;;
; Fused multiply-add of three doubles, FMA4 ("amd") flavour:
;       result = rdFactor1 * rdFactor2 + rdAddend
; evaluated by a single four-operand vfmaddsd instruction.
;
; No CPU feature detection is done in this routine.
; NOTE(review): presumably the caller only dispatches here once FMA4 support
;               has been established - confirm at the call site.
;
; @returns  xmm0.  RT_ARCH_X86 builds additionally load the value into st(0).
; @param    rdFactor1   xmm0;  RT_ARCH_X86 builds: qword [xBP + xCB*2 + 00h].
; @param    rdFactor2   xmm1;  RT_ARCH_X86 builds: qword [xBP + xCB*2 + 08h].
; @param    rdAddend    xmm2;  RT_ARCH_X86 builds: qword [xBP + xCB*2 + 10h].
;
BEGINPROC rtNoCrtMathFma4
        ; Frame setup.  The SEH64_* macros come from iprt/asmdefs.mac and must
        ; stay interleaved with the instructions they annotate.
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

%ifdef RT_ARCH_X86
        ; Pull the three operands off the stack into the registers the
        ; shared instruction below expects.
        movsd   xmm0, qword [xBP + xCB*2]           ; rdFactor1
        movsd   xmm1, qword [xBP + xCB*2 + 08h]     ; rdFactor2
        movsd   xmm2, qword [xBP + xCB*2 + 10h]     ; rdAddend
%endif

        ; Four-operand form with a separate destination:
        ; xmm0 = xmm0 * xmm1 + xmm2.
        vfmaddsd xmm0, xmm0, xmm1, xmm2

%ifdef RT_ARCH_X86
        ; Move the result over to the x87 stack via a temporary stack slot.
        ; The slot needs no explicit release: 'leave' reloads xSP from xBP.
        sub     xSP, 10h
        movsd   [xSP], xmm0
        fld     qword [xSP]
%endif
        leave
        ret
ENDPROC   rtNoCrtMathFma4
94
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette