VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/math/powcore.asm@ 96337

Last change on this file since 96337 was 96337, checked in by vboxsync, 3 years ago

IPRT/nocrt: Split out the core of the pow() code into a common function and can be shared with powf and powl. bugref:10261

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 20.8 KB
Line 
1; $Id: powcore.asm 96337 2022-08-19 14:49:44Z vboxsync $
2;; @file
3; IPRT - No-CRT common pow code - AMD64 & X86.
4;
5
6;
7; Copyright (C) 2006-2022 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17; The contents of this file may alternatively be used under the terms
18; of the Common Development and Distribution License Version 1.0
19; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20; VirtualBox OSE distribution, in which case the provisions of the
21; CDDL are applicable instead of those of the GPL.
22;
23; You may elect to license modified versions of this file under the
24; terms and conditions of either the GPL or the CDDL or both.
25;
26
27
28%define RT_ASM_WITH_SEH64
29%include "iprt/asmdefs.mac"
30%include "iprt/x86.mac"
31
32
33BEGINCODE
34
35extern NAME(RT_NOCRT(feraiseexcept))
36
37;;
38; Call feraiseexcept(%1)
39%macro CALL_feraiseexcept_WITH 1
40 %ifdef RT_ARCH_X86
41 mov dword [xSP], X86_FSW_IE
42 %elifdef ASM_CALL64_GCC
43 mov edi, X86_FSW_IE
44 %elifdef ASM_CALL64_MSC
45 mov ecx, X86_FSW_IE
46 %else
47 %error calling conv.
48 %endif
49 call NAME(RT_NOCRT(feraiseexcept))
50%endmacro
51
52
53;;
54; Compute the st1 to the power of st0.
55;
56; @returns st(0) = result
57; eax = what's being returned:
58; 0 - Just a value.
59; 1 - The rBase value. Caller may take steps to ensure it's exactly the same.
60; 2 - The rExp value. Caller may take steps to ensure it's exactly the same.
61; @param rBase/st1 The base.
62; @param rExp/st0 The exponent
63; @param fFxamBase/dx The status flags after fxam(rBase).
64; @param enmType/ebx The original parameter and return types:
65; 0 - 32-bit / float
66; 1 - 64-bit / double
67; 2 - 80-bit / long double
68;
69BEGINPROC rtNoCrtMathPowCore
70 push xBP
71 SEH64_PUSH_xBP
72 mov xBP, xSP
73 SEH64_SET_FRAME_xBP 0
74 sub xSP, 30h
75 SEH64_ALLOCATE_STACK 30h
76 SEH64_END_PROLOGUE
77
78 ;
79 ; Weed out special values, starting with the exponent.
80 ;
81 fxam
82 fnstsw ax
83 mov cx, ax ; cx=fxam(exp)
84
85 and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
86 cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero)
87 je .exp_finite
88 cmp ax, X86_FSW_C3 ; Zero
89 je .exp_zero
90 cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals
91 je .exp_finite
92 cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity.
93 je .exp_inf
94 jmp .exp_nan
95
96.exp_finite:
97 ;
98 ; Detect special base values.
99 ;
100 mov ax, dx ; ax=fxam(base)
101 and ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
102 cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero)
103 je .base_finite
104 cmp ax, X86_FSW_C3 ; Zero
105 je .base_zero
106 cmp ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals
107 je .base_finite
108 cmp ax, X86_FSW_C0 | X86_FSW_C2 ; Infinity.
109 je .base_inf
110 jmp .base_nan
111
112.base_finite:
113 ;
114 ; 1 in the base is also special.
115 ; Rule 6 (see below): base == +1 and exponent = whatever: Return +1.0
116 ;
117 fld1
118 fcomip st0, st2
119 je .return_base_value
120
121 ;
122 ; Check if the exponent is an integer value we can handle in a 64-bit
123 ; GRP as that is simpler to handle accurately.
124 ;
125 ; In 64-bit integer range?
126 fld tword [.s_r80MaxInt xWrtRIP]
127 fcomip st0, st1
128 jb .not_integer_exp
129
130 fld tword [.s_r80MinInt xWrtRIP]
131 fcomip st0, st1
132 ja .not_integer_exp
133
134 ; Convert it to integer.
135 fld st0 ; -> st0=exp; st1=exp; st2=base
136 fistp qword [xBP - 8] ; Save and pop 64-bit int (no non-popping version of this instruction).
137
138 fild qword [xBP - 8] ; Load it again for comparison.
139 fucomip st0, st1 ; Compare integer exp and floating point exp to see if they are the same. Pop.
140 jne .not_integer_exp
141
142
143 ;
144 ;
145 ; Ok, we've got an integer exponent value in that fits into a 64-bit.
146 ; We'll multiply the base exponention bit by exponention bit, applying
147 ; it as a factor for bits that are set.
148 ;
149 ;
150.integer_exp:
151 ; Load the integer value into edx:exx / rdx and ditch the floating point exponent.
152 mov xDX, [xBP - 8]
153%ifdef RT_ARCH_X86
154 mov eax, [xBP - 8 + 4]
155%endif
156 ffreep st0 ; -> st0=base;
157
158 ; Load a 1 onto the stack, we'll need it below as well as for converting
159 ; a negative exponent to a positive one.
160 fld1 ; -> st0=1.0; st1=base;
161
162 ; If the exponent is negative, negate it and change base to 1/base.
163 or xDX, xDX
164 jns .integer_exp_positive
165 neg xDX
166%ifdef RT_ARCH_X86
167 neg eax
168 sbb edx, 0
169%endif
170 fdivr st1, st0 ; -> st0=1.0; st1=1/base
171.integer_exp_positive:
172
173 ;
174 ; We'll process edx:eax / rdx bit by bit till it's zero, using st0 for
175 ; the multiplication factor corresponding to the current exponent bit
176 ; and st1 as the result.
177 ;
178 fxch ; -> st0=base; st1=1.0;
179.integer_exp_loop:
180%ifdef RT_ARCH_X86
181 shrd eax, edx, 1
182%else
183 shr rdx, 1
184%endif
185 jnc .integer_exp_loop_advance
186 fmul st1, st0
187
188.integer_exp_loop_advance:
189 ; Check if we're done.
190%ifdef RT_ARCH_AMD64
191 jz .integer_exp_return ; (we will have the flags for the shr rdx above)
192%else
193 shr edx, 1 ; complete the above shift operation
194
195 mov ecx, edx ; check if edx:eax is zero.
196 or ecx, eax
197 jz .integer_exp_return
198%endif
199 ; Calculate the factor for the next bit.
200 fmul st0, st0
201 jmp .integer_exp_loop
202
203.integer_exp_return:
204 ffreep st0 ; drop the factor -> st0=result; no st1.
205 jmp .return_val
206
207
208 ;
209 ;
210 ; Non-integer or value was out of range for an int64_t.
211 ;
212 ; The approach here is the same as in exp.asm, only we have to do the
213 ; log2(base) calculation first as it's a parameter and not a constant.
214 ;
215 ;
216.not_integer_exp:
217
218 ; First reject negative numbers. We still have the fxam(base) status in dx.
219 test dx, X86_FSW_C1
220 jnz .base_negative_non_integer_exp
221
222 ; Swap the items on the stack, so we can process the base first.
223 fxch st0, st1 ; -> st0=base; st1=exponent;
224
225 ;
226 ; From log2.asm:
227 ;
228 ; The fyl2xp1 instruction (ST1=ST1*log2(ST0+1.0), popping ST0) has a
229 ; valid ST0 range of 1(1-sqrt(0.5)) (approx 0.29289321881) on both
230 ; sides of zero. We try use it if we can.
231 ;
232.above_one:
233 ; For both fyl2xp1 and fyl2xp1 we need st1=1.0.
234 fld1
235 fxch st0, st1 ; -> st0=base; st1=1.0; st2=exponent
236
237 ; Check if the input is within the fyl2xp1 range.
238 fld qword [.s_r64AbsFyL2xP1InputMax xWrtRIP]
239 fcomip st0, st1
240 jbe .cannot_use_fyl2xp1
241
242 fld qword [.s_r64AbsFyL2xP1InputMin xWrtRIP]
243 fcomip st0, st1
244 jae .cannot_use_fyl2xp1
245
246 ; Do the calculation.
247.use_fyl2xp1:
248 fsub st0, st1 ; -> st0=base-1; st1=1.0; st2=exponent
249 fyl2xp1 ; -> st0=1.0*log2(base-1.0+1.0); st1=exponent
250 jmp .done_log2
251
252.cannot_use_fyl2xp1:
253 fyl2x ; -> st0=1.0*log2(base); st1=exponent
254.done_log2:
255
256 ;
257 ; From exp.asm:
258 ;
259 ; Convert to power of 2 and it'll be the same as exp2.
260 ;
261 fmulp ; st0=log2(base); st1=exponent -> st0=pow2exp
262
263 ;
264 ; Split the job in two on the fraction and integer l2base parts.
265 ;
266 fld st0 ; Push a copy of the pow2exp on the stack.
267 frndint ; st0 = (int)pow2exp
268 fsub st1, st0 ; st1 = pow2exp - (int)pow2exp; i.e. st1 = fraction, st0 = integer.
269 fxch ; st0 = fraction, st1 = integer.
270
271 ; 1. Calculate on the fraction.
272 f2xm1 ; st0 = 2**fraction - 1.0
273 fld1
274 faddp ; st0 = 2**fraction
275
276 ; 2. Apply the integer power of two.
277 fscale ; st0 = result; st1 = integer part of pow2exp.
278 fstp st1 ; st0 = result; no st1.
279
280 ;
281 ; Return st0.
282 ;
283.return_val:
284 xor eax, eax
285.return:
286 leave
287 ret
288
289
290 ;
291 ;
292 ; pow() has a lot of defined behavior for special values, which is why
293 ; this is the largest and most difficult part of the code. :-)
294 ;
295 ; On https://pubs.opengroup.org/onlinepubs/9699919799/functions/pow.html
296 ; there are 21 error conditions listed in the return value section.
297 ; The code below refers to this by number.
298 ;
299 ; When we get here:
300 ; dx=fxam(base)
301 ; cx=fxam(exponent)
302 ; st1=base
303 ; st0=exponent
304 ;
305
306 ;
307 ; 1. Finit base < 0 and finit non-interger exponent: -> domain error (#IE) + NaN.
308 ;
309 ; The non-integer exponent claim might be wrong, as we only check if it
310 ; fits into a int64_t register. But, I don't see how we can calculate
311 ; it right now.
312 ;
313.base_negative_non_integer_exp:
314 CALL_feraiseexcept_WITH X86_FSW_IE
315 jmp .return_nan
316
317 ;
318 ; 7. Exponent = +/-0.0, any base value including NaN: return +1.0
319 ; Note! According to https://en.cppreference.com/w/c/numeric/math/pow a
320 ; domain error (#IE) occur if base=+/-0. Not implemented.
321.exp_zero:
322.return_plus_one:
323 fld1
324 jmp .return_pop_pop_val
325
326 ;
327 ; 6. Exponent = whatever and base = 1: Return 1.0
328 ; 10. Exponent = +/-Inf and base = -1: Return 1.0
329 ;6+10 => Exponent = +/-Inf and |base| = 1: Return 1.0
330 ; 11. Exponent = -Inf and |base| < 1: Return +Inf
331 ; 12. Exponent = -Inf and |base| > 1: Return +0
332 ; 13. Exponent = +Inf and |base| < 1: Return +0
333 ; 14. Exponent = +Inf and |base| > 1: Return +Inf
334 ;
335 ; Note! Rule 4 would trigger for the same conditions as 11 when base == 0,
336 ; but it's optional to raise div/0 and it's apparently marked as
337 ; obsolete in C23, so not implemented.
338 ;
339.exp_inf:
340 ; Check if base is NaN or unsupported.
341 and dx, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0 ; fxam(base)
342 cmp dx, X86_FSW_C0
343 jbe .return_base_nan
344
345 ; Calc fabs(base) and replace the exponent with 1.0 as we're very likely to need this here.
346 ffreep st0
347 fabs
348 fld1 ; st0=1.0; st1=|rdBase|
349 fcomi st0, st1
350 je .return_plus_one ; Matches rule 6 + 10 (base is +/-1).
351 ja .exp_inf_base_smaller_than_one
352.exp_inf_base_larger_than_one:
353 test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign
354 jz .return_plus_inf ; Matches rule 14 (exponent is +Inf).
355 jmp .return_plus_zero ; Matches rule 12 (exponent is -Inf).
356
357.exp_inf_base_smaller_than_one:
358 test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign
359 jnz .return_plus_inf ; Matches rule 11 (exponent is -Inf).
360 jmp .return_plus_zero ; Matches rule 13 (exponent is +Inf).
361
362 ;
363 ; 6. Exponent = whatever and base = 1: Return 1.0
364 ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN.
365 ;
366.exp_nan:
367 ; Check if base is a number and possible 1.
368 test dx, X86_FSW_C2 ; dx=fxam(base); C2 is set for finite number, infinity and denormals.
369 jz .return_exp_nan
370 fld1
371 fcomip st0, st2
372 jne .return_exp_nan
373 jmp .return_plus_one
374
375 ;
376 ; 4a. base == +/-0.0 and exp < 0 and exp is odd integer: Return +/-Inf, raise div/0.
377 ; 4b. base == +/-0.0 and exp < 0 and exp is not odd int: Return +Inf, raise div/0.
378 ; 8. base == +/-0.0 and exp > 0 and exp is odd integer: Return +/-0.0
379 ; 9. base == +/-0.0 and exp > 0 and exp is not odd int: Return +0
380 ;
381 ; Note! Exponent must be finite and non-zero if we get here.
382 ;
383.base_zero:
384 fldz
385 fcomip st0, st1
386 jbe .base_zero_plus_exp
387.base_zero_minus_exp:
388 mov cx, dx ; stashing fxam(base) in CX because EDX is trashed by .is_exp_odd_integer
389 call .is_exp_odd_integer ; trashes EDX but no ECX.
390 or eax, eax
391 jz .base_zero_minus_exp_not_odd_int
392
393 ; Matching 4a.
394.base_zero_minus_exp_odd_int:
395 test cx, X86_FSW_C1 ; base sign
396 jz .raise_de_and_return_plus_inf
397.raise_de_and_return_minus_inf:
398 CALL_feraiseexcept_WITH X86_FSW_DE
399 jmp .return_minus_inf
400.raise_de_and_return_plus_inf:
401 CALL_feraiseexcept_WITH X86_FSW_DE
402 jmp .return_plus_inf
403
404 ; Matching 4b.
405.base_zero_minus_exp_not_odd_int:
406 CALL_feraiseexcept_WITH X86_FSW_DE
407 jmp .return_plus_inf
408
409.base_zero_plus_exp:
410 call .is_exp_odd_integer
411 or eax, eax
412 jnz .return_base_value ; Matching 8
413.return_plus_zero: ; Matching 9
414 fldz
415 jmp .return_pop_pop_val
416
417 ;
418 ; 15. base == -Inf and exp < 0 and exp is odd integer: Return -0
419 ; 16. base == -Inf and exp < 0 and exp is not odd int: Return +0
420 ; 17. base == -Inf and exp > 0 and exp is odd integer: Return -Inf
421 ; 18. base == -Inf and exp > 0 and exp is not odd int: Return +Inf
422 ; 19. base == +Inf and exp < 0: Return +0
423 ; 20. base == +Inf and exp > 0: Return +Inf
424 ;
425 ; Note! Exponent must be finite and non-zero if we get here.
426 ;
427.base_inf:
428 fldz
429 fcomip st0, st1
430 jbe .base_inf_plus_exp
431.base_inf_minus_exp:
432 test dx, X86_FSW_C1
433 jz .return_plus_zero ; Matches 19 (base == +Inf).
434.base_minus_inf_minus_exp:
435 call .is_exp_odd_integer
436 or eax, eax
437 jz .return_plus_zero ; Matches 16 (exp not odd and < 0, base == -Inf)
438.return_minus_zero: ; Matches 15 (exp is odd and < 0, base == -Inf)
439 fldz
440 fchs
441 jmp .return_pop_pop_val
442
443.base_inf_plus_exp:
444 test dx, X86_FSW_C1
445 jz .return_plus_inf ; Matches 20 (base == +Inf).
446.base_minus_inf_plus_exp:
447 call .is_exp_odd_integer
448 or eax, eax
449 jnz .return_minus_inf ; Matches 17 (exp is odd and > 0, base == +Inf)
450 jmp .return_plus_inf ; Matches 18 (exp not odd and > 0, base == +Inf)
451
452 ;
453 ; Return the exponent NaN (or whatever) value.
454 ;
455.return_exp_nan:
456 fld st0
457 mov eax, 2 ; return param 2
458 jmp .return_pop_pop_val_with_eax
459
460 ;
461 ; Return the base NaN (or whatever) value.
462 ;
463.return_base_nan:
464.return_base_value:
465.base_nan: ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN.
466 fld st1
467 mov eax, 1 ; return param 1
468 jmp .return_pop_pop_val_with_eax
469
470 ;
471 ; Pops the two values off the FPU stack and returns NaN.
472 ;
473.return_nan:
474 fld qword [.s_r64QNan xWrtRIP]
475 jmp .return_pop_pop_val
476
477 ;
478 ; Pops the two values off the FPU stack and returns +Inf.
479 ;
480.return_plus_inf:
481 fld qword [.s_r64PlusInf xWrtRIP]
482 jmp .return_pop_pop_val
483
484 ;
485 ; Pops the two values off the FPU stack and returns -Inf.
486 ;
487.return_minus_inf:
488 fld qword [.s_r64MinusInf xWrtRIP]
489 jmp .return_pop_pop_val
490
491 ;
492 ; Return st0, remove st1 and st2.
493 ;
494.return_pop_pop_val:
495 xor eax, eax
496.return_pop_pop_val_with_eax:
497 fstp st2
498 ffreep st0
499 jmp .return
500
501
502ALIGNCODE(8)
503.s_r80MaxInt:
504 dt +9223372036854775807.0
505
506ALIGNCODE(8)
507.s_r80MinInt:
508 dt -9223372036854775807.0
509
510ALIGNCODE(8)
511 ;; The fyl2xp1 instruction only works between +/-1(1-sqrt(0.5)).
512 ; These two variables is that range + 1.0, so we can compare directly
513 ; with the input w/o any extra fsub and fabs work.
514.s_r64AbsFyL2xP1InputMin:
515 dq 0.708 ; -0.292 + 1.0
516.s_r64AbsFyL2xP1InputMax:
517 dq 1.292
518
519.s_r64QNan:
520 dq RTFLOAT64U_QNAN_MINUS
521.s_r64PlusInf:
522 dq RTFLOAT64U_INF_PLUS
523.s_r64MinusInf:
524 dq RTFLOAT64U_INF_MINUS
525
526 ;;
527 ; Sub-function that checks if the exponent (st0) is an odd integer or not.
528 ;
529 ; @returns eax = 1 if odd, 0 if even or not integer.
530 ; @uses eax, edx, eflags.
531 ;
532.is_exp_odd_integer:
533 ;
534 ; Save the FPU enviornment and mask all exceptions.
535 ;
536 fnstenv [xBP - 30h]
537 mov ax, [xBP - 30h + X86FSTENV32P.FCW]
538 or word [xBP - 30h + X86FSTENV32P.FCW], X86_FCW_MASK_ALL
539 fldcw [xBP - 30h + X86FSTENV32P.FCW]
540 mov [xBP - 30h + X86FSTENV32P.FCW], ax
541
542 ;
543 ; Convert to 64-bit integer (probably not 100% correct).
544 ;
545 fld st0 ; -> st0=exponent st1=exponent; st2=base;
546 fistp qword [xBP - 10h]
547 fild qword [xBP - 10h] ; -> st0=int(exponent) st1=exponent; st2=base;
548 fcomip st0, st1 ; -> st0=exponent; st1=base;
549 jne .is_exp_odd_integer__return_false ; jump if not integer.
550 mov xAX, [xBP - 10h]
551%ifdef
552 mov edx, [xBP - 10h + 4]
553%endif
554
555 ;
556 ; Check the lowest bit if it might be odd.
557 ; This works both for positive and negative numbers.
558 ;
559 test al, 1
560 jz .is_exp_odd_integer__return_false ; jump if even.
561
562 ;
563 ; If the result is negative, convert to positive.
564 ;
565%ifdef RT_ARCH_AMD64
566 bt rax, 63
567%else
568 bt edx, 31
569%endif
570 jnc .is_exp_odd_integer__positive
571%ifdef RT_ARCH_AMD64
572 neg xAX
573%else
574 neg edx
575 neg eax
576 sbb edx, 0
577%endif
578.is_exp_odd_integer__positive:
579
580 ;
581 ; Now find the most significant bit in the value so we can verify that
582 ; the odd bit was part of the mantissa/fraction of the input.
583 ;
584 cmp bl, 3 ; Skip if 80-bit input, as it has a 64-bit mantissa which
585 je .is_exp_odd_integer__return_true ; makes it a 1 bit more precision than out integer reg(s).
586
587%ifdef RT_ARCH_AMD64
588 bsr rax, rax
589%else
590 bsr edx, edx
591 jnz .is_exp_odd_integer__high_dword_is_zero
592 lea eax, [edx + 20h]
593 jmp .is_exp_odd_integer__first_bit_in_eax
594.is_exp_odd_integer__high_dword_is_zero:
595 bsr eax, eax
596.is_exp_odd_integer__first_bit_in_eax:
597%endif
598 ;
599 ; The limit is 53 for double precision (one implicit bit + 52 bits fraction),
600 ; and 24 for single precision types.
601 ;
602 mov ah, 53 ; RTFLOAT64U_FRACTION_BITS + 1
603 cmp bl, 0
604 jne .is_exp_odd_integer__is_double_limit
605 mov ah, 24 ; RTFLOAT32U_FRACTION_BITS + 1
606.is_exp_odd_integer__is_double_limit:
607
608 cmp al, ah
609 jae .is_exp_odd_integer__return_false
610 mov eax, 1
611
612 ; Return.
613.is_exp_odd_integer__return_true:
614 jmp .is_exp_odd_integer__return
615.is_exp_odd_integer__return_false:
616 xor eax, eax
617.is_exp_odd_integer__return:
618 ffreep st0
619 fldenv [xBP - 30h]
620 ret
621
622ENDPROC rtNoCrtMathPowCore
623
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette