powcore.asm@ 96337

Last change on this file since 96337 was 96337, checked in by vboxsync, 3 years ago
IPRT/nocrt: Split out the core of the pow() code into a common function and can be shared with powf and powl. bugref:10261
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 20.8 KB

Line
1	; $Id: powcore.asm 96337 2022-08-19 14:49:44Z vboxsync $
2	;; @file
3	; IPRT - No-CRT common pow code - AMD64 & X86.
4	;
5
6	;
7	; Copyright (C) 2006-2022 Oracle Corporation
8	;
9	; This file is part of VirtualBox Open Source Edition (OSE), as
10	; available from http://www.virtualbox.org. This file is free software;
11	; you can redistribute it and/or modify it under the terms of the GNU
12	; General Public License (GPL) as published by the Free Software
13	; Foundation, in version 2 as it comes in the "COPYING" file of the
14	; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15	; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16	;
17	; The contents of this file may alternatively be used under the terms
18	; of the Common Development and Distribution License Version 1.0
19	; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20	; VirtualBox OSE distribution, in which case the provisions of the
21	; CDDL are applicable instead of those of the GPL.
22	;
23	; You may elect to license modified versions of this file under the
24	; terms and conditions of either the GPL or the CDDL or both.
25	;
26
27
28	%define RT_ASM_WITH_SEH64
29	%include "iprt/asmdefs.mac"
30	%include "iprt/x86.mac"
31
32
33	BEGINCODE
34
35	extern NAME(RT_NOCRT(feraiseexcept))
36
37	;;
38	; Call feraiseexcept(%1)
39	%macro CALL_feraiseexcept_WITH 1
40	%ifdef RT_ARCH_X86
41	mov dword [xSP], X86_FSW_IE
42	%elifdef ASM_CALL64_GCC
43	mov edi, X86_FSW_IE
44	%elifdef ASM_CALL64_MSC
45	mov ecx, X86_FSW_IE
46	%else
47	%error calling conv.
48	%endif
49	call NAME(RT_NOCRT(feraiseexcept))
50	%endmacro
51
52
53	;;
54	; Compute the st1 to the power of st0.
55	;
56	; @returns st(0) = result
57	; eax = what's being returned:
58	; 0 - Just a value.
59	; 1 - The rBase value. Caller may take steps to ensure it's exactly the same.
60	; 2 - The rExp value. Caller may take steps to ensure it's exactly the same.
61	; @param rBase/st1 The base.
62	; @param rExp/st0 The exponent
63	; @param fFxamBase/dx The status flags after fxam(rBase).
64	; @param enmType/ebx The original parameter and return types:
65	; 0 - 32-bit / float
66	; 1 - 64-bit / double
67	; 2 - 80-bit / long double
68	;
69	BEGINPROC rtNoCrtMathPowCore
70	push xBP
71	SEH64_PUSH_xBP
72	mov xBP, xSP
73	SEH64_SET_FRAME_xBP 0
74	sub xSP, 30h
75	SEH64_ALLOCATE_STACK 30h
76	SEH64_END_PROLOGUE
77
78	;
79	; Weed out special values, starting with the exponent.
80	;
81	fxam
82	fnstsw ax
83	mov cx, ax ; cx=fxam(exp)
84
85	and ax, X86_FSW_C3 \| X86_FSW_C2 \| X86_FSW_C0
86	cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero)
87	je .exp_finite
88	cmp ax, X86_FSW_C3 ; Zero
89	je .exp_zero
90	cmp ax, X86_FSW_C3 \| X86_FSW_C2 ; Denormals
91	je .exp_finite
92	cmp ax, X86_FSW_C0 \| X86_FSW_C2 ; Infinity.
93	je .exp_inf
94	jmp .exp_nan
95
96	.exp_finite:
97	;
98	; Detect special base values.
99	;
100	mov ax, dx ; ax=fxam(base)
101	and ax, X86_FSW_C3 \| X86_FSW_C2 \| X86_FSW_C0
102	cmp ax, X86_FSW_C2 ; Normal finite number (excluding zero)
103	je .base_finite
104	cmp ax, X86_FSW_C3 ; Zero
105	je .base_zero
106	cmp ax, X86_FSW_C3 \| X86_FSW_C2 ; Denormals
107	je .base_finite
108	cmp ax, X86_FSW_C0 \| X86_FSW_C2 ; Infinity.
109	je .base_inf
110	jmp .base_nan
111
112	.base_finite:
113	;
114	; 1 in the base is also special.
115	; Rule 6 (see below): base == +1 and exponent = whatever: Return +1.0
116	;
117	fld1
118	fcomip st0, st2
119	je .return_base_value
120
121	;
122	; Check if the exponent is an integer value we can handle in a 64-bit
123	; GRP as that is simpler to handle accurately.
124	;
125	; In 64-bit integer range?
126	fld tword [.s_r80MaxInt xWrtRIP]
127	fcomip st0, st1
128	jb .not_integer_exp
129
130	fld tword [.s_r80MinInt xWrtRIP]
131	fcomip st0, st1
132	ja .not_integer_exp
133
134	; Convert it to integer.
135	fld st0 ; -> st0=exp; st1=exp; st2=base
136	fistp qword [xBP - 8] ; Save and pop 64-bit int (no non-popping version of this instruction).
137
138	fild qword [xBP - 8] ; Load it again for comparison.
139	fucomip st0, st1 ; Compare integer exp and floating point exp to see if they are the same. Pop.
140	jne .not_integer_exp
141
142
143	;
144	;
145	; Ok, we've got an integer exponent value in that fits into a 64-bit.
146	; We'll multiply the base exponention bit by exponention bit, applying
147	; it as a factor for bits that are set.
148	;
149	;
150	.integer_exp:
151	; Load the integer value into edx:exx / rdx and ditch the floating point exponent.
152	mov xDX, [xBP - 8]
153	%ifdef RT_ARCH_X86
154	mov eax, [xBP - 8 + 4]
155	%endif
156	ffreep st0 ; -> st0=base;
157
158	; Load a 1 onto the stack, we'll need it below as well as for converting
159	; a negative exponent to a positive one.
160	fld1 ; -> st0=1.0; st1=base;
161
162	; If the exponent is negative, negate it and change base to 1/base.
163	or xDX, xDX
164	jns .integer_exp_positive
165	neg xDX
166	%ifdef RT_ARCH_X86
167	neg eax
168	sbb edx, 0
169	%endif
170	fdivr st1, st0 ; -> st0=1.0; st1=1/base
171	.integer_exp_positive:
172
173	;
174	; We'll process edx:eax / rdx bit by bit till it's zero, using st0 for
175	; the multiplication factor corresponding to the current exponent bit
176	; and st1 as the result.
177	;
178	fxch ; -> st0=base; st1=1.0;
179	.integer_exp_loop:
180	%ifdef RT_ARCH_X86
181	shrd eax, edx, 1
182	%else
183	shr rdx, 1
184	%endif
185	jnc .integer_exp_loop_advance
186	fmul st1, st0
187
188	.integer_exp_loop_advance:
189	; Check if we're done.
190	%ifdef RT_ARCH_AMD64
191	jz .integer_exp_return ; (we will have the flags for the shr rdx above)
192	%else
193	shr edx, 1 ; complete the above shift operation
194
195	mov ecx, edx ; check if edx:eax is zero.
196	or ecx, eax
197	jz .integer_exp_return
198	%endif
199	; Calculate the factor for the next bit.
200	fmul st0, st0
201	jmp .integer_exp_loop
202
203	.integer_exp_return:
204	ffreep st0 ; drop the factor -> st0=result; no st1.
205	jmp .return_val
206
207
208	;
209	;
210	; Non-integer or value was out of range for an int64_t.
211	;
212	; The approach here is the same as in exp.asm, only we have to do the
213	; log2(base) calculation first as it's a parameter and not a constant.
214	;
215	;
216	.not_integer_exp:
217
218	; First reject negative numbers. We still have the fxam(base) status in dx.
219	test dx, X86_FSW_C1
220	jnz .base_negative_non_integer_exp
221
222	; Swap the items on the stack, so we can process the base first.
223	fxch st0, st1 ; -> st0=base; st1=exponent;
224
225	;
226	; From log2.asm:
227	;
228	; The fyl2xp1 instruction (ST1=ST1*log2(ST0+1.0), popping ST0) has a
229	; valid ST0 range of 1(1-sqrt(0.5)) (approx 0.29289321881) on both
230	; sides of zero. We try use it if we can.
231	;
232	.above_one:
233	; For both fyl2xp1 and fyl2xp1 we need st1=1.0.
234	fld1
235	fxch st0, st1 ; -> st0=base; st1=1.0; st2=exponent
236
237	; Check if the input is within the fyl2xp1 range.
238	fld qword [.s_r64AbsFyL2xP1InputMax xWrtRIP]
239	fcomip st0, st1
240	jbe .cannot_use_fyl2xp1
241
242	fld qword [.s_r64AbsFyL2xP1InputMin xWrtRIP]
243	fcomip st0, st1
244	jae .cannot_use_fyl2xp1
245
246	; Do the calculation.
247	.use_fyl2xp1:
248	fsub st0, st1 ; -> st0=base-1; st1=1.0; st2=exponent
249	fyl2xp1 ; -> st0=1.0*log2(base-1.0+1.0); st1=exponent
250	jmp .done_log2
251
252	.cannot_use_fyl2xp1:
253	fyl2x ; -> st0=1.0*log2(base); st1=exponent
254	.done_log2:
255
256	;
257	; From exp.asm:
258	;
259	; Convert to power of 2 and it'll be the same as exp2.
260	;
261	fmulp ; st0=log2(base); st1=exponent -> st0=pow2exp
262
263	;
264	; Split the job in two on the fraction and integer l2base parts.
265	;
266	fld st0 ; Push a copy of the pow2exp on the stack.
267	frndint ; st0 = (int)pow2exp
268	fsub st1, st0 ; st1 = pow2exp - (int)pow2exp; i.e. st1 = fraction, st0 = integer.
269	fxch ; st0 = fraction, st1 = integer.
270
271	; 1. Calculate on the fraction.
272	f2xm1 ; st0 = 2**fraction - 1.0
273	fld1
274	faddp ; st0 = 2**fraction
275
276	; 2. Apply the integer power of two.
277	fscale ; st0 = result; st1 = integer part of pow2exp.
278	fstp st1 ; st0 = result; no st1.
279
280	;
281	; Return st0.
282	;
283	.return_val:
284	xor eax, eax
285	.return:
286	leave
287	ret
288
289
290	;
291	;
292	; pow() has a lot of defined behavior for special values, which is why
293	; this is the largest and most difficult part of the code. :-)
294	;
295	; On https://pubs.opengroup.org/onlinepubs/9699919799/functions/pow.html
296	; there are 21 error conditions listed in the return value section.
297	; The code below refers to this by number.
298	;
299	; When we get here:
300	; dx=fxam(base)
301	; cx=fxam(exponent)
302	; st1=base
303	; st0=exponent
304	;
305
306	;
307	; 1. Finit base < 0 and finit non-interger exponent: -> domain error (#IE) + NaN.
308	;
309	; The non-integer exponent claim might be wrong, as we only check if it
310	; fits into a int64_t register. But, I don't see how we can calculate
311	; it right now.
312	;
313	.base_negative_non_integer_exp:
314	CALL_feraiseexcept_WITH X86_FSW_IE
315	jmp .return_nan
316
317	;
318	; 7. Exponent = +/-0.0, any base value including NaN: return +1.0
319	; Note! According to https://en.cppreference.com/w/c/numeric/math/pow a
320	; domain error (#IE) occur if base=+/-0. Not implemented.
321	.exp_zero:
322	.return_plus_one:
323	fld1
324	jmp .return_pop_pop_val
325
326	;
327	; 6. Exponent = whatever and base = 1: Return 1.0
328	; 10. Exponent = +/-Inf and base = -1: Return 1.0
329	;6+10 => Exponent = +/-Inf and \|base\| = 1: Return 1.0
330	; 11. Exponent = -Inf and \|base\| < 1: Return +Inf
331	; 12. Exponent = -Inf and \|base\| > 1: Return +0
332	; 13. Exponent = +Inf and \|base\| < 1: Return +0
333	; 14. Exponent = +Inf and \|base\| > 1: Return +Inf
334	;
335	; Note! Rule 4 would trigger for the same conditions as 11 when base == 0,
336	; but it's optional to raise div/0 and it's apparently marked as
337	; obsolete in C23, so not implemented.
338	;
339	.exp_inf:
340	; Check if base is NaN or unsupported.
341	and dx, X86_FSW_C3 \| X86_FSW_C2 \| X86_FSW_C0 ; fxam(base)
342	cmp dx, X86_FSW_C0
343	jbe .return_base_nan
344
345	; Calc fabs(base) and replace the exponent with 1.0 as we're very likely to need this here.
346	ffreep st0
347	fabs
348	fld1 ; st0=1.0; st1=\|rdBase\|
349	fcomi st0, st1
350	je .return_plus_one ; Matches rule 6 + 10 (base is +/-1).
351	ja .exp_inf_base_smaller_than_one
352	.exp_inf_base_larger_than_one:
353	test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign
354	jz .return_plus_inf ; Matches rule 14 (exponent is +Inf).
355	jmp .return_plus_zero ; Matches rule 12 (exponent is -Inf).
356
357	.exp_inf_base_smaller_than_one:
358	test cx, X86_FSW_C1 ; cx=faxm(exponent); C1=sign
359	jnz .return_plus_inf ; Matches rule 11 (exponent is -Inf).
360	jmp .return_plus_zero ; Matches rule 13 (exponent is +Inf).
361
362	;
363	; 6. Exponent = whatever and base = 1: Return 1.0
364	; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN.
365	;
366	.exp_nan:
367	; Check if base is a number and possible 1.
368	test dx, X86_FSW_C2 ; dx=fxam(base); C2 is set for finite number, infinity and denormals.
369	jz .return_exp_nan
370	fld1
371	fcomip st0, st2
372	jne .return_exp_nan
373	jmp .return_plus_one
374
375	;
376	; 4a. base == +/-0.0 and exp < 0 and exp is odd integer: Return +/-Inf, raise div/0.
377	; 4b. base == +/-0.0 and exp < 0 and exp is not odd int: Return +Inf, raise div/0.
378	; 8. base == +/-0.0 and exp > 0 and exp is odd integer: Return +/-0.0
379	; 9. base == +/-0.0 and exp > 0 and exp is not odd int: Return +0
380	;
381	; Note! Exponent must be finite and non-zero if we get here.
382	;
383	.base_zero:
384	fldz
385	fcomip st0, st1
386	jbe .base_zero_plus_exp
387	.base_zero_minus_exp:
388	mov cx, dx ; stashing fxam(base) in CX because EDX is trashed by .is_exp_odd_integer
389	call .is_exp_odd_integer ; trashes EDX but no ECX.
390	or eax, eax
391	jz .base_zero_minus_exp_not_odd_int
392
393	; Matching 4a.
394	.base_zero_minus_exp_odd_int:
395	test cx, X86_FSW_C1 ; base sign
396	jz .raise_de_and_return_plus_inf
397	.raise_de_and_return_minus_inf:
398	CALL_feraiseexcept_WITH X86_FSW_DE
399	jmp .return_minus_inf
400	.raise_de_and_return_plus_inf:
401	CALL_feraiseexcept_WITH X86_FSW_DE
402	jmp .return_plus_inf
403
404	; Matching 4b.
405	.base_zero_minus_exp_not_odd_int:
406	CALL_feraiseexcept_WITH X86_FSW_DE
407	jmp .return_plus_inf
408
409	.base_zero_plus_exp:
410	call .is_exp_odd_integer
411	or eax, eax
412	jnz .return_base_value ; Matching 8
413	.return_plus_zero: ; Matching 9
414	fldz
415	jmp .return_pop_pop_val
416
417	;
418	; 15. base == -Inf and exp < 0 and exp is odd integer: Return -0
419	; 16. base == -Inf and exp < 0 and exp is not odd int: Return +0
420	; 17. base == -Inf and exp > 0 and exp is odd integer: Return -Inf
421	; 18. base == -Inf and exp > 0 and exp is not odd int: Return +Inf
422	; 19. base == +Inf and exp < 0: Return +0
423	; 20. base == +Inf and exp > 0: Return +Inf
424	;
425	; Note! Exponent must be finite and non-zero if we get here.
426	;
427	.base_inf:
428	fldz
429	fcomip st0, st1
430	jbe .base_inf_plus_exp
431	.base_inf_minus_exp:
432	test dx, X86_FSW_C1
433	jz .return_plus_zero ; Matches 19 (base == +Inf).
434	.base_minus_inf_minus_exp:
435	call .is_exp_odd_integer
436	or eax, eax
437	jz .return_plus_zero ; Matches 16 (exp not odd and < 0, base == -Inf)
438	.return_minus_zero: ; Matches 15 (exp is odd and < 0, base == -Inf)
439	fldz
440	fchs
441	jmp .return_pop_pop_val
442
443	.base_inf_plus_exp:
444	test dx, X86_FSW_C1
445	jz .return_plus_inf ; Matches 20 (base == +Inf).
446	.base_minus_inf_plus_exp:
447	call .is_exp_odd_integer
448	or eax, eax
449	jnz .return_minus_inf ; Matches 17 (exp is odd and > 0, base == +Inf)
450	jmp .return_plus_inf ; Matches 18 (exp not odd and > 0, base == +Inf)
451
452	;
453	; Return the exponent NaN (or whatever) value.
454	;
455	.return_exp_nan:
456	fld st0
457	mov eax, 2 ; return param 2
458	jmp .return_pop_pop_val_with_eax
459
460	;
461	; Return the base NaN (or whatever) value.
462	;
463	.return_base_nan:
464	.return_base_value:
465	.base_nan: ; 5. Unless specified elsewhere, return NaN if any of the parameters are NaN.
466	fld st1
467	mov eax, 1 ; return param 1
468	jmp .return_pop_pop_val_with_eax
469
470	;
471	; Pops the two values off the FPU stack and returns NaN.
472	;
473	.return_nan:
474	fld qword [.s_r64QNan xWrtRIP]
475	jmp .return_pop_pop_val
476
477	;
478	; Pops the two values off the FPU stack and returns +Inf.
479	;
480	.return_plus_inf:
481	fld qword [.s_r64PlusInf xWrtRIP]
482	jmp .return_pop_pop_val
483
484	;
485	; Pops the two values off the FPU stack and returns -Inf.
486	;
487	.return_minus_inf:
488	fld qword [.s_r64MinusInf xWrtRIP]
489	jmp .return_pop_pop_val
490
491	;
492	; Return st0, remove st1 and st2.
493	;
494	.return_pop_pop_val:
495	xor eax, eax
496	.return_pop_pop_val_with_eax:
497	fstp st2
498	ffreep st0
499	jmp .return
500
501
502	ALIGNCODE(8)
503	.s_r80MaxInt:
504	dt +9223372036854775807.0
505
506	ALIGNCODE(8)
507	.s_r80MinInt:
508	dt -9223372036854775807.0
509
510	ALIGNCODE(8)
511	;; The fyl2xp1 instruction only works between +/-1(1-sqrt(0.5)).
512	; These two variables is that range + 1.0, so we can compare directly
513	; with the input w/o any extra fsub and fabs work.
514	.s_r64AbsFyL2xP1InputMin:
515	dq 0.708 ; -0.292 + 1.0
516	.s_r64AbsFyL2xP1InputMax:
517	dq 1.292
518
519	.s_r64QNan:
520	dq RTFLOAT64U_QNAN_MINUS
521	.s_r64PlusInf:
522	dq RTFLOAT64U_INF_PLUS
523	.s_r64MinusInf:
524	dq RTFLOAT64U_INF_MINUS
525
526	;;
527	; Sub-function that checks if the exponent (st0) is an odd integer or not.
528	;
529	; @returns eax = 1 if odd, 0 if even or not integer.
530	; @uses eax, edx, eflags.
531	;
532	.is_exp_odd_integer:
533	;
534	; Save the FPU enviornment and mask all exceptions.
535	;
536	fnstenv [xBP - 30h]
537	mov ax, [xBP - 30h + X86FSTENV32P.FCW]
538	or word [xBP - 30h + X86FSTENV32P.FCW], X86_FCW_MASK_ALL
539	fldcw [xBP - 30h + X86FSTENV32P.FCW]
540	mov [xBP - 30h + X86FSTENV32P.FCW], ax
541
542	;
543	; Convert to 64-bit integer (probably not 100% correct).
544	;
545	fld st0 ; -> st0=exponent st1=exponent; st2=base;
546	fistp qword [xBP - 10h]
547	fild qword [xBP - 10h] ; -> st0=int(exponent) st1=exponent; st2=base;
548	fcomip st0, st1 ; -> st0=exponent; st1=base;
549	jne .is_exp_odd_integer__return_false ; jump if not integer.
550	mov xAX, [xBP - 10h]
551	%ifdef
552	mov edx, [xBP - 10h + 4]
553	%endif
554
555	;
556	; Check the lowest bit if it might be odd.
557	; This works both for positive and negative numbers.
558	;
559	test al, 1
560	jz .is_exp_odd_integer__return_false ; jump if even.
561
562	;
563	; If the result is negative, convert to positive.
564	;
565	%ifdef RT_ARCH_AMD64
566	bt rax, 63
567	%else
568	bt edx, 31
569	%endif
570	jnc .is_exp_odd_integer__positive
571	%ifdef RT_ARCH_AMD64
572	neg xAX
573	%else
574	neg edx
575	neg eax
576	sbb edx, 0
577	%endif
578	.is_exp_odd_integer__positive:
579
580	;
581	; Now find the most significant bit in the value so we can verify that
582	; the odd bit was part of the mantissa/fraction of the input.
583	;
584	cmp bl, 3 ; Skip if 80-bit input, as it has a 64-bit mantissa which
585	je .is_exp_odd_integer__return_true ; makes it a 1 bit more precision than out integer reg(s).
586
587	%ifdef RT_ARCH_AMD64
588	bsr rax, rax
589	%else
590	bsr edx, edx
591	jnz .is_exp_odd_integer__high_dword_is_zero
592	lea eax, [edx + 20h]
593	jmp .is_exp_odd_integer__first_bit_in_eax
594	.is_exp_odd_integer__high_dword_is_zero:
595	bsr eax, eax
596	.is_exp_odd_integer__first_bit_in_eax:
597	%endif
598	;
599	; The limit is 53 for double precision (one implicit bit + 52 bits fraction),
600	; and 24 for single precision types.
601	;
602	mov ah, 53 ; RTFLOAT64U_FRACTION_BITS + 1
603	cmp bl, 0
604	jne .is_exp_odd_integer__is_double_limit
605	mov ah, 24 ; RTFLOAT32U_FRACTION_BITS + 1
606	.is_exp_odd_integer__is_double_limit:
607
608	cmp al, ah
609	jae .is_exp_odd_integer__return_false
610	mov eax, 1
611
612	; Return.
613	.is_exp_odd_integer__return_true:
614	jmp .is_exp_odd_integer__return
615	.is_exp_odd_integer__return_false:
616	xor eax, eax
617	.is_exp_odd_integer__return:
618	ffreep st0
619	fldenv [xBP - 30h]
620	ret
621
622	ENDPROC rtNoCrtMathPowCore
623

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Runtime/common/math/powcore.asm@ 96337

Download in other formats: