VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/math/bignum-amd64-x86.asm@ 96206

Last change on this file since 96206 was 95981, checked in by vboxsync, 2 years ago

IPRT/bignum-amd64-x86.asm: warning

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 26.7 KB
Line 
1; $Id: bignum-amd64-x86.asm 95981 2022-08-02 07:28:29Z vboxsync $
2;; @file
3; IPRT - Big Integer Numbers, AMD64 and X86 Assembly Workers
4;
5
6;
7; Copyright (C) 2006-2022 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17; The contents of this file may alternatively be used under the terms
18; of the Common Development and Distribution License Version 1.0
19; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20; VirtualBox OSE distribution, in which case the provisions of the
21; CDDL are applicable instead of those of the GPL.
22;
23; You may elect to license modified versions of this file under the
24; terms and conditions of either the GPL or the CDDL or both.
25;
26
27
28;*********************************************************************************************************************************
29;* Header Files *
30;*********************************************************************************************************************************
31%define RT_ASM_WITH_SEH64
32%include "iprt/asmdefs.mac"
33%include "internal/bignum.mac"
34
35
36;*********************************************************************************************************************************
37;* Defined Constants And Macros *
38;*********************************************************************************************************************************
;
; Guard macros: on AMD64 builds the SAHF and LAHF mnemonics are shadowed by
; macros that abort the assembly, because (per the messages below) these
; instructions are not supported on ancient AMD64 CPUs.  Any 64-bit code path
; that needs to carry CF across a flag-clobbering instruction therefore has to
; use some other technique.  32-bit x86 builds are not affected.
;
%ifdef RT_ARCH_AMD64
 %macro sahf 0
  %error "SAHF not supported on ancient AMD64"
 %endmacro
 %macro lahf 0
  %error "LAHF not supported on ancient AMD64"
 %endmacro
%endif
47
48
49BEGINCODE
50
;;
; Subtracts a number (pauSubtrahend) from a larger number (pauMinuend) and
; stores the result in pauResult.
;
; All three numbers are zero padded such that a borrow can be carried one (or
; two for 64-bit) elements beyond the end of the largest number.
;
; The borrow lives in CF for the whole operation, so between two SBBs only
; instructions that leave CF untouched (MOV, LEA, DEC, Jcc) are used.
;
; @returns  nothing.
; @param    pauResult       x86:[ebp +  8]  gcc:rdi  msc:rcx
; @param    pauMinuend      x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    pauSubtrahend   x86:[ebp + 16]  gcc:rdx  msc:r8
; @param    cUsed           x86:[ebp + 20]  gcc:rcx  msc:r9
;                           Number of elements to process.  Zero is accepted
;                           and makes the function a no-op.
;
BEGINPROC rtBigNumMagnitudeSubAssemblyWorker
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_GCC
  %define pauResult         rdi
  %define pauMinuend        rsi
  %define pauSubtrahend     rdx
  %define cUsed             ecx
 %else
  %define pauResult         rcx
  %define pauMinuend        rdx
  %define pauSubtrahend     r8
  %define cUsed             r9d
 %endif
        xor     r11d, r11d              ; r11 = byte offset into all three arrays.

 %if RTBIGNUM_ELEMENT_SIZE == 4
        ; The 64-bit code always works on qwords, so convert the count.
        add     cUsed, 1                ; cUsed = RT_ALIGN(cUsed, 2) / 2
        shr     cUsed, 1
 %endif
        test    cUsed, cUsed            ; Nothing to do?  Without this check the
        jz      .done                   ; DEC in the small loop would wrap around.
        cmp     cUsed, 8                ; Skip the big loop if small number.
        jb      .small_job

        mov     r10d, cUsed
        shr     r10d, 3                 ; r10d = number of 8-element rounds.
        clc
.big_loop:
        ; Eight qwords per round; the borrow ripples through CF.
 %assign offUnroll 0
 %rep 8
        mov     rax, [pauMinuend + r11 + offUnroll]
        sbb     rax, [pauSubtrahend + r11 + offUnroll]
        mov     [pauResult + r11 + offUnroll], rax
  %assign offUnroll offUnroll + 8
 %endrep
 %undef offUnroll
        lea     r11, [r11 + 64]         ; LEA, not ADD: must not touch CF.
        dec     r10d                    ; Does not change CF.
        jnz     .big_loop

        ;
        ; Work out the number of left-over rounds.  AND clears CF, so the
        ; borrow has to be carried over it.  LAHF/SAHF would be the natural
        ; way, but they are not usable in AMD64 code (see the sahf/lahf guard
        ; macros at the top of this file), hence the branching.
        ;
        jnc     .no_carry
        and     cUsed, 7                ; Up to seven odd rounds.
        jz      .done
        stc                             ; Re-establish the pending borrow ...
        jmp     .small_loop             ; ... and skip the CLC below.
.no_carry:
        and     cUsed, 7                ; Up to seven odd rounds.
        jz      .done
        ; CF is already clear here, falling thru the CLC is harmless.

.small_job:
        clc
.small_loop:
        mov     rax, [pauMinuend + r11]
        sbb     rax, [pauSubtrahend + r11]
        mov     [pauResult + r11], rax
        lea     r11, [r11 + 8]
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop
 %ifdef RT_STRICT
        ; A borrow out of the last element means the subtrahend was larger.
        jnc     .done
        int3
 %endif
.done:

%elifdef RT_ARCH_X86
        push    edi
        push    esi
        push    ebx

        mov     edi, [ebp + 08h]        ; pauResult
 %define pauResult          edi
        mov     ecx, [ebp + 0ch]        ; pauMinuend
 %define pauMinuend         ecx
        mov     edx, [ebp + 10h]        ; pauSubtrahend
 %define pauSubtrahend      edx
        mov     esi, [ebp + 14h]        ; cUsed
 %define cUsed              esi

        xor     ebx, ebx                ; ebx = byte offset into all three arrays.

        test    cUsed, cUsed            ; Nothing to do?  Without this check the
        jz      .done                   ; DEC in the small loop would wrap around.
        cmp     cUsed, 8                ; Skip the big loop if small number.
        jb      .small_job

        shr     cUsed, 3                ; cUsed = number of 8-element rounds.
        clc
.big_loop:
        ; Eight dwords per round; the borrow ripples through CF.
 %assign offUnroll 0
 %rep 8
        mov     eax, [pauMinuend + ebx + offUnroll]
        sbb     eax, [pauSubtrahend + ebx + offUnroll]
        mov     [pauResult + ebx + offUnroll], eax
  %assign offUnroll offUnroll + 4
 %endrep
 %undef offUnroll
        lea     ebx, [ebx + 32]         ; LEA, not ADD: must not touch CF.
        dec     cUsed                   ; Does not change CF.
        jnz     .big_loop

        lahf                            ; Save CF (AND below clears it).
        mov     cUsed, [ebp + 14h]      ; Reload the original element count.
        and     cUsed, 7                ; Up to seven odd rounds.
        jz      .done
        sahf                            ; Restore CF.
        jmp     .small_loop             ; Skip the CLC, the borrow must survive.

.small_job:
        clc
.small_loop:
        mov     eax, [pauMinuend + ebx]
        sbb     eax, [pauSubtrahend + ebx]
        mov     [pauResult + ebx], eax
        lea     ebx, [ebx + 4]
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop
 %ifdef RT_STRICT
        ; A borrow out of the last element means the subtrahend was larger.
        jnc     .done
        int3
 %endif
.done:

        pop     ebx
        pop     esi
        pop     edi
%else
 %error "Unsupported arch"
%endif

        leave
        ret
%undef pauResult
%undef pauMinuend
%undef pauSubtrahend
%undef cUsed
ENDPROC rtBigNumMagnitudeSubAssemblyWorker
241
242
243
;;
; Subtracts a number (pauSubtrahend) from a larger number (pauResultMinuend),
; storing the result in place, i.e. pauResultMinuend -= pauSubtrahend.
;
; Both numbers are zero padded such that a borrow can be carried one (or
; two for 64-bit) elements beyond the end of the largest number.
;
; The borrow lives in CF for the whole operation, so between two SBBs only
; instructions that leave CF untouched (MOV, LEA, DEC, Jcc) are used.
;
; @returns  nothing.
; @param    pauResultMinuend    x86:[ebp +  8]  gcc:rdi  msc:rcx
; @param    pauSubtrahend       x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    cUsed               x86:[ebp + 16]  gcc:rdx  msc:r8
;                               Number of elements to process.  Zero is
;                               accepted and makes the function a no-op.
;
BEGINPROC rtBigNumMagnitudeSubThisAssemblyWorker
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_GCC
  %define pauResultMinuend  rdi
  %define pauSubtrahend     rsi
  %define cUsed             edx
 %else
  %define pauResultMinuend  rcx
  %define pauSubtrahend     rdx
  %define cUsed             r8d
 %endif
        xor     r11d, r11d              ; r11 = byte offset into both arrays.

 %if RTBIGNUM_ELEMENT_SIZE == 4
        ; The 64-bit code always works on qwords, so convert the count.
        add     cUsed, 1                ; cUsed = RT_ALIGN(cUsed, 2) / 2
        shr     cUsed, 1
 %endif
        test    cUsed, cUsed            ; Nothing to do?  Without this check the
        jz      .done                   ; DEC in the small loop would wrap around.
        cmp     cUsed, 8                ; Skip the big loop if small number.
        jb      .small_job

        mov     r10d, cUsed
        shr     r10d, 3                 ; r10d = number of 8-element rounds.
        clc
.big_loop:
        ; Eight qwords per round; the borrow ripples through CF.
 %assign offUnroll 0
 %rep 8
        mov     rax, [pauSubtrahend + r11 + offUnroll]
        sbb     [pauResultMinuend + r11 + offUnroll], rax
  %assign offUnroll offUnroll + 8
 %endrep
 %undef offUnroll
        lea     r11, [r11 + 64]         ; LEA, not ADD: must not touch CF.
        dec     r10d                    ; Does not change CF.
        jnz     .big_loop

        ;
        ; Work out the number of left-over rounds.  AND clears CF, so the
        ; borrow has to be carried over it.  LAHF/SAHF would be the natural
        ; way, but they are not usable in AMD64 code (see the sahf/lahf guard
        ; macros at the top of this file), hence the branching.
        ;
        jnc     .no_carry
        and     cUsed, 7                ; Up to seven odd rounds.
        jz      .done
        stc                             ; Re-establish the pending borrow ...
        jmp     .small_loop             ; ... and skip the CLC below.
.no_carry:
        and     cUsed, 7                ; Up to seven odd rounds.
        jz      .done
        ; CF is already clear here, falling thru the CLC is harmless.

.small_job:
        clc
.small_loop:
        mov     rax, [pauSubtrahend + r11]
        sbb     [pauResultMinuend + r11], rax
        lea     r11, [r11 + 8]
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop
 %ifdef RT_STRICT
        ; A borrow out of the last element means the subtrahend was larger.
        jnc     .done
        int3
 %endif
.done:

%elifdef RT_ARCH_X86
        push    edi
        push    ebx

        mov     edi, [ebp + 08h]        ; pauResultMinuend
 %define pauResultMinuend   edi
        mov     edx, [ebp + 0ch]        ; pauSubtrahend
 %define pauSubtrahend      edx
        mov     ecx, [ebp + 10h]        ; cUsed
 %define cUsed              ecx

        xor     ebx, ebx                ; ebx = byte offset into both arrays.

        test    cUsed, cUsed            ; Nothing to do?  Without this check the
        jz      .done                   ; DEC in the small loop would wrap around.
        cmp     cUsed, 8                ; Skip the big loop if small number.
        jb      .small_job

        shr     cUsed, 3                ; cUsed = number of 8-element rounds.
        clc
.big_loop:
        ; Eight dwords per round; the borrow ripples through CF.
 %assign offUnroll 0
 %rep 8
        mov     eax, [pauSubtrahend + ebx + offUnroll]
        sbb     [pauResultMinuend + ebx + offUnroll], eax
  %assign offUnroll offUnroll + 4
 %endrep
 %undef offUnroll
        lea     ebx, [ebx + 32]         ; LEA, not ADD: must not touch CF.
        dec     cUsed                   ; Does not change CF.
        jnz     .big_loop

        lahf                            ; Save CF (AND below clears it).
        mov     cUsed, [ebp + 10h]      ; Reload the original element count.
        and     cUsed, 7                ; Up to seven odd rounds.
        jz      .done
        sahf                            ; Restore CF.
        jmp     .small_loop             ; Skip the CLC, the borrow must survive.

.small_job:
        clc
.small_loop:
        mov     eax, [pauSubtrahend + ebx]
        sbb     [pauResultMinuend + ebx], eax
        lea     ebx, [ebx + 4]
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop
 %ifdef RT_STRICT
        ; A borrow out of the last element means the subtrahend was larger.
        jnc     .done
        int3
 %endif
.done:

        pop     ebx
        pop     edi
%else
 %error "Unsupported arch"
%endif

        leave
        ret
%undef pauResultMinuend
%undef pauSubtrahend
%undef cUsed
ENDPROC rtBigNumMagnitudeSubThisAssemblyWorker
405
406
;;
; Shifts an element array one bit to the left, returning the final carry value.
;
; The bit shifted into the least significant element is taken from uCarry (any
; non-zero value counts as one), and the bit shifted out of the last element
; is what gets returned.  The whole array is processed as one long RCL chain
; with the travelling bit held in CF.
;
; On 64-bit hosts the array is always zero padded to a multiple of 8 bytes, so
; 64-bit operand sizes could be used even if the element type is 32-bit; the
; code below rotates with the element's own operand size.
;
; @returns  The final carry value (0 or 1); uCarry unmodified if cUsed is zero.
; @param    pauElements     x86:[ebp +  8]  gcc:rdi  msc:rcx
; @param    cUsed           x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    uCarry          x86:[ebp + 16]  gcc:rdx  msc:r8
;
BEGINPROC rtBigNumMagnitudeShiftLeftOneAssemblyWorker
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_GCC
  %define pauElements       rdi
  %define cUsed             esi
  %define uCarry            edx
 %else
  %define pauElements       rcx
  %define cUsed             edx
  %define uCarry            r8d
 %endif
%elifdef RT_ARCH_X86
 %define pauElements        ecx
 %define cUsed              edx
 %define uCarry             eax
        mov     pauElements, [ebp + 08h]
        mov     cUsed, [ebp + 0ch]
        mov     uCarry, [ebp + 10h]
%else
 %error "Unsupported arch."
%endif

        ; Operand size prefix and byte size of one element.
%if RTBIGNUM_ELEMENT_SIZE == 8
 %define SHL1_ELEM_PRE      qword
 %assign SHL1_ELEM_CB       8
%else
 %define SHL1_ELEM_PRE      dword
 %assign SHL1_ELEM_CB       4
%endif

        ;
        ; Dispatch: empty array, fewer than eight elements, or lots to do.
        ;
        test    cUsed, cUsed
        jz      .empty_array
        cmp     cUsed, 8
        jb      .small_loop_init

        ;
        ; Big loop - eight elements per round.
        ;
%ifdef RT_ARCH_AMD64
        mov     r11d, cUsed             ; Keep the full count for the tail.
%endif
        shr     cUsed, 3
        neg     uCarry                  ; CF = (uCarry != 0); uCarry itself is dead now.
.big_loop:
%assign offUnroll 0
%rep 8
        rcl     SHL1_ELEM_PRE [pauElements + offUnroll], 1
 %assign offUnroll offUnroll + SHL1_ELEM_CB
%endrep
%undef offUnroll
        lea     pauElements, [pauElements + 8 * SHL1_ELEM_CB]
        dec     cUsed                   ; Does not change CF.
        jnz     .big_loop

        ; Any odd elements left?  Park CF in al while AND tramples the flags.
        setc    al
%ifdef RT_ARCH_AMD64
        mov     cUsed, r11d
%else
        mov     cUsed, [ebp + 0ch]
%endif
        and     cUsed, 7
        jz      .al_to_eax              ; No tail, al already is the result.
        bt      eax, 0                  ; CF = parked carry bit.
        jmp     .small_loop

        ;
        ; Small loop - one element per round.
        ;
.small_loop_init:
        neg     uCarry                  ; CF = (uCarry != 0).
.small_loop:
        rcl     SHL1_ELEM_PRE [pauElements], 1
        lea     pauElements, [pauElements + SHL1_ELEM_CB]
        dec     cUsed                   ; Does not change CF.
        jnz     .small_loop

        ; Turn the final CF into the return value.
        setc    al
.al_to_eax:
        movzx   eax, al
.return:
        leave
        ret

.empty_array:
        mov     eax, uCarry             ; Nothing shifted, hand the carry back as-is.
        jmp     .return
%undef SHL1_ELEM_PRE
%undef SHL1_ELEM_CB
ENDPROC rtBigNumMagnitudeShiftLeftOneAssemblyWorker
532
533
;;
; Performs a 128-bit by 64-bit division on 64-bit and
; a 64-bit by 32-bit division on 32-bit.
;
; The quotient is a double element because uDividendHi may be equal to or
; larger than uDivisor, in which case a single DIV would overflow.  That case
; is handled by dividing the high element on its own first.
;
; @returns  nothing.
; @param    puQuotient      x86:[ebp +  8]  gcc:rdi  msc:rcx          Double element.
; @param    puRemainder     x86:[ebp + 12]  gcc:rsi  msc:rdx          Normal element.
; @param    uDividendHi     x86:[ebp + 16]  gcc:rdx  msc:r8
; @param    uDividendLo     x86:[ebp + 20]  gcc:rcx  msc:r9
; @param    uDivisor        x86:[ebp + 24]  gcc:r8   msc:[rbp + 30h]  Must not be zero.
;
BEGINPROC rtBigNumElement2xDiv2xBy1x
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
 %if RTBIGNUM_ELEMENT_SIZE == 4
  %error "sorry not implemented yet."
  sorry not implemented yet.
 %endif

        ; Shuffle the arguments so the dividend ends up in rdx:rax as DIV wants it.
 %define uDividendHi        rdx
 %define uDividendLo        rax
 %ifdef ASM_CALL64_GCC
  %define uDivisor          r8
  %define puQuotient        rdi
  %define puRemainder       rsi
        mov     rax, rcx                ; uDividendLo; uDividendHi is already in rdx.
 %else
  %define puQuotient        rcx
  %define puRemainder       r11
  %define uDivisor          r10
        mov     r11, rdx                ; puRemainder - rdx is needed for the dividend.
        mov     r10, [rbp + 30h]        ; uDivisor (fifth argument, on the stack).
        mov     rdx, r8                 ; uDividendHi
        mov     rax, r9                 ; uDividendLo
 %endif

%elifdef RT_ARCH_X86
        push    edi
        push    ebx

 %define uDividendHi        edx
        mov     uDividendHi, [ebp + 10h]
 %define uDividendLo        eax
        mov     uDividendLo, [ebp + 14h]
 %define uDivisor           ecx
        mov     uDivisor, [ebp + 18h]
 %define puQuotient         edi
        mov     puQuotient, [ebp + 08h]
 %define puRemainder        ebx
        mov     puRemainder, [ebp + 0ch]
%else
 %error "Unsupported arch."
%endif

%ifdef RT_STRICT
        ;
        ; The divisor shall not be zero.
        ;
        test    uDivisor, uDivisor
        jnz     .divisor_not_zero
        int3
.divisor_not_zero:
%endif

        ;
        ; Avoid division overflow.  This will calculate the high part of the quotient.
        ;
        ; If uDividendHi >= uDivisor, divide 0:uDividendHi first.  That yields
        ; the high quotient element in xAX and leaves uDividendHi % uDivisor in
        ; xDX, which is below uDivisor, so the main DIV below cannot overflow.
        ;
        mov     RTBIGNUM_ELEMENT_PRE [puQuotient + RTBIGNUM_ELEMENT_SIZE], 0
        cmp     uDividendHi, uDivisor
        jb      .do_divide
        push    xAX                     ; Save uDividendLo.
        mov     xAX, xDX
        xor     edx, edx
        div     uDivisor
        mov     RTBIGNUM_ELEMENT_PRE [puQuotient + RTBIGNUM_ELEMENT_SIZE], xAX
        pop     xAX                     ; Restore uDividendLo.

        ;
        ; Perform the division and store the result.
        ;
.do_divide:
        div     uDivisor
        mov     RTBIGNUM_ELEMENT_PRE [puQuotient], xAX
        mov     RTBIGNUM_ELEMENT_PRE [puRemainder], xDX

%ifdef RT_ARCH_X86
        pop     ebx
        pop     edi
%endif
        leave
        ret
%undef uDividendHi
%undef uDividendLo
%undef uDivisor
%undef puQuotient
%undef puRemainder
ENDPROC rtBigNumElement2xDiv2xBy1x
632
633
;;
; Performs the core of long multiplication.
;
; For every multiplier element, each multiplicand element is multiplied with
; it and the double-element product is added into the result at the matching
; position, rippling any carry further up the result array.
;
; @returns  nothing.
; @param    pauResult           x86:[ebp +  8]  gcc:rdi  msc:rcx          Initialized to zero.
; @param    pauMultiplier       x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    cMultiplier         x86:[ebp + 16]  gcc:rdx  msc:r8
; @param    pauMultiplicand     x86:[ebp + 20]  gcc:rcx  msc:r9
; @param    cMultiplicand       x86:[ebp + 24]  gcc:r8   msc:[rbp + 30h]
;
BEGINPROC rtBigNumMagnitudeMultiplyAssemblyWorker
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
 %if RTBIGNUM_ELEMENT_SIZE == 4
  %error "sorry not implemented yet."
  sorry not implemented yet.
 %endif

 %ifdef ASM_CALL64_GCC
  %define pauResult         rdi
  %define pauMultiplier     rsi
  %define cMultiplier       r9
  %define pauMultiplicand   rcx
  %define cMultiplicand     r8
  %define uMultiplier       r10
  %define iMultiplicand     r11
        mov     r9d, edx                ; cMultiplier - rdx is clobbered by MUL.
        mov     r8d, r8d                ; cMultiplicand - paranoia (zero-extend)
 %else
  %define pauResult         rcx
  %define pauMultiplier     r11
  %define cMultiplier       r8
  %define pauMultiplicand   r9
  %define cMultiplicand     r10
  %define uMultiplier       r12
  %define iMultiplicand     r13
        mov     pauMultiplier, rdx      ; rdx is clobbered by MUL.
        mov     r10d, dword [rbp + 30h] ; cMultiplicand
        mov     r8d, r8d                ; cMultiplier - paranoia (zero-extend)
        push    r12                     ; r12 and r13 are used as scratch and
        push    r13                     ; restored before returning.
 %endif

%elifdef RT_ARCH_X86
        push    edi
        push    esi
        push    ebx
        sub     esp, 10h                ; Room for the uMultiplier local.
 %define pauResult          edi
 %define pauMultiplier      dword [ebp + 0ch]
 %define cMultiplier        dword [ebp + 10h]
 %define pauMultiplicand    ecx
 %define cMultiplicand      dword [ebp + 18h]
 %define uMultiplier        dword [ebp - 10h]
 %define iMultiplicand      ebx
        mov     pauResult, [ebp + 08h]
        mov     pauMultiplicand, [ebp + 14h]

%else
 %error "Unsupported arch."
%endif

        ;
        ; Nothing to do if either operand is empty.  Checking both up front
        ; lets the two loops below test their condition at the bottom only.
        ;
        cmp     cMultiplicand, 0
        je      .done
        cmp     cMultiplier, 0
        je      .done

        ;
        ; Outer loop: one round per multiplier element.
        ;
.per_multiplier_element:
        ; uMultiplier = *pauMultiplier
%ifdef RT_ARCH_X86
        mov     edx, pauMultiplier
        mov     eax, [edx]
        mov     uMultiplier, eax
%else
        mov     uMultiplier, [pauMultiplier]
%endif

        ;
        ; Inner loop: for (iMultiplicand = 0; iMultiplicand < cMultiplicand; iMultiplicand++)
        ;
        xor     iMultiplicand, iMultiplicand
.per_multiplicand_element:
        mov     xAX, [pauMultiplicand + iMultiplicand * RTBIGNUM_ELEMENT_SIZE]
        mul     uMultiplier             ; xDX:xAX = product
        add     [pauResult + iMultiplicand * RTBIGNUM_ELEMENT_SIZE], xAX
        adc     [pauResult + iMultiplicand * RTBIGNUM_ELEMENT_SIZE + RTBIGNUM_ELEMENT_SIZE], xDX
        jnc     .product_added

        ; Ripple the carry upwards, starting two elements above the current one.
        lea     xDX, [iMultiplicand + 2]
.ripple_carry:
        adc     RTBIGNUM_ELEMENT_PRE [pauResult + xDX * RTBIGNUM_ELEMENT_SIZE], 0
        inc     xDX                     ; Does not change CF.
        jc      .ripple_carry

.product_added:
        inc     iMultiplicand
        cmp     iMultiplicand, cMultiplicand
        jb      .per_multiplicand_element

        ; Step to the next multiplier element and the next result position.
        add     pauMultiplier, RTBIGNUM_ELEMENT_SIZE
        add     pauResult, RTBIGNUM_ELEMENT_SIZE
        dec     cMultiplier
        jnz     .per_multiplier_element

.done:

%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_GCC
 %else
        pop     r13
        pop     r12
 %endif
%elifdef RT_ARCH_X86
        add     esp, 10h
        pop     ebx
        pop     esi
        pop     edi
%endif
        leave
        ret
ENDPROC rtBigNumMagnitudeMultiplyAssemblyWorker
765
;;
; Assembly implementation of the D4 step of Knuth's division algorithm.
;
; This subtracts Divisor * Qhat from the dividend at the current J index.
; The subtraction is done in place and touches cDivisor + 1 dividend elements:
; one per divisor element plus a final one that absorbs the remaining carry.
;
; @returns  true if negative result (unlikely), false if positive.
; @param    pauDividendJ    x86:[ebp +  8]  gcc:rdi  msc:rcx   Updated in place.
; @param    pauDivisor      x86:[ebp + 12]  gcc:rsi  msc:rdx
; @param    cDivisor        x86:[ebp + 16]  gcc:edx  msc:r8d   Must not be zero.
; @param    uQhat           x86:[ebp + 20]  gcc:rcx  msc:r9
;
BEGINPROC rtBigNumKnuthD4_MulSub
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
SEH64_END_PROLOGUE

%ifdef RT_ARCH_AMD64
 %if RTBIGNUM_ELEMENT_SIZE == 4
  %error "sorry not implemented yet."
  sorry not implemented yet.
 %endif

 %ifdef ASM_CALL64_GCC
  %define pauDividendJ      rdi
  %define pauDivisor        rsi
  %define cDivisor          r8
  %define uQhat             rcx
        mov     r8d, edx                ; cDivisor - rdx is clobbered by MUL.
  %define uMulCarry         r11
 %else
  %define pauDividendJ      rcx
  %define pauDivisor        r10
  %define cDivisor          r8
  %define uQhat             r9
        mov     r10, rdx                ; pauDivisor - rdx is clobbered by MUL.
        mov     r8d, r8d                ; cDivisor - paranoia (zero-extend)
  %define uMulCarry         r11
 %endif

%elifdef RT_ARCH_X86
        push    edi
        push    esi
        push    ebx
 %define pauDividendJ       edi
        mov     pauDividendJ, [ebp + 08h]
 %define pauDivisor         esi
        mov     pauDivisor, [ebp + 0ch]
 %define cDivisor           ecx
        mov     cDivisor, [ebp + 10h]
 %define uQhat              dword [ebp + 14h]
 %define uMulCarry          ebx
%else
 %error "Unsupported arch."
%endif

%ifdef RT_STRICT
        ;
        ; Some sanity checks.  The loop below tests its counter at the bottom,
        ; so a zero cDivisor would wrap around.
        ;
        test    cDivisor, cDivisor
        jnz     .cDivisor_not_zero
        int3
.cDivisor_not_zero:
%endif

        ;
        ; Initialize the loop.
        ;
        xor     uMulCarry, uMulCarry

        ;
        ; do { ... } while (--cDivisor > 0);
        ;
.the_loop:
        ; RTUInt128MulU64ByU64(&uSub, uQhat, pauDivisor[i]);
        mov     xAX, uQhat
        mul     RTBIGNUM_ELEMENT_PRE [pauDivisor]
        ; RTUInt128AssignAddU64(&uSub, uMulCarry);
        add     xAX, uMulCarry
        adc     xDX, 0
        mov     uMulCarry, xDX          ; High half becomes the carry for the next element.
        ; Subtract uSub.s.Lo from pauDividendJ[i]; a borrow is folded into uMulCarry.
        sub     [pauDividendJ], xAX
        adc     uMulCarry, 0
%ifdef RT_STRICT
        jnc     .uMulCarry_did_not_overflow
        int3
.uMulCarry_did_not_overflow:
%endif

        ; Advance.
        add     pauDividendJ, RTBIGNUM_ELEMENT_SIZE
        add     pauDivisor, RTBIGNUM_ELEMENT_SIZE
        dec     cDivisor
        jnz     .the_loop

        ;
        ; Final dividend element (no corresponding divisor element).
        ; A borrow out of it means the result went negative.
        ;
        sub     [pauDividendJ], uMulCarry
        sbb     eax, eax                ; eax = CF ? 0ffffffffh : 0
        and     eax, 1                  ; eax = CF ? 1 : 0

%ifdef RT_ARCH_AMD64
%elifdef RT_ARCH_X86
        pop     ebx
        pop     esi
        pop     edi
%endif
        leave
        ret
%undef pauDividendJ
%undef pauDivisor
%undef cDivisor
%undef uQhat
%undef uMulCarry
ENDPROC rtBigNumKnuthD4_MulSub
881
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette