Changeset 52335 in vbox for trunk/src/VBox/Runtime/common/math

- Timestamp: Aug 11, 2014 12:30:20 PM
- svn:sync-xref-src-repo-rev: 95460
- Location: trunk/src/VBox/Runtime/common/math
- Files: 1 added, 2 edited
Legend:
- Unmodified lines carry no prefix
- Added lines are prefixed with +
- Removed lines are prefixed with -
- Sections skipped by the changeset viewer are shown as …
trunk/src/VBox/Runtime/common/math/bignum-amd64-x86.asm
r52330 r52335
ENDPROC rtBigNumMagnitudeShiftLeftOneAssemblyWorker

+
+;;
+; Performs a 128-bit by 64-bit division on 64-bit and
+; a 64-bit by 32-bit division on 32-bit.
+;
+; @returns nothing.
+; @param    puQuotient    x86:[ebp + 8]   gcc:rdi  msc:rcx           Double element.
+; @param    puRemainder   x86:[ebp + 12]  gcc:rsi  msc:rdx           Normal element.
+; @param    uDividendHi   x86:[ebp + 16]  gcc:rdx  msc:r8
+; @param    uDividendLo   x86:[ebp + 20]  gcc:rcx  msc:r9
+; @param    uDivisor      x86:[ebp + 24]  gcc:r8   msc:[rbp + 30h]
+;
+BEGINPROC rtBigNumElement2xDiv2xBy1x
+        push    xBP
+        SEH64_PUSH_xBP
+        mov     xBP, xSP
+        SEH64_SET_FRAME_xBP 0
+        SEH64_END_PROLOGUE
+
+%ifdef RT_ARCH_AMD64
+ %if RTBIGNUM_ELEMENT_SIZE == 4
+  %error "sorry not implemented yet."
+  sorry not implemented yet.
+ %endif
+
+ %define uDividendHi       rdx
+ %define uDividendLo       rax
+ %ifdef ASM_CALL64_GCC
+  %define uDivisor         r8
+  %define puQuotient       rdi
+  %define puRemainder      rsi
+        mov     rax, rcx
+ %else
+  %define puQuotient       rcx
+  %define puRemainder      r11
+  %define uDivisor         r10
+        mov     r11, rdx
+        mov     r10, [rbp + 30h]
+        mov     rdx, r8
+        mov     rax, r9
+ %endif
+
+%elifdef RT_ARCH_X86
+        push    edi
+        push    ebx
+
+ %define uDividendHi       edx
+        mov     uDividendHi, [ebp + 10h]
+ %define uDividendLo       eax
+        mov     uDividendLo, [ebp + 14h]
+ %define uDivisor          ecx
+        mov     uDivisor, [ebp + 18h]
+ %define puQuotient        edi
+        mov     puQuotient, [ebp + 08h]
+ %define puRemainder       ebx
+        mov     puRemainder, [ebp + 0ch]
+%else
+ %error "Unsupported arch."
+%endif
+
+%ifdef RT_STRICT
+        ;
+        ; The divisor shall not be zero.
+        ;
+        test    uDivisor, uDivisor
+        jnz     .divisor_not_zero
+        int3
+.divisor_not_zero:
+%endif
+
+        ;
+        ; Avoid division overflow.  This will calculate the high part of the quotient.
+        ;
+        mov     RTBIGNUM_ELEMENT_PRE [puQuotient + RTBIGNUM_ELEMENT_SIZE], 0
+        cmp     uDividendHi, uDivisor
+        jb      .do_divide
+        push    xAX
+        mov     xAX, xDX
+        xor     edx, edx
+        div     uDivisor
+        mov     RTBIGNUM_ELEMENT_PRE [puQuotient + RTBIGNUM_ELEMENT_SIZE], xAX
+        pop     xAX
+
+        ;
+        ; Perform the division and store the result.
+        ;
+.do_divide:
+        div     uDivisor
+        mov     RTBIGNUM_ELEMENT_PRE [puQuotient], xAX
+        mov     RTBIGNUM_ELEMENT_PRE [puRemainder], xDX
+
+
+%ifdef RT_ARCH_X86
+        pop     ebx
+        pop     edi
+%endif
+        leave
+        ret
+ENDPROC rtBigNumElement2xDiv2xBy1x
+
+
+;;
+; Performs the core of long multiplication.
+;
+; @returns nothing.
+; @param    pauResult         x86:[ebp + 8]   gcc:rdi  msc:rcx       Initialized to zero.
+; @param    pauMultiplier     x86:[ebp + 12]  gcc:rsi  msc:rdx
+; @param    cMultiplier       x86:[ebp + 16]  gcc:rdx  msc:r8
+; @param    pauMultiplicand   x86:[ebp + 20]  gcc:rcx  msc:r9
+; @param    cMultiplicand     x86:[ebp + 24]  gcc:r8   msc:[rbp + 30h]
+;
+BEGINPROC rtBigNumMagnitudeMultiplyAssemblyWorker
+        push    xBP
+        SEH64_PUSH_xBP
+        mov     xBP, xSP
+        SEH64_SET_FRAME_xBP 0
+        SEH64_END_PROLOGUE
+
+%ifdef RT_ARCH_AMD64
+ %if RTBIGNUM_ELEMENT_SIZE == 4
+  %error "sorry not implemented yet."
+  sorry not implemented yet.
+ %endif
+
+ %ifdef ASM_CALL64_GCC
+  %define pauResult        rdi
+  %define pauMultiplier    rsi
+  %define cMultiplier      r9
+  %define pauMultiplicand  rcx
+  %define cMultiplicand    r8
+        mov     r9d, edx                ; cMultiplier
+        mov     r8d, r8d                ; cMultiplicand - paranoia
+  %define uMultiplier      r10
+  %define iMultiplicand    r11
+ %else
+  %define pauResult        rcx
+  %define pauMultiplier    r11
+  %define cMultiplier      r8
+  %define pauMultiplicand  r9
+  %define cMultiplicand    r10
+        mov     pauMultiplier, rdx
+        mov     r10d, dword [rbp + 30h] ; cMultiplicand
+        mov     r8d, r8d                ; cMultiplier - paranoia
+  %define uMultiplier      r12
+        push    r12
+  %define iMultiplicand    r13
+        push    r13
+ %endif
+
+%elifdef RT_ARCH_X86
+        push    edi
+        push    esi
+        push    ebx
+        sub     esp, 10h
+ %define pauResult         edi
+        mov     pauResult, [ebp + 08h]
+ %define pauMultiplier     dword [ebp + 0ch]
+ %define cMultiplier       dword [ebp + 10h]
+ %define pauMultiplicand   ecx
+        mov     pauMultiplicand, [ebp + 14h]
+ %define cMultiplicand     dword [ebp + 18h]
+ %define uMultiplier       dword [ebp - 10h]
+ %define iMultiplicand     ebx
+
+%else
+ %error "Unsupported arch."
+%endif
+
+        ;
+        ; Check that the multiplicand isn't empty (avoids an extra jump in the inner loop).
+        ;
+        cmp     cMultiplicand, 0
+        je      .done
+
+        ;
+        ; Loop thru each element in the multiplier.
+        ;
+        ; while (cMultiplier-- > 0)
+.multiplier_loop:
+        cmp     cMultiplier, 0
+        jz      .done
+        dec     cMultiplier
+
+        ; uMultiplier = *pauMultiplier
+%ifdef RT_ARCH_X86
+        mov     edx, pauMultiplier
+        mov     eax, [edx]
+        mov     uMultiplier, eax
+%else
+        mov     uMultiplier, [pauMultiplier]
+%endif
+        ; for (iMultiplicand = 0; iMultiplicand < cMultiplicand; iMultiplicand++)
+        xor     iMultiplicand, iMultiplicand
+.multiplicand_loop:
+        mov     xAX, [pauMultiplicand + iMultiplicand * RTBIGNUM_ELEMENT_SIZE]
+        mul     uMultiplier
+        add     [pauResult + iMultiplicand * RTBIGNUM_ELEMENT_SIZE], xAX
+        adc     [pauResult + iMultiplicand * RTBIGNUM_ELEMENT_SIZE + RTBIGNUM_ELEMENT_SIZE], xDX
+        jnc     .next_multiplicand
+        lea     xDX, [iMultiplicand + 2]
+.next_adc:
+        adc     RTBIGNUM_ELEMENT_PRE [pauResult + xDX * RTBIGNUM_ELEMENT_SIZE], 0
+        inc     xDX
+        jc      .next_adc
+
+.next_multiplicand:
+        inc     iMultiplicand                ; iMultiplicand++
+        cmp     iMultiplicand, cMultiplicand ; iMultiplicand < cMultiplicand
+        jb      .multiplicand_loop
+
+        ; Advance and loop on multiplier.
+        add     pauMultiplier, RTBIGNUM_ELEMENT_SIZE
+        add     pauResult, RTBIGNUM_ELEMENT_SIZE
+        jmp     .multiplier_loop
+
+.done:
+
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ %else
+        pop     r13
+        pop     r12
+ %endif
+%elifdef RT_ARCH_X86
+        add     esp, 10h
+        pop     ebx
+        pop     esi
+        pop     edi
+%endif
+        leave
+        ret
+ENDPROC rtBigNumMagnitudeMultiplyAssemblyWorker
+
+;;
+; Assembly implementation of the D4 step of Knuth's division algorithm.
+;
+; This subtracts Divisor * Qhat from the dividend at the current J index.
+;
+; @returns true if negative result (unlikely), false if positive.
+; @param    pauDividendJ   x86:[ebp + 8]   gcc:rdi  msc:rcx          Pointer to the j-th dividend element.
+; @param    pauDivisor     x86:[ebp + 12]  gcc:rsi  msc:rdx
+; @param    cDivisor       x86:[ebp + 16]  gcc:edx  msc:r8d
+; @param    uQhat          x86:[ebp + 20]  gcc:rcx  msc:r9
+;
+BEGINPROC rtBigNumKnuthD4_MulSub
+        push    xBP
+        SEH64_PUSH_xBP
+        mov     xBP, xSP
+        SEH64_SET_FRAME_xBP 0
+        SEH64_END_PROLOGUE
+
+%ifdef RT_ARCH_AMD64
+ %if RTBIGNUM_ELEMENT_SIZE == 4
+  %error "sorry not implemented yet."
+  sorry not implemented yet.
+ %endif
+
+ %ifdef ASM_CALL64_GCC
+  %define pauDividendJ     rdi
+  %define pauDivisor       rsi
+  %define cDivisor         r8
+  %define uQhat            rcx
+        mov     r8d, edx                ; cDivisor
+  %define uMulCarry        r11
+ %else
+  %define pauDividendJ     rcx
+  %define pauDivisor       r10
+  %define cDivisor         r8
+  %define uQhat            r9
+        mov     r10, rdx                ; pauDivisor
+        mov     r8d, r8d                ; cDivisor - paranoia
+  %define uMulCarry        r11
+ %endif
+
+%elifdef RT_ARCH_X86
+        push    edi
+        push    esi
+        push    ebx
+ %define pauDividendJ      edi
+        mov     pauDividendJ, [ebp + 08h]
+ %define pauDivisor        esi
+        mov     pauDivisor, [ebp + 0ch]
+ %define cDivisor          ecx
+        mov     cDivisor, [ebp + 10h]
+ %define uQhat             dword [ebp + 14h]
+ %define uMulCarry         ebx
+%else
+ %error "Unsupported arch."
+%endif
+
+%ifdef RT_STRICT
+        ;
+        ; Some sanity checks.
+        ;
+        cmp     cDivisor, 0
+        jne     .cDivisor_not_zero
+        int3
+.cDivisor_not_zero:
+%endif
+
+        ;
+        ; Initialize the loop.
+        ;
+        xor     uMulCarry, uMulCarry
+
+        ;
+        ; do ... while (cDivisor-- > 0);
+        ;
+.the_loop:
+        ; RTUInt128MulU64ByU64(&uSub, uQhat, pauDivisor[i]);
+        mov     xAX, uQhat
+        mul     RTBIGNUM_ELEMENT_PRE [pauDivisor]
+        ; RTUInt128AssignAddU64(&uSub, uMulCarry);
+        add     xAX, uMulCarry
+        adc     xDX, 0
+        mov     uMulCarry, xDX
+        ; Subtract uSub.s.Lo+fCarry from pauDividendJ[i]
+        sub     [pauDividendJ], xAX
+        adc     uMulCarry, 0
+%ifdef RT_STRICT
+        jnc     .uMulCarry_did_not_overflow
+        int3
+.uMulCarry_did_not_overflow:
+%endif
+
+        ; Advance.
+        add     pauDividendJ, RTBIGNUM_ELEMENT_SIZE
+        add     pauDivisor, RTBIGNUM_ELEMENT_SIZE
+        dec     cDivisor
+        jnz     .the_loop
+
+        ;
+        ; Final dividend element (no corresponding divisor element).
+        ;
+        sub     [pauDividendJ], uMulCarry
+        sbb     eax, eax
+        and     eax, 1
+
+.done:
+%ifdef RT_ARCH_AMD64
+%elifdef RT_ARCH_X86
+        pop     ebx
+        pop     esi
+        pop     edi
+%endif
+        leave
+        ret
+ENDPROC rtBigNumKnuthD4_MulSub
+
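The rtBigNumElement2xDiv2xBy1x worker above is essentially a wrapper around the DIV instruction, which divides the double-width value in xDX:xAX by one element and faults if the quotient does not fit in a single register. The routine sidesteps that by first dividing the high dividend element on its own whenever it is not smaller than the divisor, storing that as the high quotient element, and only then dividing the remainder:low combination. A rough C sketch of the same idea for the 64-bit case (illustrative only, not part of the changeset; it uses GCC/Clang's unsigned __int128 in place of the DIV instruction, and the names are made up):

#include <stdint.h>

/* Divide the 128-bit value uHi:uLo by a non-zero 64-bit divisor, producing a
   128-bit quotient (two 64-bit elements) and a 64-bit remainder, in the same
   two-step fashion as the assembly worker. */
static void Elem2xDiv2xBy1x(uint64_t pauQuotient[2], uint64_t *puRemainder,
                            uint64_t uHi, uint64_t uLo, uint64_t uDivisor)
{
    pauQuotient[1] = 0;
    if (uHi >= uDivisor)            /* would overflow a single 64-bit quotient */
    {
        pauQuotient[1] = uHi / uDivisor;
        uHi            = uHi % uDivisor;
    }
    /* Now uHi < uDivisor, so the remaining quotient fits in 64 bits. */
    unsigned __int128 const uRest = ((unsigned __int128)uHi << 64) | uLo;
    pauQuotient[0] = (uint64_t)(uRest / uDivisor);
    *puRemainder   = (uint64_t)(uRest % uDivisor);
}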
trunk/src/VBox/Runtime/common/math/bignum.cpp
r52290 r52335
#include <iprt/memsafer.h>
#include <iprt/string.h>
+#if RTBIGNUM_ELEMENT_BITS == 64
+# include <iprt/uint128.h>
+#endif


…
#endif

+#define RTBIGNUMELEMENT_HALF_MASK           ( ((RTBIGNUMELEMENT)1 << (RTBIGNUM_ELEMENT_BITS / 2)) - (RTBIGNUMELEMENT)1)
+#define RTBIGNUMELEMENT_LO_HALF(a_uElement) ( (RTBIGNUMELEMENT_HALF_MASK) & (a_uElement) )
+#define RTBIGNUMELEMENT_HI_HALF(a_uElement) ( (a_uElement) >> (RTBIGNUM_ELEMENT_BITS / 2) )
+
+
+/*******************************************************************************
+*   Structures and Typedefs                                                    *
+*******************************************************************************/
+/** Type the size of two elements. */
+#if RTBIGNUM_ELEMENT_BITS == 64
+typedef RTUINT128U RTBIGNUMELEMENT2X;
+#else
+typedef RTUINT64U RTBIGNUMELEMENT2X;
+#endif
+

/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/
+DECLINLINE(int) rtBigNumSetUsed(PRTBIGNUM pBigNum, uint32_t cNewUsed);
+
#ifdef IPRT_BIGINT_WITH_ASM
/* bignum-amd64-x86.asm: */
…
DECLASM(RTBIGNUMELEMENT) rtBigNumMagnitudeShiftLeftOneAssemblyWorker(RTBIGNUMELEMENT *pauElements, uint32_t cUsed,
                                                                     RTBIGNUMELEMENT uCarry);
-#endif
+DECLASM(void) rtBigNumElement2xDiv2xBy1x(RTBIGNUMELEMENT2X *puQuotient, RTBIGNUMELEMENT *puRemainder,
+                                         RTBIGNUMELEMENT uDividendHi, RTBIGNUMELEMENT uDividendLo, RTBIGNUMELEMENT uDivisor);
+DECLASM(void) rtBigNumMagnitudeMultiplyAssemblyWorker(PRTBIGNUMELEMENT pauResult,
+                                                      PCRTBIGNUMELEMENT pauMultiplier, uint32_t cMultiplier,
+                                                      PCRTBIGNUMELEMENT pauMultiplicand, uint32_t cMultiplicand);
+#endif
+
+
+
+
+
+/** @name Functions working on one element.
+ * @{ */
+
+DECLINLINE(uint32_t) rtBigNumElementBitCount(RTBIGNUMELEMENT uElement)
+{
+#if RTBIGNUM_ELEMENT_SIZE == 8
+    if (uElement >> 32)
+        return ASMBitLastSetU32((uint32_t)(uElement >> 32)) + 32;
+    return ASMBitLastSetU32((uint32_t)uElement);
+#elif RTBIGNUM_ELEMENT_SIZE == 4
+    return ASMBitLastSetU32(uElement);
+#else
+# error "Bad RTBIGNUM_ELEMENT_SIZE value"
+#endif
+}
+
+
+/**
+ * Does addition with carry.
+ *
+ * This is a candidate for inline assembly on some platforms.
+ *
+ * @returns The result (the sum)
+ * @param   uAugend     What to add to.
+ * @param   uAddend     What to add to it.
+ * @param   pfCarry     Where to read the input carry and return the output
+ *                      carry.
+ */
+DECLINLINE(RTBIGNUMELEMENT) rtBigNumElementAddWithCarry(RTBIGNUMELEMENT uAugend, RTBIGNUMELEMENT uAddend,
+                                                        RTBIGNUMELEMENT *pfCarry)
+{
+    RTBIGNUMELEMENT uRet = uAugend + uAddend;
+    if (!*pfCarry)
+        *pfCarry = uRet < uAugend;
+    else
+    {
+        uRet += 1;
+        *pfCarry = uRet <= uAugend;
+    }
+    return uRet;
+}
+
+
+/**
+ * Does subtraction with borrow.
+ *
+ * This is a candidate for inline assembly on some platforms.
+ *
+ * @returns The result (the difference)
+ * @param   uMinuend      What to subtract from.
+ * @param   uSubtrahend   What to subtract.
+ * @param   pfBorrow      Where to read the input borrow and return the output
+ *                        borrow.
+ */
+DECLINLINE(RTBIGNUMELEMENT) rtBigNumElementSubWithBorrow(RTBIGNUMELEMENT uMinuend, RTBIGNUMELEMENT uSubtrahend,
+                                                         RTBIGNUMELEMENT *pfBorrow)
+{
+    RTBIGNUMELEMENT uRet = uMinuend - uSubtrahend - *pfBorrow;
+
+    /* Figure out if we borrowed. */
+    *pfBorrow = !*pfBorrow ? uMinuend < uSubtrahend : uMinuend <= uSubtrahend;
+    return uRet;
+}
+
+/** @} */
+
+
+
+
+/** @name Double element primitives.
+ * @{ */
+
+static int rtBigNumElement2xCopyToMagnitude(RTBIGNUMELEMENT2X const *pValue2x, PRTBIGNUM pDst)
+{
+    int rc;
+    if (pValue2x->s.Hi)
+    {
+        rc = rtBigNumSetUsed(pDst, 2);
+        if (RT_SUCCESS(rc))
+        {
+            pDst->pauElements[0] = pValue2x->s.Lo;
+            pDst->pauElements[1] = pValue2x->s.Hi;
+        }
+    }
+    else if (pValue2x->s.Lo)
+    {
+        rc = rtBigNumSetUsed(pDst, 1);
+        if (RT_SUCCESS(rc))
+            pDst->pauElements[0] = pValue2x->s.Lo;
+    }
+    else
+        rc = rtBigNumSetUsed(pDst, 0);
+    return rc;
+}
+
+static void rtBigNumElement2xDiv(RTBIGNUMELEMENT2X *puQuotient, RTBIGNUMELEMENT2X *puRemainder,
+                                 RTBIGNUMELEMENT uDividendHi, RTBIGNUMELEMENT uDividendLo,
+                                 RTBIGNUMELEMENT uDivisorHi, RTBIGNUMELEMENT uDivisorLo)
+{
+    RTBIGNUMELEMENT2X uDividend;
+    uDividend.s.Lo = uDividendLo;
+    uDividend.s.Hi = uDividendHi;
+
+    RTBIGNUMELEMENT2X uDivisor;
+    uDivisor.s.Lo = uDivisorLo;
+    uDivisor.s.Hi = uDivisorHi;
+
+#if RTBIGNUM_ELEMENT_BITS == 64
+    RTUInt128DivRem(puQuotient, puRemainder, &uDividend, &uDivisor);
+#else
+    puQuotient->u  = uDividend.u / uDivisor.u;
+    puRemainder->u = uDividend.u % uDivisor.u;
+#endif
+}
+
+#ifndef IPRT_BIGINT_WITH_ASM
+static void rtBigNumElement2xDiv2xBy1x(RTBIGNUMELEMENT2X *puQuotient, RTBIGNUMELEMENT *puRemainder,
+                                       RTBIGNUMELEMENT uDividendHi, RTBIGNUMELEMENT uDividendLo, RTBIGNUMELEMENT uDivisor)
+{
+    RTBIGNUMELEMENT2X uDividend;
+    uDividend.s.Lo = uDividendLo;
+    uDividend.s.Hi = uDividendHi;
+
+# if RTBIGNUM_ELEMENT_BITS == 64
+    RTBIGNUMELEMENT2X uRemainder2x;
+    RTBIGNUMELEMENT2X uDivisor2x;
+    uDivisor2x.s.Hi = 0;
+    uDivisor2x.s.Lo = uDivisor;
+    /** @todo optimize this. */
+    RTUInt128DivRem(puQuotient, &uRemainder2x, &uDividend, &uDivisor2x);
+    *puRemainder = uRemainder2x.s.Lo;
+# else
+    puQuotient->u = uDividend.u / uDivisor;
+    *puRemainder  = uDividend.u % uDivisor;
+# endif
+}
+#endif
+
+DECLINLINE(void) rtBigNumElement2xDec(RTBIGNUMELEMENT2X *puValue)
+{
+#if RTBIGNUM_ELEMENT_BITS == 64
+    if (puValue->s.Lo-- == 0)
+        puValue->s.Hi--;
+#else
+    puValue->u -= 1;
+#endif
+}
+
+DECLINLINE(void) rtBigNumElement2xAdd1x(RTBIGNUMELEMENT2X *puValue, RTBIGNUMELEMENT uAdd)
+{
+#if RTBIGNUM_ELEMENT_BITS == 64
+    RTUInt128AssignAddU64(puValue, uAdd);
+#else
+    puValue->u += uAdd;
+#endif
+}
+
+/** @} */
+
+
+


…
    uint32_t const cbNew = cNew * RTBIGNUM_ELEMENT_SIZE;
    Assert(cbNew > cbOld);
-
-    void *pvNew;
-    if (pBigNum->fSensitive)
-        pvNew = RTMemSaferReallocZ(cbOld, pBigNum->pauElements, cbNew);
-    else
-        pvNew = RTMemRealloc(pBigNum->pauElements, cbNew);
-    if (RT_LIKELY(pvNew))
-    {
-        if (cbNew > cbOld)
-            RT_BZERO((char *)pvNew + cbOld, cbNew - cbOld);
-
-        pBigNum->pauElements = (RTBIGNUMELEMENT *)pvNew;
-        pBigNum->cUsed       = cNewUsed;
-        pBigNum->cAllocated  = cNew;
-        return VINF_SUCCESS;
-    }
-    return VERR_NO_MEMORY;
+    if (cbNew <= RTBIGNUM_MAX_SIZE && cbNew > cbOld)
+    {
+        void *pvNew;
+        if (pBigNum->fSensitive)
+            pvNew = RTMemSaferReallocZ(cbOld, pBigNum->pauElements, cbNew);
+        else
+            pvNew = RTMemRealloc(pBigNum->pauElements, cbNew);
+        if (RT_LIKELY(pvNew))
+        {
+            if (cbNew > cbOld)
+                RT_BZERO((char *)pvNew + cbOld, cbNew - cbOld);
+            if (pBigNum->cUsed > cNewUsed)
+                RT_BZERO((RTBIGNUMELEMENT *)pvNew + cNewUsed, (pBigNum->cUsed - cNewUsed) * RTBIGNUM_ELEMENT_SIZE);
+
+            pBigNum->pauElements = (RTBIGNUMELEMENT *)pvNew;
+            pBigNum->cUsed       = cNewUsed;
+            pBigNum->cAllocated  = cNew;
+            return VINF_SUCCESS;
+        }
+        return VERR_NO_MEMORY;
+    }
+    return VERR_OUT_OF_RANGE;
}

…
    }
    return rc;
}
-
-
-static uint32_t rtBigNumElementBitCount(RTBIGNUMELEMENT uElement)
-{
-#if RTBIGNUM_ELEMENT_SIZE == 8
-    if (uElement >> 32)
-        return ASMBitLastSetU32((uint32_t)(uElement >> 32)) + 32;
-    return ASMBitLastSetU32((uint32_t)uElement);
-#elif RTBIGNUM_ELEMENT_SIZE == 4
-    return ASMBitLastSetU32(uElement);
-#else
-# error "Bad RTBIGNUM_ELEMENT_SIZE value"
-#endif
-}

…
    if (pLeft->fNegative == (iRight < 0))
    {
+        AssertCompile(RTBIGNUM_ELEMENT_SIZE <= sizeof(iRight));
        if (pLeft->cUsed * RTBIGNUM_ELEMENT_SIZE <= sizeof(iRight))
        {
…


-#define RTBIGNUMELEMENT_HALF_MASK           ( ((RTBIGNUMELEMENT)1 << (RTBIGNUM_ELEMENT_BITS / 2)) - (RTBIGNUMELEMENT)1)
-#define RTBIGNUMELEMENT_LO_HALF(a_uElement) ( (RTBIGNUMELEMENT_HALF_MASK) & (a_uElement) )
-#define RTBIGNUMELEMENT_HI_HALF(a_uElement) ( (a_uElement) >> (RTBIGNUM_ELEMENT_BITS / 2) )
-
-
/**
 * Compares the magnitude values of two big numbers.
…
}

-
-/**
- * Does addition with carry.
- *
- * This is a candidate for inline assembly on some platforms.
- *
- * @returns The result (the sum)
- * @param   uAugend     What to add to.
- * @param   uAddend     What to add to it.
- * @param   pfCarry     Where to read the input carry and return the output
- *                      carry.
- */
-DECLINLINE(RTBIGNUMELEMENT) rtBigNumElementAddWithCarry(RTBIGNUMELEMENT uAugend, RTBIGNUMELEMENT uAddend,
-                                                        RTBIGNUMELEMENT *pfCarry)
-{
-    RTBIGNUMELEMENT uRet = uAugend + uAddend + *pfCarry;
-
-    /* Determin carry the expensive way. */
-    RTBIGNUMELEMENT uTmp = RTBIGNUMELEMENT_HI_HALF(uAugend) + RTBIGNUMELEMENT_HI_HALF(uAddend);
-    if (uTmp < RTBIGNUMELEMENT_HALF_MASK)
-        *pfCarry = 0;
-    else
-        *pfCarry = uTmp > RTBIGNUMELEMENT_HALF_MASK
-                || RTBIGNUMELEMENT_LO_HALF(uAugend) + RTBIGNUMELEMENT_LO_HALF(uAddend) + *pfCarry
-                 > RTBIGNUMELEMENT_HALF_MASK;
-    return uRet;
-}


…


-    return rc;
-}
-
-
-/**
- * Does addition with borrow.
- *
- * This is a candidate for inline assembly on some platforms.
- *
- * @returns The result (the sum)
- * @param   uMinuend      What to subtract from.
- * @param   uSubtrahend   What to subtract.
- * @param   pfBorrow      Where to read the input borrow and return the output
- *                        borrow.
- */
-DECLINLINE(RTBIGNUMELEMENT) rtBigNumElementSubWithBorrow(RTBIGNUMELEMENT uMinuend, RTBIGNUMELEMENT uSubtrahend,
-                                                         RTBIGNUMELEMENT *pfBorrow)
-{
-    RTBIGNUMELEMENT uRet = uMinuend - uSubtrahend - *pfBorrow;
-
-    /* Figure out if we borrowed. */
-    *pfBorrow = !*pfBorrow ? uMinuend < uSubtrahend : uMinuend <= uSubtrahend;
-    return uRet;
}

…
        RT_BZERO(pResult->pauElements, pResult->cUsed * RTBIGNUM_ELEMENT_SIZE);

+#ifdef IPRT_BIGINT_WITH_ASM
+        rtBigNumMagnitudeMultiplyAssemblyWorker(pResult->pauElements,
+                                                pMultiplier->pauElements, pMultiplier->cUsed,
+                                                pMultiplicand->pauElements, pMultiplicand->cUsed);
+#else
        for (uint32_t i = 0; i < pMultiplier->cUsed; i++)
        {
…
            }
        }
+#endif

        /* It's possible we overestimated the output size by 1 element. */
…


/**
+ * Shifts the magnitude left by @a cBits.
+ *
+ * The variables must be unscrambled.
+ *
+ * @returns IPRT status code.
+ * @param   pResult     Where to store the result.
+ * @param   pValue      The value to shift.
+ * @param   cBits       The shift count.
+ */
+static int rtBigNumMagnitudeShiftLeft(PRTBIGNUM pResult, PCRTBIGNUM pValue, uint32_t cBits)
+{
+    int rc;
+    if (cBits)
+    {
+        uint32_t cBitsNew = rtBigNumMagnitudeBitWidth(pValue);
+        if (cBitsNew > 0)
+        {
+            if (cBitsNew + cBits > cBitsNew)
+            {
+                cBitsNew += cBits;
+                rc = rtBigNumSetUsedEx(pResult, 0, RT_ALIGN_32(cBitsNew, RTBIGNUM_ELEMENT_BITS) / RTBIGNUM_ELEMENT_BITS);
+                if (RT_SUCCESS(rc))
+                    rc = rtBigNumSetUsed(pResult, RT_ALIGN_32(cBitsNew, RTBIGNUM_ELEMENT_BITS) / RTBIGNUM_ELEMENT_BITS);
+                if (RT_SUCCESS(rc))
+                {
+                    uint32_t const    cLeft  = pValue->cUsed;
+                    PCRTBIGNUMELEMENT pauSrc = pValue->pauElements;
+                    PRTBIGNUMELEMENT  pauDst = pResult->pauElements;
+
+                    Assert(ASMMemIsAllU32(pauDst, (cBits / RTBIGNUM_ELEMENT_BITS) * RTBIGNUM_ELEMENT_SIZE, 0) == NULL);
+                    pauDst += cBits / RTBIGNUM_ELEMENT_BITS;
+
+                    cBits &= RTBIGNUM_ELEMENT_BITS - 1;
+                    if (cBits)
+                    {
+                        RTBIGNUMELEMENT uPrev = 0;
+                        for (uint32_t i = 0; i < cLeft; i++)
+                        {
+                            RTBIGNUMELEMENT uCur = pauSrc[i];
+                            pauDst[i] = (uCur << cBits) | (uPrev >> (RTBIGNUM_ELEMENT_BITS - cBits));
+                            uPrev = uCur;
+                        }
+                        uPrev >>= RTBIGNUM_ELEMENT_BITS - cBits;
+                        if (uPrev)
+                            pauDst[pValue->cUsed] = uPrev;
+                    }
+                    else
+                        memcpy(pauDst, pauSrc, cLeft * RTBIGNUM_ELEMENT_SIZE);
+                }
+            }
+            else
+                rc = VERR_OUT_OF_RANGE;
+        }
+        /* Shifting zero always yields a zero result. */
+        else
+            rc = rtBigNumSetUsed(pResult, 0);
+    }
+    else
+        rc = rtBigNumMagnitudeCopy(pResult, pValue);
+    return rc;
+}
+
+
+RTDECL(int) RTBigNumShiftLeft(PRTBIGNUM pResult, PCRTBIGNUM pValue, uint32_t cBits)
+{
+    Assert(pResult != pValue);
+    AssertReturn(pResult->fSensitive >= pValue->fSensitive, VERR_BIGNUM_SENSITIVE_INPUT);
+
+    int rc = rtBigNumUnscramble(pResult);
+    if (RT_SUCCESS(rc))
+    {
+        RTBIGNUM_ASSERT_VALID(pResult);
+        rc = rtBigNumUnscramble((PRTBIGNUM)pValue);
+        if (RT_SUCCESS(rc))
+        {
+            RTBIGNUM_ASSERT_VALID(pValue);
+
+            pResult->fNegative = pValue->fNegative;
+            rc = rtBigNumMagnitudeShiftLeft(pResult, pValue, cBits);
+
+            rtBigNumScramble((PRTBIGNUM)pValue);
+        }
+        rtBigNumScramble(pResult);
+    }
+    return rc;
+}
+
+
+/**
+ * Shifts the magnitude right by @a cBits.
+ *
+ * The variables must be unscrambled.
+ *
+ * @returns IPRT status code.
+ * @param   pResult     Where to store the result.
+ * @param   pValue      The value to shift.
+ * @param   cBits       The shift count.
+ */
+static int rtBigNumMagnitudeShiftRight(PRTBIGNUM pResult, PCRTBIGNUM pValue, uint32_t cBits)
+{
+    int rc;
+    if (cBits)
+    {
+        uint32_t cBitsNew = rtBigNumMagnitudeBitWidth(pValue);
+        if (cBitsNew > cBits)
+        {
+            cBitsNew -= cBits;
+            uint32_t cElementsNew = RT_ALIGN_32(cBitsNew, RTBIGNUM_ELEMENT_BITS) / RTBIGNUM_ELEMENT_BITS;
+            rc = rtBigNumSetUsed(pResult, cElementsNew);
+            if (RT_SUCCESS(rc))
+            {
+                uint32_t          i      = cElementsNew;
+                PCRTBIGNUMELEMENT pauSrc = pValue->pauElements;
+                PRTBIGNUMELEMENT  pauDst = pResult->pauElements;
+
+                pauSrc += cBits / RTBIGNUM_ELEMENT_BITS;
+
+                cBits &= RTBIGNUM_ELEMENT_BITS - 1;
+                if (cBits)
+                {
+                    RTBIGNUMELEMENT uPrev = &pauSrc[i] == &pValue->pauElements[pValue->cUsed] ? 0 : pauSrc[i];
+                    while (i-- > 0)
+                    {
+                        RTBIGNUMELEMENT uCur = pauSrc[i];
+                        pauDst[i] = (uCur >> cBits) | (uPrev << (RTBIGNUM_ELEMENT_BITS - cBits));
+                        uPrev = uCur;
+                    }
+                }
+                else
+                    memcpy(pauDst, pauSrc, i * RTBIGNUM_ELEMENT_SIZE);
+            }
+        }
+        else
+            rc = rtBigNumSetUsed(pResult, 0);
+    }
+    else
+        rc = rtBigNumMagnitudeCopy(pResult, pValue);
+    return rc;
+}
+
+
+RTDECL(int) RTBigNumShiftRight(PRTBIGNUM pResult, PCRTBIGNUM pValue, uint32_t cBits)
+{
+    Assert(pResult != pValue);
+    AssertReturn(pResult->fSensitive >= pValue->fSensitive, VERR_BIGNUM_SENSITIVE_INPUT);
+
+    int rc = rtBigNumUnscramble(pResult);
+    if (RT_SUCCESS(rc))
+    {
+        RTBIGNUM_ASSERT_VALID(pResult);
+        rc = rtBigNumUnscramble((PRTBIGNUM)pValue);
+        if (RT_SUCCESS(rc))
+        {
+            RTBIGNUM_ASSERT_VALID(pValue);
+
+            pResult->fNegative = pValue->fNegative;
+            rc = rtBigNumMagnitudeShiftRight(pResult, pValue, cBits);
+            if (!pResult->cUsed)
+                pResult->fNegative = 0;
+
+            rtBigNumScramble((PRTBIGNUM)pValue);
+        }
+        rtBigNumScramble(pResult);
+    }
+    return rc;
+}
+
+
+/**
+ * Implements the D3 test for Qhat decrementation.
+ *
+ * @returns True if Qhat should be decremented.
+ * @param   puQhat              Pointer to Qhat.
+ * @param   uRhat               The remainder.
+ * @param   uDivisorY           The penultimate divisor element.
+ * @param   uDividendJMinus2    The j-2 dividend element.
+ */
+DECLINLINE(bool) rtBigNumKnuthD3_ShouldDecrementQhat(RTBIGNUMELEMENT2X const *puQhat, RTBIGNUMELEMENT uRhat,
+                                                     RTBIGNUMELEMENT uDivisorY, RTBIGNUMELEMENT uDividendJMinus2)
+{
+    if (puQhat->s.Lo == RTBIGNUM_ELEMENT_MAX && puQhat->s.Hi == 0)
+        return true;
+#if RTBIGNUM_ELEMENT_BITS == 64
+    RTBIGNUMELEMENT2X TmpLeft;
+    RTUInt128MulByU64(&TmpLeft, puQhat, uDivisorY);
+
+    RTBIGNUMELEMENT2X TmpRight;
+    TmpRight.s.Lo = 0;
+    TmpRight.s.Hi = uRhat;
+    RTUInt128AssignAddU64(&TmpRight, uDividendJMinus2);
+
+    if (RTUInt128Compare(&TmpLeft, &TmpRight) > 0)
+        return true;
+#else
+    if (puQhat->u * uDivisorY > ((uint64_t)uRhat << 32) + uDividendJMinus2)
+        return true;
+#endif
+    return false;
+}
+
+
+/**
+ * C implementation of the D3 step of Knuth's division algorithm.
+ *
+ * This estimates a value Qhat that will be used as quotient "digit" (element)
+ * at the current level of the division (j).
+ *
+ * @returns The Qhat value we've estimated.
+ * @param   pauDividendJN   Pointer to the j+n (normalized) dividend element.
+ *                          Will access up to two elements prior to this.
+ * @param   uDivZ           The last element in the (normalized) divisor.
+ * @param   uDivY           The penultimate element in the (normalized) divisor.
+ */
+DECLINLINE(RTBIGNUMELEMENT) rtBigNumKnuthD3_EstimateQhat(PCRTBIGNUMELEMENT pauDividendJN,
+                                                         RTBIGNUMELEMENT uDivZ, RTBIGNUMELEMENT uDivY)
+{
+    RTBIGNUMELEMENT2X uQhat;
+    RTBIGNUMELEMENT   uRhat;
+    RTBIGNUMELEMENT   uDividendJN = pauDividendJN[0];
+    Assert(uDividendJN <= uDivZ);
+    if (uDividendJN != uDivZ)
+        rtBigNumElement2xDiv2xBy1x(&uQhat, &uRhat, uDividendJN, pauDividendJN[-1], uDivZ);
+    else
+    {
+        /*
+         * This is the case where we end up with an initial Qhat that's all Fs.
+         */
+        /* Calc the remainder for max Qhat value. */
+        RTBIGNUMELEMENT2X uTmp1;    /* (v[j+n] << bits) + v[J+N-1] */
+        uTmp1.s.Hi = uDivZ;
+        uTmp1.s.Lo = pauDividendJN[-1];
+
+        RTBIGNUMELEMENT2X uTmp2;    /* uQhat * uDividendJN */
+        uTmp2.s.Hi = uDivZ - 1;
+        uTmp2.s.Lo = 0 - uDivZ;
+#if RTBIGNUM_ELEMENT_BITS == 64
+        RTUInt128AssignSub(&uTmp1, &uTmp2);
+#else
+        uTmp1.u -= uTmp2.u;
+#endif
+        /* If we overflowed the remainder, don't bother trying to adjust. */
+        if (uTmp1.s.Hi)
+            return RTBIGNUM_ELEMENT_MAX;
+
+        uRhat     = uTmp1.s.Lo;
+        uQhat.s.Lo = RTBIGNUM_ELEMENT_MAX;
+        uQhat.s.Hi = 0;
+    }
+
+    /*
+     * Adjust Q to eliminate all cases where it's two too large and most cases
+     * where it's one too large.
+     */
+    while (rtBigNumKnuthD3_ShouldDecrementQhat(&uQhat, uRhat, uDivY, pauDividendJN[-2]))
+    {
+        rtBigNumElement2xDec(&uQhat);
+        uRhat += uDivZ;
+        if (uRhat < uDivZ /* overflow */ || uRhat == RTBIGNUM_ELEMENT_MAX)
+            break;
+    }
+
+    return uQhat.s.Lo;
+}
+
+
+#ifdef IPRT_BIGINT_WITH_ASM
+DECLASM(bool) rtBigNumKnuthD4_MulSub(PRTBIGNUMELEMENT pauDividendJ, PRTBIGNUMELEMENT pauDivisor,
+                                     uint32_t cDivisor, RTBIGNUMELEMENT uQhat);
+#else
+/**
+ * C implementation of the D4 step of Knuth's division algorithm.
+ *
+ * This subtracts Divisor * Qhat from the dividend at the current J index.
+ *
+ * @returns true if negative result (unlikely), false if positive.
+ * @param   pauDividendJ    Pointer to the j-th (normalized) dividend element.
+ *                          Will access up to two elements prior to this.
+ * @param   uDivZ           The last element in the (normalized) divisor.
+ * @param   uDivY           The penultimate element in the (normalized) divisor.
+ */
+DECLINLINE(bool) rtBigNumKnuthD4_MulSub(PRTBIGNUMELEMENT pauDividendJ, PRTBIGNUMELEMENT pauDivisor,
+                                        uint32_t cDivisor, RTBIGNUMELEMENT uQhat)
+{
+    uint32_t        i;
+    bool            fBorrow   = false;
+    RTBIGNUMELEMENT uMulCarry = 0;
+    for (i = 0; i < cDivisor; i++)
+    {
+        RTBIGNUMELEMENT2X uSub;
+# if RTBIGNUM_ELEMENT_BITS == 64
+        RTUInt128MulU64ByU64(&uSub, uQhat, pauDivisor[i]);
+        RTUInt128AssignAddU64(&uSub, uMulCarry);
+# else
+        uSub.u = (uint64_t)uQhat * pauDivisor[i] + uMulCarry;
+# endif
+        uMulCarry = uSub.s.Hi;
+
+        RTBIGNUMELEMENT uDividendI = pauDividendJ[i];
+        if (!fBorrow)
+        {
+            fBorrow     = uDividendI < uSub.s.Lo;
+            uDividendI -= uSub.s.Lo;
+        }
+        else
+        {
+            fBorrow     = uDividendI <= uSub.s.Lo;
+            uDividendI -= uSub.s.Lo + 1;
+        }
+        pauDividendJ[i] = uDividendI;
+    }
+
+    /* Carry and borrow into the final dividend element. */
+    RTBIGNUMELEMENT uDividendI = pauDividendJ[i];
+    if (!fBorrow)
+    {
+        fBorrow = uDividendI < uMulCarry;
+        pauDividendJ[i] = uDividendI - uMulCarry;
+    }
+    else
+    {
+        fBorrow = uDividendI <= uMulCarry;
+        pauDividendJ[i] = uDividendI - uMulCarry - 1;
+    }
+
+    return fBorrow;
+}
+#endif /* !IPRT_BIGINT_WITH_ASM */
+
+
+/**
+ * C implementation of the D6 step of Knuth's division algorithm.
+ *
+ * This adds the divisor to the dividend to undo the negative value step D4
+ * produced. This is not a very frequent occurrence.
+ *
+ * @param   pauDividendJ    Pointer to the j-th (normalized) dividend element.
+ *                          Will access up to two elements prior to this.
+ * @param   uDivZ           The last element in the (normalized) divisor.
+ * @param   uDivY           The penultimate element in the (normalized) divisor.
+ */
+DECLINLINE(void) rtBigNumKnuthD6_AddBack(PRTBIGNUMELEMENT pauDividendJ, PRTBIGNUMELEMENT pauDivisor, uint32_t cDivisor)
+{
+    RTBIGNUMELEMENT2X uTmp;
+    uTmp.s.Lo = 0;
+
+    uint32_t i;
+    for (i = 0; i < cDivisor; i++)
+    {
+        uTmp.s.Hi = 0;
+#if RTBIGNUM_ELEMENT_BITS == 64
+        RTUInt128AssignAddU64(&uTmp, pauDivisor[i]);
+        RTUInt128AssignAddU64(&uTmp, pauDividendJ[i]);
+#else
+        uTmp.u += pauDivisor[i];
+        uTmp.u += pauDividendJ[i];
+#endif
+        pauDividendJ[i] = uTmp.s.Lo;
+        uTmp.s.Lo = uTmp.s.Hi;
+    }
+
+    /* The final dividend entry. */
+    Assert(pauDividendJ[i] + uTmp.s.Lo < uTmp.s.Lo);
+    pauDividendJ[i] += uTmp.s.Lo;
+}
+
+
+/**
+ * Knuth's division (core).
+ *
+ * @returns IPRT status code.
+ * @param   pQuotient   Where to return the quotient. Can be NULL.
+ * @param   pRemainder  Where to return the remainder.
+ * @param   pDividend   What to divide.
+ * @param   pDivisor    What to divide by.
+ */
+static int rtBigNumMagnitudeDivideKnuth(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor)
+{
+    Assert(pDivisor->cUsed > 1);
+    uint32_t const cDivisor = pDivisor->cUsed;
+    Assert(pDividend->cUsed >= cDivisor);
+
+    /*
+     * Make sure we've got enough space in the quotient, so we can build it
+     * without any trouble come step D5.
+     */
+    int rc;
+    if (pQuotient)
+    {
+        rc = rtBigNumSetUsedEx(pQuotient, 0, pDividend->cUsed - cDivisor + 1);
+        if (RT_SUCCESS(rc))
+            rc = rtBigNumSetUsed(pQuotient, pDividend->cUsed - cDivisor + 1);
+        if (RT_FAILURE(rc))
+            return rc;
+    }
+
+    /*
+     * D1. Normalize. The goal here is to make sure the last element in the
+     * divisor is greater than RTBIGNUMELEMENTS_MAX/2. We must also make sure
+     * we can access element pDividend->cUsed of the normalized dividend.
+     */
+    RTBIGNUM   NormDividend;
+    RTBIGNUM   NormDivisor;
+    PCRTBIGNUM pNormDivisor = &NormDivisor;
+    rtBigNumInitZeroTemplate(&NormDivisor, pDividend);
+
+    uint32_t cNormShift = (RTBIGNUM_ELEMENT_BITS - rtBigNumMagnitudeBitWidth(pDivisor)) & (RTBIGNUM_ELEMENT_BITS - 1);
+    if (cNormShift)
+    {
+        rtBigNumInitZeroTemplate(&NormDividend, pDividend);
+        rc = rtBigNumMagnitudeShiftLeft(&NormDividend, pDividend, cNormShift);
+        if (RT_SUCCESS(rc))
+            rc = rtBigNumMagnitudeShiftLeft(&NormDivisor, pDivisor, cNormShift);
+    }
+    else
+    {
+        pNormDivisor = pDivisor;
+        rc = rtBigNumCloneInternal(&NormDividend, pDividend);
+    }
+    if (RT_SUCCESS(rc) && pDividend->cUsed == NormDividend.cUsed)
+        rc = rtBigNumEnsureExtraZeroElements(&NormDividend, NormDividend.cUsed + 1);
+    if (RT_SUCCESS(rc))
+    {
+        /*
+         * D2. Initialize the j index so we can loop thru the elements in the
+         * dividend that makes it larger than the divisor.
+         */
+        uint32_t j = pDividend->cUsed - cDivisor;
+
+        RTBIGNUMELEMENT const DivZ = pNormDivisor->pauElements[cDivisor - 1];
+        RTBIGNUMELEMENT const DivY = pNormDivisor->pauElements[cDivisor - 2];
+        for (;;)
+        {
+            /*
+             * D3. Estimate a Q' by dividing the j and j-1 dividend elements by
+             * the last divisor element, then adjust against the next elements.
+             */
+            RTBIGNUMELEMENT uQhat = rtBigNumKnuthD3_EstimateQhat(&NormDividend.pauElements[j + cDivisor], DivZ, DivY);
+
+            /*
+             * D4. Multiply and subtract.
+             */
+            bool fNegative = rtBigNumKnuthD4_MulSub(&NormDividend.pauElements[j], pNormDivisor->pauElements, cDivisor, uQhat);
+
+            /*
+             * D5. Test remainder.
+             * D6. Add back.
+             */
+            if (fNegative)
+            {
+                //__debugbreak();
+                rtBigNumKnuthD6_AddBack(&NormDividend.pauElements[j], pNormDivisor->pauElements, cDivisor);
+                uQhat--;
+            }
+
+            if (pQuotient)
+                pQuotient->pauElements[j] = uQhat;
+
+            /*
+             * D7. Loop on j.
+             */
+            if (j == 0)
+                break;
+            j--;
+        }
+
+        /*
+         * D8. Unnormalize the remainder.
+         */
+        rtBigNumStripTrailingZeros(&NormDividend);
+        if (cNormShift)
+            rc = rtBigNumMagnitudeShiftRight(pRemainder, &NormDividend, cNormShift);
+        else
+            rc = rtBigNumMagnitudeCopy(pRemainder, &NormDividend);
+        if (pQuotient)
+            rtBigNumStripTrailingZeros(pQuotient);
+    }
+
+    /*
+     * Delete temporary variables.
+     */
+    RTBigNumDestroy(&NormDividend);
+    if (pDivisor == &NormDivisor)
+        RTBigNumDestroy(&NormDivisor);
+    return rc;
+}
+
+
+static int rtBigNumMagnitudeDivideSlowLong(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor)
+{
+    /*
+     * Do very simple long division. This ain't fast, but it does the trick.
+     */
+    int rc = VINF_SUCCESS;
+    uint32_t iBit = rtBigNumMagnitudeBitWidth(pDividend);
+    while (iBit-- > 0)
+    {
+        rc = rtBigNumMagnitudeShiftLeftOne(pRemainder, rtBigNumMagnitudeGetBit(pDividend, iBit));
+        AssertRCBreak(rc);
+        int iDiff = rtBigNumMagnitudeCompare(pRemainder, pDivisor);
+        if (iDiff >= 0)
+        {
+            if (iDiff != 0)
+            {
+                rc = rtBigNumMagnitudeSubThis(pRemainder, pDivisor);
+                AssertRCBreak(rc);
+            }
+            else
+                rtBigNumSetUsed(pRemainder, 0);
+            rc = rtBigNumMagnitudeSetBit(pQuotient, iBit);
+            AssertRCBreak(rc);
+        }
+    }
+
+    /* This shouldn't be necessary. */
+    rtBigNumStripTrailingZeros(pQuotient);
+    rtBigNumStripTrailingZeros(pRemainder);
+
+    return rc;
+}
+
+
+/**
 * Divides the magnitudes of two values, letting the caller care about the sign
 * bit.
…
 * @returns IPRT status code.
 * @param   pQuotient   Where to return the quotient.
-* @param   pRemainder  Where to return the reminder.
+* @param   pRemainder  Where to return the remainder.
 * @param   pDividend   What to divide.
 * @param   pDivisor    What to divide by.
-*/
-static int rtBigNumMagnitudeDivide(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor)
+* @param   fForceLong  Force long division.
+*/
+static int rtBigNumMagnitudeDivide(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor,
+                                   bool fForceLong)
{
    Assert(pQuotient != pDividend); Assert(pQuotient != pDivisor); Assert(pRemainder != pDividend); Assert(pRemainder != pDivisor); Assert(pRemainder != pQuotient);
…


    /*
-     * Do very simple long division. This ain't fast, but it does the trick.
-     */
-    int rc = VINF_SUCCESS;
-    uint32_t iBit = rtBigNumMagnitudeBitWidth(pDividend);
-    while (iBit-- > 0)
-    {
-        rc = rtBigNumMagnitudeShiftLeftOne(pRemainder, rtBigNumMagnitudeGetBit(pDividend, iBit));
-        AssertRCBreak(rc);
-        iDiff = rtBigNumMagnitudeCompare(pRemainder, pDivisor);
-        if (iDiff >= 0)
-        {
-            if (iDiff != 0)
-            {
-                rc = rtBigNumMagnitudeSubThis(pRemainder, pDivisor);
-                AssertRCBreak(rc);
+     * Sort out special cases before going to the preferred or select algorithm.
+     */
+    int rc;
+    if (pDividend->cUsed <= 2 && !fForceLong)
+    {
+        if (pDividend->cUsed < 2)
+        {
+            /*
+             * Single element division.
+             */
+            RTBIGNUMELEMENT uQ = pDividend->pauElements[0] / pDivisor->pauElements[0];
+            RTBIGNUMELEMENT uR = pDividend->pauElements[0] % pDivisor->pauElements[0];
+            rc = VINF_SUCCESS;
+            if (uQ)
+            {
+                rc = rtBigNumSetUsed(pQuotient, 1);
+                if (RT_SUCCESS(rc))
+                    pQuotient->pauElements[0] = uQ;
+            }
+            if (uR && RT_SUCCESS(rc))
+            {
+                rc = rtBigNumSetUsed(pRemainder, 1);
+                if (RT_SUCCESS(rc))
+                    pRemainder->pauElements[0] = uR;
+            }
+        }
+        else
+        {
+            /*
+             * Two elements dividend by a one or two element divisor.
+             */
+            RTBIGNUMELEMENT2X uQ, uR;
+            if (pDivisor->cUsed == 1)
+            {
+                rtBigNumElement2xDiv2xBy1x(&uQ, &uR.s.Lo, pDividend->pauElements[1], pDividend->pauElements[0],
+                                           pDivisor->pauElements[0]);
+                uR.s.Hi = 0;
            }
            else
-                rtBigNumSetUsed(pRemainder, 0);
-            rc = rtBigNumMagnitudeSetBit(pQuotient, iBit);
-            AssertRCBreak(rc);
-        }
-    }
-
-    /* This shouldn't be necessary. */
-    rtBigNumStripTrailingZeros(pQuotient);
-    rtBigNumStripTrailingZeros(pRemainder);
-    return rc;
-}
-
-
-RTDECL(int) RTBigNumDivide(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor)
+                rtBigNumElement2xDiv(&uQ, &uR, pDividend->pauElements[1], pDividend->pauElements[0],
+                                     pDivisor->pauElements[1], pDivisor->pauElements[0]);
+            rc = rtBigNumElement2xCopyToMagnitude(&uQ, pQuotient);
+            if (RT_SUCCESS(rc))
+                rc = rtBigNumElement2xCopyToMagnitude(&uR, pRemainder);
+        }
+    }
+    /*
+     * Decide upon which algorithm to use. Knuth requires a divisor that's at
+     * least 2 elements big.
+     */
+    else if (pDivisor->cUsed < 2 || fForceLong)
+        rc = rtBigNumMagnitudeDivideSlowLong(pQuotient, pRemainder, pDividend, pDivisor);
+    else
+        rc = rtBigNumMagnitudeDivideKnuth(pQuotient, pRemainder, pDividend, pDivisor);
+    return rc;
+}
+
+
+static int rtBigNumDivideCommon(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder,
+                                PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor, bool fForceLong)
{
    Assert(pQuotient != pDividend); Assert(pQuotient != pDivisor); Assert(pRemainder != pDividend); Assert(pRemainder != pDivisor); Assert(pRemainder != pQuotient);
…
            pRemainder->fNegative = pDividend->fNegative;

-            rc = rtBigNumMagnitudeDivide(pQuotient, pRemainder, pDividend, pDivisor);
+            rc = rtBigNumMagnitudeDivide(pQuotient, pRemainder, pDividend, pDivisor, fForceLong);

            if (pQuotient->cUsed == 0)
…


+RTDECL(int) RTBigNumDivide(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor)
+{
+    return rtBigNumDivideCommon(pQuotient, pRemainder, pDividend, pDivisor, false /*fForceLong*/);
+}
+
+
+RTDECL(int) RTBigNumDivideLong(PRTBIGNUM pQuotient, PRTBIGNUM pRemainder, PCRTBIGNUM pDividend, PCRTBIGNUM pDivisor)
+{
+    return rtBigNumDivideCommon(pQuotient, pRemainder, pDividend, pDivisor, true /*fForceLong*/);
+}
+
+
/**
 * Calculates the modulus of a magnitude value, leaving the sign bit to the
…
 *
 * @returns IPRT status code.
-* @param   pRemainder  Where to return the reminder.
+* @param   pRemainder  Where to return the remainder.
 * @param   pDividend   What to divide.
 * @param   pDivisor    What to divide by.
…
        return VINF_SUCCESS;

-    /*
-     * Do very simple long division. This ain't fast, but it does the trick.
-     */
+    /** @todo optimize small numbers. */
    int rc = VINF_SUCCESS;
-    uint32_t iBit = rtBigNumMagnitudeBitWidth(pDividend);
-    while (iBit-- > 0)
-    {
-        rc = rtBigNumMagnitudeShiftLeftOne(pRemainder, rtBigNumMagnitudeGetBit(pDividend, iBit));
-        AssertRCBreak(rc);
-        iDiff = rtBigNumMagnitudeCompare(pRemainder, pDivisor);
-        if (iDiff >= 0)
-        {
-            if (iDiff != 0)
-            {
-                rc = rtBigNumMagnitudeSubThis(pRemainder, pDivisor);
-                AssertRCBreak(rc);
-            }
-            else
-                rtBigNumSetUsed(pRemainder, 0);
-        }
+    if (pDivisor->cUsed < 2)
+    {
+        /*
+         * Do very simple long division. This ain't fast, but it does the trick.
+         */
+        uint32_t iBit = rtBigNumMagnitudeBitWidth(pDividend);
+        while (iBit-- > 0)
+        {
+            rc = rtBigNumMagnitudeShiftLeftOne(pRemainder, rtBigNumMagnitudeGetBit(pDividend, iBit));
+            AssertRCBreak(rc);
+            iDiff = rtBigNumMagnitudeCompare(pRemainder, pDivisor);
+            if (iDiff >= 0)
+            {
+                if (iDiff != 0)
+                {
+                    rc = rtBigNumMagnitudeSubThis(pRemainder, pDivisor);
+                    AssertRCBreak(rc);
+                }
+                else
+                    rtBigNumSetUsed(pRemainder, 0);
+            }
+        }
+    }
+    else
+    {
+        /*
+         * Join paths with division.
+         */
+        rc = rtBigNumMagnitudeDivideKnuth(NULL, pRemainder, pDividend, pDivisor);
    }

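The centre of the new rtBigNumMagnitudeDivideKnuth path is the classic estimate-and-correct selection of each quotient digit (steps D3/D4 of Knuth's Algorithm D, TAOCP vol. 2, 4.3.1). The sketch below shows the D3 step on its own in base 2^32, roughly following the textbook formulation and the well-known divmnu reference code; the function name and the fixed 32-bit digit size are illustrative choices, not the changeset's RTBIGNUMELEMENT/RTBIGNUMELEMENT2X machinery:

#include <stdint.h>

/* Knuth D3 in base 2^32: estimate the quotient digit qhat from the top two
   dividend digits and the top divisor digit, then correct it downward using
   the second-highest divisor digit.  Assumes a normalized divisor (MSB of
   uDivZ set) and pauDividendJN[0] <= uDivZ, as the surrounding algorithm
   guarantees. */
static uint32_t EstimateQhat(const uint32_t *pauDividendJN, /* points at dividend digit j+n */
                             uint32_t uDivZ,                /* top divisor digit */
                             uint32_t uDivY)                /* second-highest divisor digit */
{
    uint64_t const uTop2 = ((uint64_t)pauDividendJN[0] << 32) | pauDividendJN[-1];
    uint64_t uQhat = uTop2 / uDivZ;
    uint64_t uRhat = uTop2 % uDivZ;

    while (   uQhat > UINT32_MAX                                    /* qhat >= base */
           || uQhat * uDivY > ((uRhat << 32) | pauDividendJN[-2]))  /* qhat*y > rhat*base + u[j+n-2] */
    {
        uQhat -= 1;
        uRhat += uDivZ;
        if (uRhat > UINT32_MAX)     /* once rhat >= base the test above can no longer fire */
            break;
    }
    return (uint32_t)uQhat;
}

The changeset's version does the same with double-width RTBIGNUMELEMENT2X values so the digit size can be 64 bits, then hands the estimated Qhat to rtBigNumKnuthD4_MulSub (in C or, with IPRT_BIGINT_WITH_ASM, the assembly worker from the first file) and falls back to the D6 add-back on the rare occasions the estimate was one too large.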