VirtualBox

Changeset 51884 in vbox for trunk/src


Ignore:
Timestamp:
Jul 6, 2014 4:46:08 PM (11 years ago)
Author:
vboxsync
Message:

alt-sha256: Tiny bswap optimization for 64-bit CPUs.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/Runtime/common/checksum/alt-sha256.cpp

    r51883 r51884  
    202202 *
    203203 * @param   pCtx                The SHA-256 context.
    204  * @param   pbBlock             The block.  Must be 32-bit aligned.
     204 * @param   pbBlock             The block.  Must be arch-bit-width aligned.
    205205 */
    206206DECLINLINE(void) rtSha256BlockInit(PRTSHA256CONTEXT pCtx, uint8_t const *pbBlock)
    207207{
    208208#ifdef RTSHA256_UNROLLED
     209    /* Copy and byte-swap the block. Initializing the rest of the Ws is done
     210       in the processing loop. */
     211# ifdef RT_LITTLE_ENDIAN
     212#  if ARCH_BITS == 64
     213    uint64_t const *puSrc = (uint64_t const *)pbBlock;
     214    uint64_t       *puW   = (uint64_t *)&pCtx->AltPrivate.auW[0];
     215    Assert(!((uintptr_t)puSrc & 7));
     216    Assert(!((uintptr_t)puW & 7));
     217
     218    /* b0 b1 b2 b3  b4 b5 b6 b7 --bswap--> b7 b6 b5 b4 b3 b2 b1 b0 --ror--> b3 b2 b1 b0  b7 b6 b5 b4; */
     219    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
     220    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
     221    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
     222    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
     223
     224    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
     225    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
     226    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
     227    *puW++ = ASMRotateRightU64(ASMByteSwapU64(*puSrc++), 32);
     228
     229#  else
    209230    uint32_t const *puSrc = (uint32_t const *)pbBlock;
    210231    uint32_t       *puW   = &pCtx->AltPrivate.auW[0];
     
    212233    Assert(!((uintptr_t)puW & 3));
    213234
    214     /* Copy and byte-swap the block. Initializing the rest of the Ws is done
    215        in the processing loop. */
    216 # ifdef RT_LITTLE_ENDIAN
    217     *puW++ = ASMByteSwapU32(*puSrc++);
    218     *puW++ = ASMByteSwapU32(*puSrc++);
    219     *puW++ = ASMByteSwapU32(*puSrc++);
    220     *puW++ = ASMByteSwapU32(*puSrc++);
    221 
    222     *puW++ = ASMByteSwapU32(*puSrc++);
    223     *puW++ = ASMByteSwapU32(*puSrc++);
    224     *puW++ = ASMByteSwapU32(*puSrc++);
    225     *puW++ = ASMByteSwapU32(*puSrc++);
    226 
    227     *puW++ = ASMByteSwapU32(*puSrc++);
    228     *puW++ = ASMByteSwapU32(*puSrc++);
    229     *puW++ = ASMByteSwapU32(*puSrc++);
    230     *puW++ = ASMByteSwapU32(*puSrc++);
    231 
    232     *puW++ = ASMByteSwapU32(*puSrc++);
    233     *puW++ = ASMByteSwapU32(*puSrc++);
    234     *puW++ = ASMByteSwapU32(*puSrc++);
    235     *puW++ = ASMByteSwapU32(*puSrc++);
    236 # else
    237     memcpy(puW, puSrc, RTSHA256_BLOCK_SIZE);
    238 # endif
     235    *puW++ = ASMByteSwapU32(*puSrc++);
     236    *puW++ = ASMByteSwapU32(*puSrc++);
     237    *puW++ = ASMByteSwapU32(*puSrc++);
     238    *puW++ = ASMByteSwapU32(*puSrc++);
     239
     240    *puW++ = ASMByteSwapU32(*puSrc++);
     241    *puW++ = ASMByteSwapU32(*puSrc++);
     242    *puW++ = ASMByteSwapU32(*puSrc++);
     243    *puW++ = ASMByteSwapU32(*puSrc++);
     244
     245    *puW++ = ASMByteSwapU32(*puSrc++);
     246    *puW++ = ASMByteSwapU32(*puSrc++);
     247    *puW++ = ASMByteSwapU32(*puSrc++);
     248    *puW++ = ASMByteSwapU32(*puSrc++);
     249
     250    *puW++ = ASMByteSwapU32(*puSrc++);
     251    *puW++ = ASMByteSwapU32(*puSrc++);
     252    *puW++ = ASMByteSwapU32(*puSrc++);
     253    *puW++ = ASMByteSwapU32(*puSrc++);
     254#  endif
     255# else  /* RT_BIG_ENDIAN */
     256    memcpy(&pCtx->AltPrivate.auW[0], pbBlock, RTSHA256_BLOCK_SIZE);
     257# endif /* RT_BIG_ENDIAN */
    239258
    240259#else  /* !RTSHA256_UNROLLED */
     
    266285{
    267286#ifdef RTSHA256_UNROLLED
    268     uint32_t       *puW   = &pCtx->AltPrivate.auW[0];
    269     Assert(!((uintptr_t)puW & 3));
    270 
    271287    /* Do the byte swap if necessary. Initializing the rest of the Ws is done
    272288       in the processing loop. */
    273289# ifdef RT_LITTLE_ENDIAN
    274     *puW = ASMByteSwapU32(*puW); puW++;
    275     *puW = ASMByteSwapU32(*puW); puW++;
    276     *puW = ASMByteSwapU32(*puW); puW++;
    277     *puW = ASMByteSwapU32(*puW); puW++;
    278 
    279     *puW = ASMByteSwapU32(*puW); puW++;
    280     *puW = ASMByteSwapU32(*puW); puW++;
    281     *puW = ASMByteSwapU32(*puW); puW++;
    282     *puW = ASMByteSwapU32(*puW); puW++;
    283 
    284     *puW = ASMByteSwapU32(*puW); puW++;
    285     *puW = ASMByteSwapU32(*puW); puW++;
    286     *puW = ASMByteSwapU32(*puW); puW++;
    287     *puW = ASMByteSwapU32(*puW); puW++;
    288 
    289     *puW = ASMByteSwapU32(*puW); puW++;
    290     *puW = ASMByteSwapU32(*puW); puW++;
    291     *puW = ASMByteSwapU32(*puW); puW++;
    292     *puW = ASMByteSwapU32(*puW); puW++;
     290#  if ARCH_BITS == 64
     291    uint64_t *puW = (uint64_t *)&pCtx->AltPrivate.auW[0];
     292    Assert(!((uintptr_t)puW & 7));
     293    /* b0 b1 b2 b3  b4 b5 b6 b7 --bswap--> b7 b6 b5 b4 b3 b2 b1 b0 --ror--> b3 b2 b1 b0  b7 b6 b5 b4; */
     294    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
     295    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
     296    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
     297    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
     298
     299    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
     300    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
     301    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
     302    *puW = ASMRotateRightU64(ASMByteSwapU64(*puW), 32); puW++;
     303
     304#  else
     305    uint32_t *puW = &pCtx->AltPrivate.auW[0];
     306    Assert(!((uintptr_t)puW & 3));
     307
     308    *puW = ASMByteSwapU32(*puW); puW++;
     309    *puW = ASMByteSwapU32(*puW); puW++;
     310    *puW = ASMByteSwapU32(*puW); puW++;
     311    *puW = ASMByteSwapU32(*puW); puW++;
     312
     313    *puW = ASMByteSwapU32(*puW); puW++;
     314    *puW = ASMByteSwapU32(*puW); puW++;
     315    *puW = ASMByteSwapU32(*puW); puW++;
     316    *puW = ASMByteSwapU32(*puW); puW++;
     317
     318    *puW = ASMByteSwapU32(*puW); puW++;
     319    *puW = ASMByteSwapU32(*puW); puW++;
     320    *puW = ASMByteSwapU32(*puW); puW++;
     321    *puW = ASMByteSwapU32(*puW); puW++;
     322
     323    *puW = ASMByteSwapU32(*puW); puW++;
     324    *puW = ASMByteSwapU32(*puW); puW++;
     325    *puW = ASMByteSwapU32(*puW); puW++;
     326    *puW = ASMByteSwapU32(*puW); puW++;
     327#  endif
    293328# endif
    294329
     
    444479    }
    445480
    446     if (!((uintptr_t)pbBuf & 3))
     481    if (!((uintptr_t)pbBuf & (sizeof(void *) - 1)))
    447482    {
    448483        /*
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette