VirtualBox

Changeset 51881 in vbox for trunk/src


Ignore:
Timestamp:
Jul 6, 2014 5:06:46 AM (11 years ago)
Author:
vboxsync
Message:

alt-sha256: Unrolled everything and applied the Ch() and Maj() optimizations from SHA-1. Gains 15-30% depending on the arch and the position of the moon.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/Runtime/common/checksum/alt-sha256.cpp

    r51861 r51881  
    3131/** The SHA-256 block size (in bytes). */
    3232#define RTSHA256_BLOCK_SIZE   64U
     33
     34/** Enables the unrolled code. */
     35#define RTSHA1_UNROLLED 1
    3336
    3437
     
    6871*   Global Variables                                                           *
    6972*******************************************************************************/
     73#ifndef RTSHA1_UNROLLED
    7074/** The K constants */
    7175static uint32_t const g_auKs[] =
     
    8892    UINT32_C(0x90befffa), UINT32_C(0xa4506ceb), UINT32_C(0xbef9a3f7), UINT32_C(0xc67178f2),
    8993};
     94#endif /* !RTSHA1_UNROLLED */
    9095
    9196
     
    109114DECL_FORCE_INLINE(uint32_t) rtSha256Ch(uint32_t uX, uint32_t uY, uint32_t uZ)
    110115{
     116#if 1
     117    /* Optimization that saves one operation and probably a temporary variable. */
     118    uint32_t uResult = uY;
     119    uResult ^= uZ;
     120    uResult &= uX;
     121    uResult ^= uZ;
     122    return uResult;
     123#else
     124    /* The original. */
    111125    uint32_t uResult = uX & uY;
    112126    uResult ^= ~uX & uZ;
    113127    return uResult;
     128#endif
    114129}
    115130
     
    118133DECL_FORCE_INLINE(uint32_t) rtSha256Maj(uint32_t uX, uint32_t uY, uint32_t uZ)
    119134{
     135#if 1
     136    /* Optimization that saves one operation and probably a temporary variable. */
     137    uint32_t uResult = uY;
     138    uResult ^= uZ;
     139    uResult &= uX;
     140    uResult ^= uY & uZ;
     141    return uResult;
     142#else
     143    /* The original. */
    120144    uint32_t uResult = uX & uY;
    121145    uResult ^= uX & uZ;
    122146    uResult ^= uY & uZ;
    123147    return uResult;
     148#endif
    124149}
    125150
     
    181206DECLINLINE(void) rtSha256BlockInit(PRTSHA256CONTEXT pCtx, uint8_t const *pbBlock)
    182207{
     208#ifdef RTSHA1_UNROLLED
     209    uint32_t const *puSrc = (uint32_t const *)pbBlock;
     210    uint32_t       *puW   = &pCtx->AltPrivate.auW[0];
     211    Assert(!((uintptr_t)puSrc & 3));
     212    Assert(!((uintptr_t)puW & 3));
     213
     214    /* Copy and byte-swap the block. Initializing the rest of the Ws is done
     215       in the processing loop. */
     216# ifdef RT_LITTLE_ENDIAN
     217    *puW++ = ASMByteSwapU32(*puSrc++);
     218    *puW++ = ASMByteSwapU32(*puSrc++);
     219    *puW++ = ASMByteSwapU32(*puSrc++);
     220    *puW++ = ASMByteSwapU32(*puSrc++);
     221
     222    *puW++ = ASMByteSwapU32(*puSrc++);
     223    *puW++ = ASMByteSwapU32(*puSrc++);
     224    *puW++ = ASMByteSwapU32(*puSrc++);
     225    *puW++ = ASMByteSwapU32(*puSrc++);
     226
     227    *puW++ = ASMByteSwapU32(*puSrc++);
     228    *puW++ = ASMByteSwapU32(*puSrc++);
     229    *puW++ = ASMByteSwapU32(*puSrc++);
     230    *puW++ = ASMByteSwapU32(*puSrc++);
     231
     232    *puW++ = ASMByteSwapU32(*puSrc++);
     233    *puW++ = ASMByteSwapU32(*puSrc++);
     234    *puW++ = ASMByteSwapU32(*puSrc++);
     235    *puW++ = ASMByteSwapU32(*puSrc++);
     236# else
     237    memcpy(puW, puSrc, RTSHA1_BLOCK_SIZE);
     238# endif
     239
     240#else  /* !RTSHA1_UNROLLED */
    183241    uint32_t const *pu32Block = (uint32_t const *)pbBlock;
    184242    Assert(!((uintptr_t)pu32Block & 3));
     
    196254        pCtx->AltPrivate.auW[iWord] = u32;
    197255    }
     256#endif /* !RTSHA1_UNROLLED */
    198257}
    199258
     
    206265DECLINLINE(void) rtSha256BlockInitBuffered(PRTSHA256CONTEXT pCtx)
    207266{
     267#ifdef RTSHA1_UNROLLED
     268    uint32_t       *puW   = &pCtx->AltPrivate.auW[0];
     269    Assert(!((uintptr_t)puW & 3));
     270
     271    /* Do the byte swap if necessary. Initializing the rest of the Ws is done
     272       in the processing loop. */
     273# ifdef RT_LITTLE_ENDIAN
     274    *puW = ASMByteSwapU32(*puW); puW++;
     275    *puW = ASMByteSwapU32(*puW); puW++;
     276    *puW = ASMByteSwapU32(*puW); puW++;
     277    *puW = ASMByteSwapU32(*puW); puW++;
     278
     279    *puW = ASMByteSwapU32(*puW); puW++;
     280    *puW = ASMByteSwapU32(*puW); puW++;
     281    *puW = ASMByteSwapU32(*puW); puW++;
     282    *puW = ASMByteSwapU32(*puW); puW++;
     283
     284    *puW = ASMByteSwapU32(*puW); puW++;
     285    *puW = ASMByteSwapU32(*puW); puW++;
     286    *puW = ASMByteSwapU32(*puW); puW++;
     287    *puW = ASMByteSwapU32(*puW); puW++;
     288
     289    *puW = ASMByteSwapU32(*puW); puW++;
     290    *puW = ASMByteSwapU32(*puW); puW++;
     291    *puW = ASMByteSwapU32(*puW); puW++;
     292    *puW = ASMByteSwapU32(*puW); puW++;
     293# endif
     294
     295#else  /* !RTSHA1_UNROLLED */
    208296    unsigned iWord;
    209297    for (iWord = 0; iWord < 16; iWord++)
     
    218306        pCtx->AltPrivate.auW[iWord] = u32;
    219307    }
     308#endif /* !RTSHA1_UNROLLED */
    220309}
    221310
     
    239328    uint32_t uH = pCtx->AltPrivate.auH[7];
    240329
     330#ifdef RTSHA1_UNROLLED
     331    uint32_t *puW = &pCtx->AltPrivate.auW[0];
     332# define RTSHA256_BODY(a_iWord, a_uK, a_uA, a_uB, a_uC, a_uD, a_uE, a_uF, a_uG, a_uH) \
     333        do { \
     334            if ((a_iWord) < 16) \
     335                a_uH += *puW++; \
     336            else \
     337            { \
     338                uint32_t u32 = puW[-16]; \
     339                u32 += rtSha256SmallSigma0(puW[-15]); \
     340                u32 += puW[-7]; \
     341                u32 += rtSha256SmallSigma1(puW[-2]); \
     342                if (a_iWord < 64-2) *puW++ = u32; else puW++; \
     343                a_uH += u32; \
     344            } \
     345            \
     346            a_uH += rtSha256CapitalSigma1(a_uE); \
     347            a_uH += a_uK; \
     348            a_uH += rtSha256Ch(a_uE, a_uF, a_uG); \
     349            a_uD += a_uH; \
     350            \
     351            a_uH += rtSha256CapitalSigma0(a_uA); \
     352            a_uH += rtSha256Maj(a_uA, a_uB, a_uC); \
     353        } while (0)
     354# define RTSHA256_EIGHT(a_uK0, a_uK1, a_uK2, a_uK3, a_uK4, a_uK5, a_uK6, a_uK7, a_iFirst) \
     355        do { \
     356            RTSHA256_BODY(a_iFirst + 0, a_uK0, uA, uB, uC, uD, uE, uF, uG, uH); \
     357            RTSHA256_BODY(a_iFirst + 1, a_uK1, uH, uA, uB, uC, uD, uE, uF, uG); \
     358            RTSHA256_BODY(a_iFirst + 2, a_uK2, uG, uH, uA, uB, uC, uD, uE, uF); \
     359            RTSHA256_BODY(a_iFirst + 3, a_uK3, uF, uG, uH, uA, uB, uC, uD, uE); \
     360            RTSHA256_BODY(a_iFirst + 4, a_uK4, uE, uF, uG, uH, uA, uB, uC, uD); \
     361            RTSHA256_BODY(a_iFirst + 5, a_uK5, uD, uE, uF, uG, uH, uA, uB, uC); \
     362            RTSHA256_BODY(a_iFirst + 6, a_uK6, uC, uD, uE, uF, uG, uH, uA, uB); \
     363            RTSHA256_BODY(a_iFirst + 7, a_uK7, uB, uC, uD, uE, uF, uG, uH, uA); \
     364        } while (0)
     365    RTSHA256_EIGHT(UINT32_C(0x428a2f98), UINT32_C(0x71374491), UINT32_C(0xb5c0fbcf), UINT32_C(0xe9b5dba5),
     366                   UINT32_C(0x3956c25b), UINT32_C(0x59f111f1), UINT32_C(0x923f82a4), UINT32_C(0xab1c5ed5), 0);
     367    RTSHA256_EIGHT(UINT32_C(0xd807aa98), UINT32_C(0x12835b01), UINT32_C(0x243185be), UINT32_C(0x550c7dc3),
     368                   UINT32_C(0x72be5d74), UINT32_C(0x80deb1fe), UINT32_C(0x9bdc06a7), UINT32_C(0xc19bf174), 8);
     369    RTSHA256_EIGHT(UINT32_C(0xe49b69c1), UINT32_C(0xefbe4786), UINT32_C(0x0fc19dc6), UINT32_C(0x240ca1cc),
     370                   UINT32_C(0x2de92c6f), UINT32_C(0x4a7484aa), UINT32_C(0x5cb0a9dc), UINT32_C(0x76f988da), 16);
     371    RTSHA256_EIGHT(UINT32_C(0x983e5152), UINT32_C(0xa831c66d), UINT32_C(0xb00327c8), UINT32_C(0xbf597fc7),
     372                   UINT32_C(0xc6e00bf3), UINT32_C(0xd5a79147), UINT32_C(0x06ca6351), UINT32_C(0x14292967), 24);
     373    RTSHA256_EIGHT(UINT32_C(0x27b70a85), UINT32_C(0x2e1b2138), UINT32_C(0x4d2c6dfc), UINT32_C(0x53380d13),
     374                   UINT32_C(0x650a7354), UINT32_C(0x766a0abb), UINT32_C(0x81c2c92e), UINT32_C(0x92722c85), 32);
     375    RTSHA256_EIGHT(UINT32_C(0xa2bfe8a1), UINT32_C(0xa81a664b), UINT32_C(0xc24b8b70), UINT32_C(0xc76c51a3),
     376                   UINT32_C(0xd192e819), UINT32_C(0xd6990624), UINT32_C(0xf40e3585), UINT32_C(0x106aa070), 40);
     377    RTSHA256_EIGHT(UINT32_C(0x19a4c116), UINT32_C(0x1e376c08), UINT32_C(0x2748774c), UINT32_C(0x34b0bcb5),
     378                   UINT32_C(0x391c0cb3), UINT32_C(0x4ed8aa4a), UINT32_C(0x5b9cca4f), UINT32_C(0x682e6ff3), 48);
     379    RTSHA256_EIGHT(UINT32_C(0x748f82ee), UINT32_C(0x78a5636f), UINT32_C(0x84c87814), UINT32_C(0x8cc70208),
     380                   UINT32_C(0x90befffa), UINT32_C(0xa4506ceb), UINT32_C(0xbef9a3f7), UINT32_C(0xc67178f2), 56);
     381
     382#else  /* !RTSHA1_UNROLLED */
    241383    for (unsigned iWord = 0; iWord < RT_ELEMENTS(pCtx->AltPrivate.auW); iWord++)
    242384    {
     
    259401        uA = uT1 + uT2;
    260402    }
     403#endif /* !RTSHA1_UNROLLED */
    261404
    262405    pCtx->AltPrivate.auH[0] += uA;
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette