VirtualBox

Changeset 85638 in vbox for trunk


Ignore:
Timestamp:
Aug 6, 2020 4:17:54 PM (5 years ago)
Author:
vboxsync
Message:

IPRT/sha3: Some VS2019 performance tweaks. bugref:9734

Location:
trunk/src/VBox/Runtime
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/Runtime/common/checksum/alt-sha3.cpp

    r85624 r85638  
    3333
    3434/** @def RTSHA3_FULL_UNROLL
    35  * Do full loop unrolling unless we're using VS2019 as it seems to degrade
    36  * performances there for some reason.  With gcc 10.2.1 on a recent Intel system
    37  * (10890XE), this results SHA3-512 throughput (tstRTDigest-2) increasing from
    38  * 83532 KiB/s to 194942 KiB/s against a text size jump from 5913 to 6929 bytes.
     35 * Do full loop unrolling.
     36 *
     37 * With gcc 10.2.1 on a recent Intel system (10890XE), this results SHA3-512
     38 * throughput (tstRTDigest-2) increasing from 83532 KiB/s to 194942 KiB/s
     39 * against a text size jump from 5913 to 6929 bytes, i.e. +1016 bytes.
     40 *
     41 * With VS2019 on a half decent AMD system (3990X), this results in SHA3-512
     42 * speedup from 147676 KiB/s to about 192770 KiB/s.  The text cost is +612 bytes
     43 * (4496 to 5108).  When disabling the unrolling of Rho+Pi we get a small
     44 * increase to 196591 KiB/s (+3821) for some reason, saving 22 bytes of code.
    3945 *
    4046 * For comparison, openssl 1.1.1g assembly code (AMD64) achieves 264915 KiB/s,
     
    4248 * KECCAK_2X without ROL optimizations (they improve it to 203493 KiB/s).
    4349 */
    44 #if !defined(_MSC_VER) || defined(DOXYGEN_RUNNING)
     50#if !defined(IN_SUP_HARDENED_R3) || defined(DOXYGEN_RUNNING)
    4551# define RTSHA3_FULL_UNROLL
    4652#endif
     
    147153         */
    148154        {
    149 #ifndef RTSHA3_FULL_UNROLL
     155#if !defined(RTSHA3_FULL_UNROLL) || defined(_MSC_VER) /* VS2019 is slightly slow with this section unrolled. go figure */
    150156            static uint8_t const s_aidxState[] = {10,7,11,17,18,  3, 5,16, 8,21, 24, 4,15,23,19, 13,12, 2,20,14, 22, 9, 6, 1};
    151157            static uint8_t const s_acRotate[]  = { 1,3, 6,10,15, 21,28,36,45,55,  2,14,27,41,56,  8,25,43,62,18, 39,61,20,44};
  • trunk/src/VBox/Runtime/testcase/tstRTDigest-2.cpp

    r85614 r85638  
    20482048}
    20492049
    2050 int main()
     2050
     2051static unsigned checkArgs(int cArgs, char **papszArgs, const char *pszName, const char *pszFamily)
     2052{
     2053    if (cArgs <= 1)
     2054        return 1;
     2055    size_t const cchName   = strlen(pszName);
     2056    size_t const cchFamily = strlen(pszFamily);
     2057    for (int i = 1; i < cArgs; i++)
     2058    {
     2059        const char  *pszArg = papszArgs[i];
     2060        const char  *pszSep = strpbrk(pszArg, ":=");
     2061        size_t const cchCur = pszSep ? (size_t)(pszSep - pszArg) : strlen(pszArg);
     2062        if (   (cchCur == cchName   && RTStrNICmp(pszArg, pszName, cchCur) == 0)
     2063            || (cchCur == cchFamily && RTStrNICmp(pszArg, pszFamily, cchCur) == 0) )
     2064        {
     2065            if (!pszSep || pszSep[1] == '\0')
     2066                return 1;
     2067            return RTStrToUInt32(pszSep + 1);
     2068        }
     2069    }
     2070    return 0;
     2071}
     2072
     2073
     2074int main(int argc, char **argv)
    20512075{
    20522076    RTTEST hTest;
    2053     int rc = RTTestInitAndCreate("tstRTDigest-2", &hTest);
    2054     if (rc)
    2055         return rc;
     2077    RTEXITCODE rcExit = RTTestInitExAndCreate(argc, &argv, 0, "tstRTDigest-2", &hTest);
     2078    if (rcExit != RTEXITCODE_SUCCESS)
     2079        return rcExit;
    20562080    RTTestBanner(hTest);
    20572081
    2058     testMd2();
    2059     testMd4();
    2060     testMd5();
    2061     testSha1();
    2062     testSha256();
    2063     testSha224();
    2064     testSha512();
    2065     testSha384();
     2082#define DO(a_szName, a_szFamily, a_fnTestExpr) do { \
     2083            unsigned const cTimes = checkArgs(argc, argv, a_szName, a_szFamily); \
     2084            for (unsigned i = 0; i < cTimes; i++) { a_fnTestExpr; } \
     2085        } while (0)
     2086    DO("MD2",    "MD",   testMd2());
     2087    DO("MD4",    "MD",   testMd4());
     2088    DO("MD5",    "MD",   testMd5());
     2089    DO("SHA1",   "SHA",  testSha1());
     2090    DO("SHA256", "SHA2", testSha256());
     2091    DO("SHA224", "SHA2", testSha224());
     2092    DO("SHA512", "SHA2", testSha512());
     2093    DO("SHA384", "SHA2", testSha384());
    20662094#ifndef IPRT_WITHOUT_SHA512T224
    2067     testSha512t224();
     2095    DO("SHA512T224", "SHA2", testSha512t224());
    20682096#endif
    20692097#ifndef IPRT_WITHOUT_SHA512T256
    2070     testSha512t256();
     2098    DO("SHA512T256", "SHA2", testSha512t256());
    20712099#endif
    2072     testSha3_224();
    2073     testSha3_256();
    2074     testSha3_384();
    2075     testSha3_512();
     2100    DO("SHA3-224", "SHA3", testSha3_224());
     2101    DO("SHA3-256", "SHA3", testSha3_256());
     2102    DO("SHA3-384", "SHA3", testSha3_384());
     2103    DO("SHA3-512", "SHA3", testSha3_512());
    20762104
    20772105    return RTTestSummaryAndDestroy(hTest);
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette