Changeset 51882 in vbox
- Timestamp:
- Jul 6, 2014 1:07:55 PM (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/Runtime/common/checksum/alt-sha1.cpp
r51880 r51882 32 32 #define RTSHA1_BLOCK_SIZE 64U 33 33 34 /** Enables the unrolled init code. */ 35 #define RTSHA1_UNROLLED_INIT 1 36 /** Enables the fully unrolled block processing code. */ 37 #define RTSHA1_FULLY_UNROLLED_BLOCK_PROCESSING 1 34 /** Enables the unrolled code. */ 35 #define RTSHA1_UNROLLED 1 38 36 39 37 … … 93 91 DECLINLINE(void) rtSha1BlockInit(PRTSHA1CONTEXT pCtx, uint8_t const *pbBlock) 94 92 { 95 #ifdef RTSHA1_UNROLLED _INIT93 #ifdef RTSHA1_UNROLLED 96 94 uint32_t const *puSrc = (uint32_t const *)pbBlock; 97 95 uint32_t *puW = &pCtx->AltPrivate.auW[0]; … … 99 97 Assert(!((uintptr_t)puW & 3)); 100 98 101 /* Copy and byte-swap the block. */ 99 /* Copy and byte-swap the block. Initializing the rest of the Ws are done 100 in the processing loop. */ 102 101 # ifdef RT_LITTLE_ENDIAN 103 uint32_t uS1; 104 *puW++ = uS1 = ASMByteSwapU32(*puSrc++); 105 uint32_t uS2; 106 *puW++ = uS2 = ASMByteSwapU32(*puSrc++); 102 *puW++ = ASMByteSwapU32(*puSrc++); 103 *puW++ = ASMByteSwapU32(*puSrc++); 107 104 *puW++ = ASMByteSwapU32(*puSrc++); 108 105 *puW++ = ASMByteSwapU32(*puSrc++); … … 124 121 # else 125 122 memcpy(puW, puSrc, RTSHA1_BLOCK_SIZE); 126 uint32_t uS1 = puW[-16];127 uint32_t uS2 = puW[-15];128 123 # endif 129 124 130 /* Initialize W16...W79.*/ 131 /** The uS1/uS2 trick here doesn't save much, but it might shave a little bit 132 * off and we've got enough registers for it on AMD64. 
*/ 133 # define RTSHA1_HIGH_INIT_TWO() \ 134 do { \ 135 u32 = uS1; /*puW[-16];*/ \ 136 u32 ^= uS1 = puW[-14]; \ 137 u32 ^= puW[ -8]; \ 138 u32 ^= puW[ -3]; \ 139 *puW++ = ASMRotateLeftU32(u32, 1); \ 140 \ 141 u32 = uS2; /*puW[-16];*/ \ 142 u32 ^= uS2 = puW[-14]; \ 143 u32 ^= puW[ -8]; \ 144 u32 ^= puW[ -3]; \ 145 *puW++ = ASMRotateLeftU32(u32, 1); \ 146 } while (0) 147 # define RTSHA1_HIGH_INIT_EIGHT() \ 148 RTSHA1_HIGH_INIT_TWO(); RTSHA1_HIGH_INIT_TWO(); RTSHA1_HIGH_INIT_TWO(); RTSHA1_HIGH_INIT_TWO() 149 150 /** This is a variation on the standard one which has some better alignment 151 * properties (no -3 access), but probably more importantly, access memory 152 * we've accessed before by going further back. */ 153 # define RTSHA1_HIGH_INIT_ONE_HIGH() \ 154 do { \ 155 u32 = puW[-32]; \ 156 u32 ^= puW[-28]; \ 157 u32 ^= puW[-16]; \ 158 u32 ^= puW[ -6]; \ 159 *puW++ = ASMRotateLeftU32(u32, 2); \ 160 } while (0) 161 # define RTSHA1_HIGH_INIT_EIGHT_HIGH() \ 162 RTSHA1_HIGH_INIT_ONE_HIGH(); RTSHA1_HIGH_INIT_ONE_HIGH(); RTSHA1_HIGH_INIT_ONE_HIGH(); RTSHA1_HIGH_INIT_ONE_HIGH(); \ 163 RTSHA1_HIGH_INIT_ONE_HIGH(); RTSHA1_HIGH_INIT_ONE_HIGH(); RTSHA1_HIGH_INIT_ONE_HIGH(); RTSHA1_HIGH_INIT_ONE_HIGH() 164 165 uint32_t u32; 166 RTSHA1_HIGH_INIT_EIGHT(); 167 RTSHA1_HIGH_INIT_EIGHT(); 168 RTSHA1_HIGH_INIT_EIGHT(); 169 RTSHA1_HIGH_INIT_EIGHT(); 170 171 RTSHA1_HIGH_INIT_EIGHT_HIGH(); 172 RTSHA1_HIGH_INIT_EIGHT_HIGH(); 173 RTSHA1_HIGH_INIT_EIGHT_HIGH(); 174 RTSHA1_HIGH_INIT_EIGHT_HIGH(); 175 176 #else /* !RTSHA1_UNROLLED_INIT */ 125 #else /* !RTSHA1_UNROLLED */ 177 126 uint32_t const *pu32Block = (uint32_t const *)pbBlock; 178 127 Assert(!((uintptr_t)pu32Block & 3)); … … 190 139 pCtx->AltPrivate.auW[iWord] = ASMRotateLeftU32(u32, 1); 191 140 } 192 #endif /* !RTSHA1_UNROLLED _INIT*/141 #endif /* !RTSHA1_UNROLLED */ 193 142 } 194 143 … … 201 150 DECLINLINE(void) rtSha1BlockInitBuffered(PRTSHA1CONTEXT pCtx) 202 151 { 203 #ifdef RTSHA1_UNROLLED _INIT152 #ifdef RTSHA1_UNROLLED 204 153 
uint32_t *puW = &pCtx->AltPrivate.auW[0]; 205 154 Assert(!((uintptr_t)puW & 3)); 206 155 156 /* Do the byte swap if necessary. Initializing the rest of the Ws are done 157 in the processing loop. */ 207 158 # ifdef RT_LITTLE_ENDIAN 208 /* Do the byte swap. */ 209 uint32_t uS1; 210 *puW = uS1 = ASMByteSwapU32(*puW); puW++; 211 uint32_t uS2; 212 *puW = uS2 = ASMByteSwapU32(*puW); puW++; 213 *puW = ASMByteSwapU32(*puW); puW++; 214 *puW = ASMByteSwapU32(*puW); puW++; 215 216 *puW = ASMByteSwapU32(*puW); puW++; 217 *puW = ASMByteSwapU32(*puW); puW++; 218 *puW = ASMByteSwapU32(*puW); puW++; 219 *puW = ASMByteSwapU32(*puW); puW++; 220 221 *puW = ASMByteSwapU32(*puW); puW++; 222 *puW = ASMByteSwapU32(*puW); puW++; 223 *puW = ASMByteSwapU32(*puW); puW++; 224 *puW = ASMByteSwapU32(*puW); puW++; 225 226 *puW = ASMByteSwapU32(*puW); puW++; 227 *puW = ASMByteSwapU32(*puW); puW++; 228 *puW = ASMByteSwapU32(*puW); puW++; 229 *puW = ASMByteSwapU32(*puW); puW++; 230 # else 231 uint32_t uS1 = puW[-16]; 232 uint32_t uS2 = puW[-15]; 159 *puW = ASMByteSwapU32(*puW); puW++; 160 *puW = ASMByteSwapU32(*puW); puW++; 161 *puW = ASMByteSwapU32(*puW); puW++; 162 *puW = ASMByteSwapU32(*puW); puW++; 163 164 *puW = ASMByteSwapU32(*puW); puW++; 165 *puW = ASMByteSwapU32(*puW); puW++; 166 *puW = ASMByteSwapU32(*puW); puW++; 167 *puW = ASMByteSwapU32(*puW); puW++; 168 169 *puW = ASMByteSwapU32(*puW); puW++; 170 *puW = ASMByteSwapU32(*puW); puW++; 171 *puW = ASMByteSwapU32(*puW); puW++; 172 *puW = ASMByteSwapU32(*puW); puW++; 173 174 *puW = ASMByteSwapU32(*puW); puW++; 175 *puW = ASMByteSwapU32(*puW); puW++; 176 *puW = ASMByteSwapU32(*puW); puW++; 177 *puW = ASMByteSwapU32(*puW); puW++; 233 178 # endif 234 235 /* Initialize W16...W79. 
*/236 uint32_t u32;237 RTSHA1_HIGH_INIT_EIGHT();238 RTSHA1_HIGH_INIT_EIGHT();239 RTSHA1_HIGH_INIT_EIGHT();240 RTSHA1_HIGH_INIT_EIGHT();241 242 RTSHA1_HIGH_INIT_EIGHT_HIGH();243 RTSHA1_HIGH_INIT_EIGHT_HIGH();244 RTSHA1_HIGH_INIT_EIGHT_HIGH();245 RTSHA1_HIGH_INIT_EIGHT_HIGH();246 179 247 180 #else /* !RTSHA1_UNROLLED_INIT */ … … 326 259 uint32_t uE = pCtx->AltPrivate.auH[4]; 327 260 328 #ifdef RTSHA1_ FULLY_UNROLLED_BLOCK_PROCESSING261 #ifdef RTSHA1_UNROLLED 329 262 /* This fully unrolled version will avoid the variable rotation by 330 263 embedding it into the loop unrolling. */ 331 uint32_t const*puW = &pCtx->AltPrivate.auW[0];332 # define SHA1_BODY(a_ uW, a_uK, a_fnFt, a_uA, a_uB, a_uC, a_uD, a_uE) \264 uint32_t *puW = &pCtx->AltPrivate.auW[0]; 265 # define SHA1_BODY(a_iWord, a_uK, a_fnFt, a_uA, a_uB, a_uC, a_uD, a_uE) \ 333 266 do { \ 334 a_uE += a_uW; \ 267 if (a_iWord < 16) \ 268 a_uE += *puW++; \ 269 else \ 270 { \ 271 uint32_t u32 = puW[-16]; \ 272 u32 ^= puW[-14]; \ 273 u32 ^= puW[-8]; \ 274 u32 ^= puW[-3]; \ 275 u32 = ASMRotateLeftU32(u32, 1); \ 276 *puW++ = u32; \ 277 a_uE += u32; \ 278 } \ 335 279 a_uE += (a_uK); \ 336 280 a_uE += ASMRotateLeftU32(a_uA, 5); \ … … 338 282 a_uB = ASMRotateLeftU32(a_uB, 30); \ 339 283 } while (0) 340 # define FIVE_ITERATIONS(a_i Start, a_uK, a_fnFt) \284 # define FIVE_ITERATIONS(a_iFirst, a_uK, a_fnFt) \ 341 285 do { \ 342 SHA1_BODY( /*puW[a_iStart + 0]*/ *puW++, a_uK, a_fnFt, uA, uB, uC, uD, uE); \343 SHA1_BODY( /*puW[a_iStart + 1]*/ *puW++, a_uK, a_fnFt, uE, uA, uB, uC, uD); \344 SHA1_BODY( /*puW[a_iStart + 2]*/ *puW++, a_uK, a_fnFt, uD, uE, uA, uB, uC); \345 SHA1_BODY( /*puW[a_iStart + 3]*/ *puW++, a_uK, a_fnFt, uC, uD, uE, uA, uB); \346 SHA1_BODY( /*puW[a_iStart + 4]*/ *puW++, a_uK, a_fnFt, uB, uC, uD, uE, uA); \286 SHA1_BODY(a_iFirst + 0, a_uK, a_fnFt, uA, uB, uC, uD, uE); \ 287 SHA1_BODY(a_iFirst + 1, a_uK, a_fnFt, uE, uA, uB, uC, uD); \ 288 SHA1_BODY(a_iFirst + 2, a_uK, a_fnFt, uD, uE, uA, uB, uC); \ 289 
SHA1_BODY(a_iFirst + 3, a_uK, a_fnFt, uC, uD, uE, uA, uB); \ 290 SHA1_BODY(a_iFirst + 4, a_uK, a_fnFt, uB, uC, uD, uE, uA); \ 347 291 } while (0) 348 # if 0 /* Variation that reduces the code size by a factor of 4 without much loss in performance. */ 349 # define TWENTY_ITERATIONS(a_iFirst, a_uK, a_fnFt) \ 350 do { unsigned i = 4; while (i-- > 0) FIVE_ITERATIONS(a_iFirst + (3 - i) * 5, a_uK, a_fnFt); } while (0) 351 /*for (unsigned i = a_iFirst; i < (a_iFirst + 20); i += 5) FIVE_ITERATIONS(i, a_uK, a_fnFt);*/ 352 # else 353 # define TWENTY_ITERATIONS(a_iFirst, a_uK, a_fnFt) \ 292 # define TWENTY_ITERATIONS(a_iStart, a_uK, a_fnFt) \ 354 293 do { \ 355 FIVE_ITERATIONS(a_i First + 0, a_uK, a_fnFt); \356 FIVE_ITERATIONS(a_i First + 5, a_uK, a_fnFt); \357 FIVE_ITERATIONS(a_i First + 10, a_uK, a_fnFt); \358 FIVE_ITERATIONS(a_i First + 15, a_uK, a_fnFt); \294 FIVE_ITERATIONS(a_iStart + 0, a_uK, a_fnFt); \ 295 FIVE_ITERATIONS(a_iStart + 5, a_uK, a_fnFt); \ 296 FIVE_ITERATIONS(a_iStart + 10, a_uK, a_fnFt); \ 297 FIVE_ITERATIONS(a_iStart + 15, a_uK, a_fnFt); \ 359 298 } while (0) 360 # endif 299 361 300 TWENTY_ITERATIONS( 0, UINT32_C(0x5a827999), rtSha1Ch); 362 301 TWENTY_ITERATIONS(20, UINT32_C(0x6ed9eba1), rtSha1Parity); … … 364 303 TWENTY_ITERATIONS(60, UINT32_C(0xca62c1d6), rtSha1Parity); 365 304 366 #elif 0/* Version avoiding the constant selection. */305 #elif 1 /* Version avoiding the constant selection. */ 367 306 unsigned iWord = 0; 368 307 # define TWENTY_ITERATIONS(a_iWordStop, a_uK, a_uExprBCD) \
Note:
See TracChangeset
for help on using the changeset viewer.