VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.7/crypto/sha/sha512.c@107835

Last change on this file since 107835 was 104078, checked in by vboxsync, 11 months ago

openssl-3.1.5: Applied and adjusted our OpenSSL changes to 3.1.4. bugref:10638

File size: 32.0 KB
/*
 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

/*
 * SHA512 low level APIs are deprecated for public use, but still ok for
 * internal use.
 */
#include "internal/deprecated.h"
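
/*-
 * New code is expected to go through the EVP interface rather than the
 * deprecated low-level calls below. A minimal one-shot sketch (purely
 * illustrative; buf/buf_len stand for the caller's data and are not
 * defined in this file):
 *
 *     #include <openssl/evp.h>
 *
 *     unsigned char md[EVP_MAX_MD_SIZE];
 *     unsigned int md_len;
 *
 *     if (EVP_Digest(buf, buf_len, md, &md_len, EVP_sha512(), NULL))
 *         ...;             // md now holds SHA512_DIGEST_LENGTH bytes
 */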

#include <stdio.h>
#include <openssl/opensslconf.h>
/*-
 * IMPLEMENTATION NOTES.
 *
 * As you might have noticed, the 32-bit hash algorithms:
 *
 * - permit SHA_LONG to be wider than 32 bits;
 * - have optimized versions that implement two transform functions: one
 *   operating on [aligned] data in host byte order and one on data in
 *   input stream byte order;
 * - share common byte-order-neutral collector and padding function
 *   implementations, see ../md32_common.h.
 *
 * None of the above applies to this SHA-512 implementation. The reasons
 * [in reverse order] are:
 *
 * - it is the only 64-bit hash algorithm at the time of this writing,
 *   so there is no need for a common collector/padding implementation
 *   [yet];
 * - by supporting only one transform function [which operates on
 *   *aligned* data in input stream byte order, big-endian in this case]
 *   we minimize the maintenance burden in two ways: a) the
 *   collector/padding function is simpler; b) there is only one
 *   transform function to stare at;
 * - SHA_LONG64 is required to be exactly 64 bits wide in order to be
 *   able to apply a number of optimizations that mitigate potential
 *   performance penalties caused by the previous design decision.
 *
 * Caveat lector.
 *
 * The implementation relies on the fact that "long long" is 64 bits on
 * both 32- and 64-bit platforms. If some compiler vendor comes up with
 * a 128-bit long long, an adjustment to sha.h would be required.
 * As this implementation relies on a 64-bit integer type, it is totally
 * inappropriate for platforms which don't support one, most notably
 * 16-bit platforms.
 */
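
/*-
 * The "exactly 64-bit" requirement on SHA_LONG64 can be checked at
 * compile time; an illustrative sketch (the typedef name is made up and
 * not part of this file):
 *
 *     // Fails to compile if SHA_LONG64 is not exactly 64 bits wide.
 *     typedef char sha_long64_must_be_64_bits
 *                      [sizeof(SHA_LONG64) * 8 == 64 ? 1 : -1];
 */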
#include <stdlib.h>
#include <string.h>

#include <openssl/crypto.h>
#include <openssl/sha.h>
#include <openssl/opensslv.h>

#include "internal/cryptlib.h"
#include "crypto/sha.h"

#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
    defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) || \
    defined(__s390__) || defined(__s390x__) || \
    defined(__aarch64__) || \
    defined(SHA512_ASM)
# define SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
#endif

#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
# define U64(C) C##UI64
#elif defined(__arch64__)
# define U64(C) C##UL
#else
# define U64(C) C##ULL
#endif
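
/*-
 * U64 only abstracts the 64-bit literal suffix, which differs between
 * toolchains; e.g. U64(0x6a09e667f3bcc908) expands to
 * 0x6a09e667f3bcc908UI64 under 64-bit MSVC and to
 * 0x6a09e667f3bcc908ULL with the portable fallback.
 */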

int sha512_224_init(SHA512_CTX *c)
{
    c->h[0] = U64(0x8c3d37c819544da2);
    c->h[1] = U64(0x73e1996689dcd4d6);
    c->h[2] = U64(0x1dfab7ae32ff9c82);
    c->h[3] = U64(0x679dd514582f9fcf);
    c->h[4] = U64(0x0f6d2b697bd44da8);
    c->h[5] = U64(0x77e36f7304c48942);
    c->h[6] = U64(0x3f9d85a86a1d36c8);
    c->h[7] = U64(0x1112e6ad91d692a1);

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA224_DIGEST_LENGTH;
    return 1;
}

int sha512_256_init(SHA512_CTX *c)
{
    c->h[0] = U64(0x22312194fc2bf72c);
    c->h[1] = U64(0x9f555fa3c84c64c2);
    c->h[2] = U64(0x2393b86b6f53b151);
    c->h[3] = U64(0x963877195940eabd);
    c->h[4] = U64(0x96283ee2a88effe3);
    c->h[5] = U64(0xbe5e1e2553863992);
    c->h[6] = U64(0x2b0199fc2c85b8aa);
    c->h[7] = U64(0x0eb72ddc81c52ca2);

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA256_DIGEST_LENGTH;
    return 1;
}

int SHA384_Init(SHA512_CTX *c)
{
    c->h[0] = U64(0xcbbb9d5dc1059ed8);
    c->h[1] = U64(0x629a292a367cd507);
    c->h[2] = U64(0x9159015a3070dd17);
    c->h[3] = U64(0x152fecd8f70e5939);
    c->h[4] = U64(0x67332667ffc00b31);
    c->h[5] = U64(0x8eb44a8768581511);
    c->h[6] = U64(0xdb0c2e0d64f98fa7);
    c->h[7] = U64(0x47b5481dbefa4fa4);

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA384_DIGEST_LENGTH;
    return 1;
}

int SHA512_Init(SHA512_CTX *c)
{
    c->h[0] = U64(0x6a09e667f3bcc908);
    c->h[1] = U64(0xbb67ae8584caa73b);
    c->h[2] = U64(0x3c6ef372fe94f82b);
    c->h[3] = U64(0xa54ff53a5f1d36f1);
    c->h[4] = U64(0x510e527fade682d1);
    c->h[5] = U64(0x9b05688c2b3e6c1f);
    c->h[6] = U64(0x1f83d9abfb41bd6b);
    c->h[7] = U64(0x5be0cd19137e2179);

    c->Nl = 0;
    c->Nh = 0;
    c->num = 0;
    c->md_len = SHA512_DIGEST_LENGTH;
    return 1;
}
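
/*-
 * Typical streaming use of the (deprecated) low-level API implemented
 * here; per FIPS 180-4, SHA-512("abc") begins ddaf35a193617aba...:
 *
 *     SHA512_CTX ctx;
 *     unsigned char md[SHA512_DIGEST_LENGTH];
 *
 *     SHA512_Init(&ctx);
 *     SHA512_Update(&ctx, "abc", 3);   // may be called repeatedly
 *     SHA512_Final(md, &ctx);          // writes the 64-byte digest
 */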

#ifndef SHA512_ASM
static
#endif
void sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num);

int SHA512_Final(unsigned char *md, SHA512_CTX *c)
{
    unsigned char *p = (unsigned char *)c->u.p;
    size_t n = c->num;

    p[n] = 0x80;                /* There is always room for one */
    n++;
    if (n > (sizeof(c->u) - 16)) {
        memset(p + n, 0, sizeof(c->u) - n);
        n = 0;
        sha512_block_data_order(c, p, 1);
    }

    memset(p + n, 0, sizeof(c->u) - 16 - n);
#ifdef B_ENDIAN
    c->u.d[SHA_LBLOCK - 2] = c->Nh;
    c->u.d[SHA_LBLOCK - 1] = c->Nl;
#else
    p[sizeof(c->u) - 1] = (unsigned char)(c->Nl);
    p[sizeof(c->u) - 2] = (unsigned char)(c->Nl >> 8);
    p[sizeof(c->u) - 3] = (unsigned char)(c->Nl >> 16);
    p[sizeof(c->u) - 4] = (unsigned char)(c->Nl >> 24);
    p[sizeof(c->u) - 5] = (unsigned char)(c->Nl >> 32);
    p[sizeof(c->u) - 6] = (unsigned char)(c->Nl >> 40);
    p[sizeof(c->u) - 7] = (unsigned char)(c->Nl >> 48);
    p[sizeof(c->u) - 8] = (unsigned char)(c->Nl >> 56);
    p[sizeof(c->u) - 9] = (unsigned char)(c->Nh);
    p[sizeof(c->u) - 10] = (unsigned char)(c->Nh >> 8);
    p[sizeof(c->u) - 11] = (unsigned char)(c->Nh >> 16);
    p[sizeof(c->u) - 12] = (unsigned char)(c->Nh >> 24);
    p[sizeof(c->u) - 13] = (unsigned char)(c->Nh >> 32);
    p[sizeof(c->u) - 14] = (unsigned char)(c->Nh >> 40);
    p[sizeof(c->u) - 15] = (unsigned char)(c->Nh >> 48);
    p[sizeof(c->u) - 16] = (unsigned char)(c->Nh >> 56);
#endif

    sha512_block_data_order(c, p, 1);

    if (md == 0)
        return 0;

    switch (c->md_len) {
    /* Let compiler decide if it's appropriate to unroll... */
    case SHA224_DIGEST_LENGTH:
        for (n = 0; n < SHA224_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        /*
         * For 224 bits, there are four bytes left over that have to be
         * processed separately.
         */
        {
            SHA_LONG64 t = c->h[SHA224_DIGEST_LENGTH / 8];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
        }
        break;
    case SHA256_DIGEST_LENGTH:
        for (n = 0; n < SHA256_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        break;
    case SHA384_DIGEST_LENGTH:
        for (n = 0; n < SHA384_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        break;
    case SHA512_DIGEST_LENGTH:
        for (n = 0; n < SHA512_DIGEST_LENGTH / 8; n++) {
            SHA_LONG64 t = c->h[n];

            *(md++) = (unsigned char)(t >> 56);
            *(md++) = (unsigned char)(t >> 48);
            *(md++) = (unsigned char)(t >> 40);
            *(md++) = (unsigned char)(t >> 32);
            *(md++) = (unsigned char)(t >> 24);
            *(md++) = (unsigned char)(t >> 16);
            *(md++) = (unsigned char)(t >> 8);
            *(md++) = (unsigned char)(t);
        }
        break;
    /* ... as well as make sure md_len is not abused. */
    default:
        return 0;
    }

    return 1;
}
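
/*-
 * Padding recap: SHA512_Final appends the 0x80 terminator, zero fill,
 * and the 128-bit big-endian bit count Nh:Nl in the last 16 bytes of
 * the final block. E.g. for the 3-byte message "abc" the single
 * 128-byte block ends 00 ... 00 18, since 3 * 8 = 0x18 bits.
 */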

int SHA384_Final(unsigned char *md, SHA512_CTX *c)
{
    return SHA512_Final(md, c);
}

int SHA512_Update(SHA512_CTX *c, const void *_data, size_t len)
{
    SHA_LONG64 l;
    unsigned char *p = c->u.p;
    const unsigned char *data = (const unsigned char *)_data;

    if (len == 0)
        return 1;

    l = (c->Nl + (((SHA_LONG64) len) << 3)) & U64(0xffffffffffffffff);
    if (l < c->Nl)
        c->Nh++;
    if (sizeof(len) >= 8)
        c->Nh += (((SHA_LONG64) len) >> 61);
    c->Nl = l;

    if (c->num != 0) {
        size_t n = sizeof(c->u) - c->num;

        if (len < n) {
            memcpy(p + c->num, data, len), c->num += (unsigned int)len;
            return 1;
        } else {
            memcpy(p + c->num, data, n), c->num = 0;
            len -= n, data += n;
            sha512_block_data_order(c, p, 1);
        }
    }

    if (len >= sizeof(c->u)) {
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
        if ((size_t)data % sizeof(c->u.d[0]) != 0)
            while (len >= sizeof(c->u))
                memcpy(p, data, sizeof(c->u)),
                sha512_block_data_order(c, p, 1),
                len -= sizeof(c->u), data += sizeof(c->u);
        else
#endif
            sha512_block_data_order(c, data, len / sizeof(c->u)),
            data += len, len %= sizeof(c->u), data -= len;
    }

    if (len != 0)
        memcpy(p, data, len), c->num = (int)len;

    return 1;
}
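
/*-
 * The message length is tracked in bits across Nh:Nl. len << 3 converts
 * bytes to bits, the l < c->Nl test propagates the carry into Nh, and
 * len >> 61 adds the bits shifted out of the 64-bit low word. Worked
 * example: an update of 2^62 bytes is 2^65 bits; Nl gains 0 (2^65 mod
 * 2^64) and Nh gains 2^62 >> 61 = 2, i.e. exactly 2 * 2^64 + 0 bits.
 */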

int SHA384_Update(SHA512_CTX *c, const void *data, size_t len)
{
    return SHA512_Update(c, data, len);
}

void SHA512_Transform(SHA512_CTX *c, const unsigned char *data)
{
#ifndef SHA512_BLOCK_CAN_MANAGE_UNALIGNED_DATA
    if ((size_t)data % sizeof(c->u.d[0]) != 0)
        memcpy(c->u.p, data, sizeof(c->u.p)), data = c->u.p;
#endif
    sha512_block_data_order(c, data, 1);
}

#ifndef SHA512_ASM
static const SHA_LONG64 K512[80] = {
    U64(0x428a2f98d728ae22), U64(0x7137449123ef65cd),
    U64(0xb5c0fbcfec4d3b2f), U64(0xe9b5dba58189dbbc),
    U64(0x3956c25bf348b538), U64(0x59f111f1b605d019),
    U64(0x923f82a4af194f9b), U64(0xab1c5ed5da6d8118),
    U64(0xd807aa98a3030242), U64(0x12835b0145706fbe),
    U64(0x243185be4ee4b28c), U64(0x550c7dc3d5ffb4e2),
    U64(0x72be5d74f27b896f), U64(0x80deb1fe3b1696b1),
    U64(0x9bdc06a725c71235), U64(0xc19bf174cf692694),
    U64(0xe49b69c19ef14ad2), U64(0xefbe4786384f25e3),
    U64(0x0fc19dc68b8cd5b5), U64(0x240ca1cc77ac9c65),
    U64(0x2de92c6f592b0275), U64(0x4a7484aa6ea6e483),
    U64(0x5cb0a9dcbd41fbd4), U64(0x76f988da831153b5),
    U64(0x983e5152ee66dfab), U64(0xa831c66d2db43210),
    U64(0xb00327c898fb213f), U64(0xbf597fc7beef0ee4),
    U64(0xc6e00bf33da88fc2), U64(0xd5a79147930aa725),
    U64(0x06ca6351e003826f), U64(0x142929670a0e6e70),
    U64(0x27b70a8546d22ffc), U64(0x2e1b21385c26c926),
    U64(0x4d2c6dfc5ac42aed), U64(0x53380d139d95b3df),
    U64(0x650a73548baf63de), U64(0x766a0abb3c77b2a8),
    U64(0x81c2c92e47edaee6), U64(0x92722c851482353b),
    U64(0xa2bfe8a14cf10364), U64(0xa81a664bbc423001),
    U64(0xc24b8b70d0f89791), U64(0xc76c51a30654be30),
    U64(0xd192e819d6ef5218), U64(0xd69906245565a910),
    U64(0xf40e35855771202a), U64(0x106aa07032bbd1b8),
    U64(0x19a4c116b8d2d0c8), U64(0x1e376c085141ab53),
    U64(0x2748774cdf8eeb99), U64(0x34b0bcb5e19b48a8),
    U64(0x391c0cb3c5c95a63), U64(0x4ed8aa4ae3418acb),
    U64(0x5b9cca4f7763e373), U64(0x682e6ff3d6b2b8a3),
    U64(0x748f82ee5defb2fc), U64(0x78a5636f43172f60),
    U64(0x84c87814a1f0ab72), U64(0x8cc702081a6439ec),
    U64(0x90befffa23631e28), U64(0xa4506cebde82bde9),
    U64(0xbef9a3f7b2c67915), U64(0xc67178f2e372532b),
    U64(0xca273eceea26619c), U64(0xd186b8c721c0c207),
    U64(0xeada7dd6cde0eb1e), U64(0xf57d4f7fee6ed178),
    U64(0x06f067aa72176fba), U64(0x0a637dc5a2c898a6),
    U64(0x113f9804bef90dae), U64(0x1b710b35131c471b),
    U64(0x28db77f523047d84), U64(0x32caab7b40c72493),
    U64(0x3c9ebe0a15c9bebc), U64(0x431d67c49c100d4c),
    U64(0x4cc5d4becb3e42b6), U64(0x597f299cfc657e2a),
    U64(0x5fcb6fab3ad6faec), U64(0x6c44198c4a475817)
};
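
/*-
 * K512 holds the SHA-384/512 round constants of FIPS 180-4: the first
 * 64 bits of the fractional parts of the cube roots of the first 80
 * primes. E.g. cbrt(2) = 1.2599..., and the fractional part scaled by
 * 2^64 gives K512[0] = 0x428a2f98d728ae22.
 */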

# ifndef PEDANTIC
#  if defined(__GNUC__) && __GNUC__>=2 && \
      !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#   if defined(__x86_64) || defined(__x86_64__)
#    define ROTR(a,n)   ({ SHA_LONG64 ret;              \
                           asm ("rorq %1,%0"            \
                                : "=r"(ret)             \
                                : "J"(n),"0"(a)         \
                                : "cc"); ret;           })
#    if !defined(B_ENDIAN)
#     define PULL64(x)  ({ SHA_LONG64 ret=*((const SHA_LONG64 *)(&(x))); \
                           asm ("bswapq %0"             \
                                : "=r"(ret)             \
                                : "0"(ret)); ret;       })
#    endif
#   elif (defined(__i386) || defined(__i386__)) && !defined(B_ENDIAN)
#    if defined(I386_ONLY)
#     define PULL64(x)  ({ const unsigned int *p=(const unsigned int *)(&(x)); \
                           unsigned int hi=p[0],lo=p[1];          \
                           asm("xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
                               "roll $16,%%eax; roll $16,%%edx; " \
                               "xchgb %%ah,%%al;xchgb %%dh,%%dl;" \
                               : "=a"(lo),"=d"(hi)                \
                               : "0"(lo),"1"(hi) : "cc");         \
                           ((SHA_LONG64)hi)<<32|lo;     })
#    else
#     define PULL64(x)  ({ const unsigned int *p=(const unsigned int *)(&(x)); \
                           unsigned int hi=p[0],lo=p[1];        \
                           asm ("bswapl %0; bswapl %1;"         \
                                : "=r"(lo),"=r"(hi)             \
                                : "0"(lo),"1"(hi));             \
                           ((SHA_LONG64)hi)<<32|lo;     })
#    endif
#   elif (defined(_ARCH_PPC) && defined(__64BIT__)) || defined(_ARCH_PPC64)
#    define ROTR(a,n)   ({ SHA_LONG64 ret;              \
                           asm ("rotrdi %0,%1,%2"       \
                                : "=r"(ret)             \
                                : "r"(a),"K"(n)); ret;  })
#   elif defined(__aarch64__)
#    define ROTR(a,n)   ({ SHA_LONG64 ret;              \
                           asm ("ror %0,%1,%2"          \
                                : "=r"(ret)             \
                                : "r"(a),"I"(n)); ret;  })
#    if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
        __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#     define PULL64(x)  ({ SHA_LONG64 ret;              \
                           asm ("rev %0,%1"             \
                                : "=r"(ret)             \
                                : "r"(*((const SHA_LONG64 *)(&(x))))); ret; })
#    endif
#   elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 32
#    define PULL64(x) ({ SHA_LONG64 ret;                                \
                         unsigned int *r = (unsigned int *)(&(ret));    \
                         const unsigned int *p = (const unsigned int *)(&(x)); \
                         asm ("rev8 %0, %1"                             \
                              : "=r"(r[0])                              \
                              : "r" (p[1]));                            \
                         asm ("rev8 %0, %1"                             \
                              : "=r"(r[1])                              \
                              : "r" (p[0])); ret; })
#   elif (defined(__riscv_zbkb) || defined(__riscv_zbb)) && __riscv_xlen == 64
#    define PULL64(x) ({ SHA_LONG64 ret;                \
                         asm ("rev8 %0, %1"             \
                              : "=r"(ret)               \
                              : "r"(x)); ret; })
#   endif
#   if defined(__riscv_zknh) && __riscv_xlen == 32
#    define Sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                         const unsigned int *p = (const unsigned int *)(&(x)); \
                         asm ("sha512sum0r %0, %1, %2"           \
                              : "=r"(r[0])                       \
                              : "r" (p[0]), "r" (p[1]));         \
                         asm ("sha512sum0r %0, %2, %1"           \
                              : "=r"(r[1])                       \
                              : "r" (p[0]), "r" (p[1])); ret; })
#    define Sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                         const unsigned int *p = (const unsigned int *)(&(x)); \
                         asm ("sha512sum1r %0, %1, %2"           \
                              : "=r"(r[0])                       \
                              : "r" (p[0]), "r" (p[1]));         \
                         asm ("sha512sum1r %0, %2, %1"           \
                              : "=r"(r[1])                       \
                              : "r" (p[0]), "r" (p[1])); ret; })
#    define sigma0(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                         const unsigned int *p = (const unsigned int *)(&(x)); \
                         asm ("sha512sig0l %0, %1, %2"           \
                              : "=r"(r[0])                       \
                              : "r" (p[0]), "r" (p[1]));         \
                         asm ("sha512sig0h %0, %2, %1"           \
                              : "=r"(r[1])                       \
                              : "r" (p[0]), "r" (p[1])); ret; })
#    define sigma1(x) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                         const unsigned int *p = (const unsigned int *)(&(x)); \
                         asm ("sha512sig1l %0, %1, %2"           \
                              : "=r"(r[0])                       \
                              : "r" (p[0]), "r" (p[1]));         \
                         asm ("sha512sig1h %0, %2, %1"           \
                              : "=r"(r[1])                       \
                              : "r" (p[0]), "r" (p[1])); ret; })
#   elif defined(__riscv_zknh) && __riscv_xlen == 64
#    define Sigma0(x) ({ SHA_LONG64 ret;                \
                         asm ("sha512sum0 %0, %1"       \
                              : "=r"(ret)               \
                              : "r"(x)); ret; })
#    define Sigma1(x) ({ SHA_LONG64 ret;                \
                         asm ("sha512sum1 %0, %1"       \
                              : "=r"(ret)               \
                              : "r"(x)); ret; })
#    define sigma0(x) ({ SHA_LONG64 ret;                \
                         asm ("sha512sig0 %0, %1"       \
                              : "=r"(ret)               \
                              : "r"(x)); ret; })
#    define sigma1(x) ({ SHA_LONG64 ret;                \
                         asm ("sha512sig1 %0, %1"       \
                              : "=r"(ret)               \
                              : "r"(x)); ret; })
#   endif
#   if (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 32
#    define Ch(x,y,z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                         const unsigned int *xp = (const unsigned int *)(&(x)); \
                         const unsigned int *yp = (const unsigned int *)(&(y)); \
                         const unsigned int *zp = (const unsigned int *)(&(z)); \
                         asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
                              : "=r"(r[0])                           \
                              : "r"(xp[0]), "r"(yp[0]), "r"(zp[0])); \
                         asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
                              : "=r"(r[1])                           \
                              : "r"(xp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
#    define Maj(x,y,z) ({ SHA_LONG64 ret; unsigned int *r = (unsigned int *)(&(ret)); \
                          const unsigned int *xp = (const unsigned int *)(&(x)); \
                          const unsigned int *yp = (const unsigned int *)(&(y)); \
                          const unsigned int *zp = (const unsigned int *)(&(z)); \
                          asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
                               : "=r"(r[0])                                \
                               : "r"(xp[0]^zp[0]), "r"(yp[0]), "r"(zp[0])); \
                          asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3\n\t" \
                               : "=r"(r[1])                                \
                               : "r"(xp[1]^zp[1]), "r"(yp[1]), "r"(zp[1])); ret; })
#   elif (defined(__riscv_zbt) || defined(__riscv_zpn)) && __riscv_xlen == 64
#    define Ch(x,y,z) ({ SHA_LONG64 ret;                        \
                         asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3" \
                              : "=r"(ret)                       \
                              : "r"(x), "r"(y), "r"(z)); ret; })
#    define Maj(x,y,z) ({ SHA_LONG64 ret;                       \
                          asm (".insn r4 0x33, 1, 0x3, %0, %2, %1, %3" \
                               : "=r"(ret)                      \
                               : "r"(x^z), "r"(y), "r"(x)); ret; })
#   endif
#  elif defined(_MSC_VER)
#   if defined(_WIN64)          /* applies to both IA-64 and AMD64 */
#    pragma intrinsic(_rotr64)
#    define ROTR(a,n)   _rotr64((a),n)
#   endif
#   if defined(_M_IX86) && !defined(OPENSSL_NO_ASM) && \
       !defined(OPENSSL_NO_INLINE_ASM)
#    if defined(I386_ONLY)
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm xchg dh, dl
    _asm xchg ah, al
    _asm rol edx, 16
    _asm rol eax, 16
    _asm xchg dh, dl
    _asm xchg ah, al
}
#    else
static SHA_LONG64 __fastcall __pull64be(const void *x)
{
    _asm mov edx,[ecx + 0]
    _asm mov eax,[ecx + 4]
    _asm bswap edx
    _asm bswap eax
}
#    endif
#    define PULL64(x) __pull64be(&(x))
#   endif
#  endif
# endif
# ifndef PULL64
#  define B(x,j)    (((SHA_LONG64)(*(((const unsigned char *)(&x))+j)))<<((7-j)*8))
#  define PULL64(x) (B(x,0)|B(x,1)|B(x,2)|B(x,3)|B(x,4)|B(x,5)|B(x,6)|B(x,7))
# endif
# ifndef ROTR
#  define ROTR(x,s) (((x)>>s) | (x)<<(64-s))
# endif
# ifndef Sigma0
#  define Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
# endif
# ifndef Sigma1
#  define Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
# endif
# ifndef sigma0
#  define sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
# endif
# ifndef sigma1
#  define sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
# endif
# ifndef Ch
#  define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
# endif
# ifndef Maj
#  define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
# endif
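
/*-
 * The fallbacks above are the plain FIPS 180-4 definitions: Sigma0 and
 * Sigma1 mix the working variables, sigma0 and sigma1 expand the
 * message schedule, Ch(x,y,z) selects bits from y or z according to x,
 * and Maj(x,y,z) is the bit-wise majority of its three arguments.
 */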

# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
/*
 * This code should give better results on a 32-bit CPU with fewer than
 * ~24 registers, both size- and performance-wise...
 */

static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 A, E, T;
    SHA_LONG64 X[9 + 80], *F;
    int i;

    while (num--) {

        F = X + 80;
        A = ctx->h[0];
        F[1] = ctx->h[1];
        F[2] = ctx->h[2];
        F[3] = ctx->h[3];
        E = ctx->h[4];
        F[5] = ctx->h[5];
        F[6] = ctx->h[6];
        F[7] = ctx->h[7];

        for (i = 0; i < 16; i++, F--) {
#  ifdef B_ENDIAN
            T = W[i];
#  else
            T = PULL64(W[i]);
#  endif
            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        for (; i < 80; i++, F--) {
            T = sigma0(F[8 + 16 - 1]);
            T += sigma1(F[8 + 16 - 14]);
            T += F[8 + 16] + F[8 + 16 - 9];

            F[0] = A;
            F[4] = E;
            F[8] = T;
            T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
            E = F[3] + T;
            A = T + Sigma0(A) + Maj(A, F[1], F[2]);
        }

        ctx->h[0] += A;
        ctx->h[1] += F[1];
        ctx->h[2] += F[2];
        ctx->h[3] += F[3];
        ctx->h[4] += E;
        ctx->h[5] += F[5];
        ctx->h[6] += F[6];
        ctx->h[7] += F[7];

        W += SHA_LBLOCK;
    }
}
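
/*-
 * Rather than rotating eight working variables each round, the loop
 * above slides the frame pointer F down through X[], so F[1..3] and
 * F[5..7] alias values stored by earlier rounds and only A and E need
 * to stay in registers; the extra stores suit register-starved 32-bit
 * CPUs.
 */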

# elif defined(OPENSSL_SMALL_FOOTPRINT)

static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
    SHA_LONG64 X[16];
    int i;

    while (num--) {

        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

        for (i = 0; i < 16; i++) {
#  ifdef B_ENDIAN
            T1 = X[i] = W[i];
#  else
            T1 = X[i] = PULL64(W[i]);
#  endif
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        for (; i < 80; i++) {
            s0 = X[(i + 1) & 0x0f];
            s0 = sigma0(s0);
            s1 = X[(i + 14) & 0x0f];
            s1 = sigma1(s1);

            T1 = X[i & 0xf] += s0 + s1 + X[(i + 9) & 0xf];
            T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
            T2 = Sigma0(a) + Maj(a, b, c);
            h = g;
            g = f;
            f = e;
            e = d + T1;
            d = c;
            c = b;
            b = a;
            a = T1 + T2;
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        W += SHA_LBLOCK;
    }
}
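
/*-
 * Here the message schedule lives in a 16-entry circular buffer: round
 * i overwrites X[i & 0xf], which held W[i - 16] and is no longer
 * needed, so the frame stays at 16 * 8 = 128 bytes instead of a full
 * 80-entry schedule.
 */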

# else
#  define ROUND_00_15(i,a,b,c,d,e,f,g,h) do { \
        T1 += h + Sigma1(e) + Ch(e,f,g) + K512[i]; \
        h = Sigma0(a) + Maj(a,b,c); \
        d += T1; h += T1; } while (0)

#  define ROUND_16_80(i,j,a,b,c,d,e,f,g,h,X) do { \
        s0 = X[(j+1)&0x0f]; s0 = sigma0(s0); \
        s1 = X[(j+14)&0x0f]; s1 = sigma1(s1); \
        T1 = X[(j)&0x0f] += s0 + s1 + X[(j+9)&0x0f]; \
        ROUND_00_15(i+j,a,b,c,d,e,f,g,h); } while (0)
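
/*-
 * The unrolled variant below never copies data between working
 * variables; each ROUND_* invocation permutes the argument names
 * instead, so the register assignment repeats every eight rounds.
 * ROUND_00_15 expects T1 to hold the round's message word, which is
 * why each of the first 16 calls is preceded by T1 = X[i] = ...;
 * ROUND_16_80 computes T1 itself.
 */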

static void sha512_block_data_order(SHA512_CTX *ctx, const void *in,
                                    size_t num)
{
    const SHA_LONG64 *W = in;
    SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1;
    SHA_LONG64 X[16];
    int i;

    while (num--) {

        a = ctx->h[0];
        b = ctx->h[1];
        c = ctx->h[2];
        d = ctx->h[3];
        e = ctx->h[4];
        f = ctx->h[5];
        g = ctx->h[6];
        h = ctx->h[7];

#  ifdef B_ENDIAN
        T1 = X[0] = W[0];
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        T1 = X[1] = W[1];
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        T1 = X[2] = W[2];
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        T1 = X[3] = W[3];
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        T1 = X[4] = W[4];
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        T1 = X[5] = W[5];
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        T1 = X[6] = W[6];
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        T1 = X[7] = W[7];
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        T1 = X[8] = W[8];
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        T1 = X[9] = W[9];
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        T1 = X[10] = W[10];
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        T1 = X[11] = W[11];
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        T1 = X[12] = W[12];
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        T1 = X[13] = W[13];
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        T1 = X[14] = W[14];
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        T1 = X[15] = W[15];
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
#  else
        T1 = X[0] = PULL64(W[0]);
        ROUND_00_15(0, a, b, c, d, e, f, g, h);
        T1 = X[1] = PULL64(W[1]);
        ROUND_00_15(1, h, a, b, c, d, e, f, g);
        T1 = X[2] = PULL64(W[2]);
        ROUND_00_15(2, g, h, a, b, c, d, e, f);
        T1 = X[3] = PULL64(W[3]);
        ROUND_00_15(3, f, g, h, a, b, c, d, e);
        T1 = X[4] = PULL64(W[4]);
        ROUND_00_15(4, e, f, g, h, a, b, c, d);
        T1 = X[5] = PULL64(W[5]);
        ROUND_00_15(5, d, e, f, g, h, a, b, c);
        T1 = X[6] = PULL64(W[6]);
        ROUND_00_15(6, c, d, e, f, g, h, a, b);
        T1 = X[7] = PULL64(W[7]);
        ROUND_00_15(7, b, c, d, e, f, g, h, a);
        T1 = X[8] = PULL64(W[8]);
        ROUND_00_15(8, a, b, c, d, e, f, g, h);
        T1 = X[9] = PULL64(W[9]);
        ROUND_00_15(9, h, a, b, c, d, e, f, g);
        T1 = X[10] = PULL64(W[10]);
        ROUND_00_15(10, g, h, a, b, c, d, e, f);
        T1 = X[11] = PULL64(W[11]);
        ROUND_00_15(11, f, g, h, a, b, c, d, e);
        T1 = X[12] = PULL64(W[12]);
        ROUND_00_15(12, e, f, g, h, a, b, c, d);
        T1 = X[13] = PULL64(W[13]);
        ROUND_00_15(13, d, e, f, g, h, a, b, c);
        T1 = X[14] = PULL64(W[14]);
        ROUND_00_15(14, c, d, e, f, g, h, a, b);
        T1 = X[15] = PULL64(W[15]);
        ROUND_00_15(15, b, c, d, e, f, g, h, a);
#  endif

        for (i = 16; i < 80; i += 16) {
            ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X);
            ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X);
            ROUND_16_80(i, 9, h, a, b, c, d, e, f, g, X);
            ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X);
            ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X);
            ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X);
            ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X);
            ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X);
            ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X);
        }

        ctx->h[0] += a;
        ctx->h[1] += b;
        ctx->h[2] += c;
        ctx->h[3] += d;
        ctx->h[4] += e;
        ctx->h[5] += f;
        ctx->h[6] += g;
        ctx->h[7] += h;

        W += SHA_LBLOCK;
    }
}

# endif

#endif /* SHA512_ASM */