VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.2/crypto/sha/keccak1600.c@101021

Last change on this file since 101021 was 101021, checked in by vboxsync, 15 months ago

openssl-3.1.2: Applied and adjusted our OpenSSL changes to 3.1.0. bugref:10519

File size: 41.5 KB
Line 
1/*
2 * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10#include <openssl/e_os2.h>
11#include <string.h>
12#include <assert.h>
13
14size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
15 size_t r);
16void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r);
17
18#if !defined(KECCAK1600_ASM) || !defined(SELFTEST)
19
20/*
21 * Choose some sensible defaults
22 */
23#if !defined(KECCAK_REF) && !defined(KECCAK_1X) && !defined(KECCAK_1X_ALT) && \
24 !defined(KECCAK_2X) && !defined(KECCAK_INPLACE)
25# define KECCAK_2X /* default to KECCAK_2X variant */
26#endif
27
28#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
29 (defined(__x86_64) && !defined(__BMI__)) || defined(_M_X64) || \
30 defined(__mips) || defined(__riscv) || defined(__s390__) || \
31 defined(__EMSCRIPTEN__)
32/*
33 * These lack an "and with complement" instruction, so minimize the number
34 * of "not"-s. Implemented only in the [default] KECCAK_2X variant.
35 */
36# define KECCAK_COMPLEMENTING_TRANSFORM
37#endif
38
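/*
 * Illustrative sketch (a helper nothing else references): the
 * KECCAK_COMPLEMENTING_TRANSFORM above rests on De Morgan's law.  If two of
 * a row's lanes, here 'a' and 'c', are kept bitwise complemented in storage,
 * the chi expression a ^ (~b & c) can be evaluated as a_stored ^ (b | c_stored),
 * i.e. with an OR instead of an extra "not".  Which lanes stay complemented
 * across rounds follows the "Keccak implementation overview"; this only shows
 * the single-row identity, and the helper name is illustrative only.
 */
static void complementing_transform_demo(uint64_t a, uint64_t b, uint64_t c)
{
    uint64_t a_stored = ~a, c_stored = ~c;

    /* a ^ (~b & c) == (~a) ^ (b | ~c) for all a, b, c */
    assert((a ^ (~b & c)) == (a_stored ^ (b | c_stored)));
}
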
39#if defined(__x86_64__) || defined(__aarch64__) || \
40 defined(__mips64) || defined(__ia64) || \
41 (defined(__VMS) && !defined(__vax))
42/*
43 * These are available even in ILP32 flavours, but even then they are
44 * capable of performing 64-bit operations as efficiently as in *P64.
45 * Since sizeof(void *) is not a reliable indicator here, just hard-code it.
46 */
47# define BIT_INTERLEAVE (0)
48#else
49# define BIT_INTERLEAVE (sizeof(void *) < 8)
50#endif
51
52# ifdef VBOX
53# include <iprt/asm.h>
54# define ROL32(val, shift) ASMRotateLeftU32(val, shift)
55# define ROL64(val, shift) ASMRotateLeftU64(val, shift)
56# else /* !VBOX */
57#define ROL32(a, offset) (((a) << (offset)) | ((a) >> ((32 - (offset)) & 31)))
58
59static uint64_t ROL64(uint64_t val, int offset)
60{
61 if (offset == 0) {
62 return val;
63 } else if (!BIT_INTERLEAVE) {
64 return (val << offset) | (val >> (64-offset));
65 } else {
66 uint32_t hi = (uint32_t)(val >> 32), lo = (uint32_t)val;
67
68 if (offset & 1) {
69 uint32_t tmp = hi;
70
71 offset >>= 1;
72 hi = ROL32(lo, offset);
73 lo = ROL32(tmp, offset + 1);
74 } else {
75 offset >>= 1;
76 lo = ROL32(lo, offset);
77 hi = ROL32(hi, offset);
78 }
79
80 return ((uint64_t)hi << 32) | lo;
81 }
82}
83#endif /* !VBOX */
84
85static const unsigned char rhotates[5][5] = {
86 { 0, 1, 62, 28, 27 },
87 { 36, 44, 6, 55, 20 },
88 { 3, 10, 43, 25, 39 },
89 { 41, 45, 15, 21, 8 },
90 { 18, 2, 61, 56, 14 }
91};
92
93static const uint64_t iotas[] = {
94 BIT_INTERLEAVE ? 0x0000000000000001ULL : 0x0000000000000001ULL,
95 BIT_INTERLEAVE ? 0x0000008900000000ULL : 0x0000000000008082ULL,
96 BIT_INTERLEAVE ? 0x8000008b00000000ULL : 0x800000000000808aULL,
97 BIT_INTERLEAVE ? 0x8000808000000000ULL : 0x8000000080008000ULL,
98 BIT_INTERLEAVE ? 0x0000008b00000001ULL : 0x000000000000808bULL,
99 BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
100 BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
101 BIT_INTERLEAVE ? 0x8000008200000001ULL : 0x8000000000008009ULL,
102 BIT_INTERLEAVE ? 0x0000000b00000000ULL : 0x000000000000008aULL,
103 BIT_INTERLEAVE ? 0x0000000a00000000ULL : 0x0000000000000088ULL,
104 BIT_INTERLEAVE ? 0x0000808200000001ULL : 0x0000000080008009ULL,
105 BIT_INTERLEAVE ? 0x0000800300000000ULL : 0x000000008000000aULL,
106 BIT_INTERLEAVE ? 0x0000808b00000001ULL : 0x000000008000808bULL,
107 BIT_INTERLEAVE ? 0x8000000b00000001ULL : 0x800000000000008bULL,
108 BIT_INTERLEAVE ? 0x8000008a00000001ULL : 0x8000000000008089ULL,
109 BIT_INTERLEAVE ? 0x8000008100000001ULL : 0x8000000000008003ULL,
110 BIT_INTERLEAVE ? 0x8000008100000000ULL : 0x8000000000008002ULL,
111 BIT_INTERLEAVE ? 0x8000000800000000ULL : 0x8000000000000080ULL,
112 BIT_INTERLEAVE ? 0x0000008300000000ULL : 0x000000000000800aULL,
113 BIT_INTERLEAVE ? 0x8000800300000000ULL : 0x800000008000000aULL,
114 BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
115 BIT_INTERLEAVE ? 0x8000008800000000ULL : 0x8000000000008080ULL,
116 BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
117 BIT_INTERLEAVE ? 0x8000808200000000ULL : 0x8000000080008008ULL
118};
119
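/*
 * Illustrative sketch (nothing below calls it): when BIT_INTERLEAVE is
 * non-zero, each entry of iotas[] above is simply the canonical FIPS 202
 * round constant stored in the even/odd bit-interleaved form produced by
 * BitInterleave() further down in this file; when BIT_INTERLEAVE is zero,
 * BitInterleave() is the identity, so the check below holds either way.
 * The helper name is illustrative only.
 */
static uint64_t BitInterleave(uint64_t Ai);     /* defined later in this file */

static void iota_interleave_demo(void)
{
    /* the canonical round constant for round 4 is 0x000000000000808b */
    assert(BitInterleave(0x000000000000808bULL) == iotas[4]);
}
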
120#if defined(KECCAK_REF)
121/*
122 * This is a straightforward, "maximum clarity" implementation aiming
123 * to resemble section 3.2 of FIPS PUB 202 "SHA-3 Standard:
124 * Permutation-Based Hash and Extendable-Output Functions" as closely as
125 * possible, with one caveat: because of the way C stores matrices,
126 * references to A[x,y] in the specification appear here as A[y][x].
127 * The implementation unrolls the inner x-loops so that the modulo 5
128 * operations are explicitly pre-computed.
129 */
130static void Theta(uint64_t A[5][5])
131{
132 uint64_t C[5], D[5];
133 size_t y;
134
135 C[0] = A[0][0];
136 C[1] = A[0][1];
137 C[2] = A[0][2];
138 C[3] = A[0][3];
139 C[4] = A[0][4];
140
141 for (y = 1; y < 5; y++) {
142 C[0] ^= A[y][0];
143 C[1] ^= A[y][1];
144 C[2] ^= A[y][2];
145 C[3] ^= A[y][3];
146 C[4] ^= A[y][4];
147 }
148
149 D[0] = ROL64(C[1], 1) ^ C[4];
150 D[1] = ROL64(C[2], 1) ^ C[0];
151 D[2] = ROL64(C[3], 1) ^ C[1];
152 D[3] = ROL64(C[4], 1) ^ C[2];
153 D[4] = ROL64(C[0], 1) ^ C[3];
154
155 for (y = 0; y < 5; y++) {
156 A[y][0] ^= D[0];
157 A[y][1] ^= D[1];
158 A[y][2] ^= D[2];
159 A[y][3] ^= D[3];
160 A[y][4] ^= D[4];
161 }
162}
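
/*
 * Illustrative sketch (not used by the code in this file): the same Theta
 * step written with explicit modulo-5 indexing, closer to the notation of
 * FIPS 202 section 3.2, to show what the unrolled version above
 * pre-computes.  The helper name Theta_looped is illustrative only.
 */
static void Theta_looped(uint64_t A[5][5])
{
    uint64_t C[5], D[5];
    size_t x, y;

    for (x = 0; x < 5; x++)
        C[x] = A[0][x] ^ A[1][x] ^ A[2][x] ^ A[3][x] ^ A[4][x];

    for (x = 0; x < 5; x++)
        D[x] = ROL64(C[(x + 1) % 5], 1) ^ C[(x + 4) % 5];

    for (y = 0; y < 5; y++)
        for (x = 0; x < 5; x++)
            A[y][x] ^= D[x];
}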
163
164static void Rho(uint64_t A[5][5])
165{
166 size_t y;
167
168 for (y = 0; y < 5; y++) {
169 A[y][0] = ROL64(A[y][0], rhotates[y][0]);
170 A[y][1] = ROL64(A[y][1], rhotates[y][1]);
171 A[y][2] = ROL64(A[y][2], rhotates[y][2]);
172 A[y][3] = ROL64(A[y][3], rhotates[y][3]);
173 A[y][4] = ROL64(A[y][4], rhotates[y][4]);
174 }
175}
176
177static void Pi(uint64_t A[5][5])
178{
179 uint64_t T[5][5];
180
181 /*
182 * T = A
183 * A[y][x] = T[x][(3*y+x)%5]
184 */
185 memcpy(T, A, sizeof(T));
186
187 A[0][0] = T[0][0];
188 A[0][1] = T[1][1];
189 A[0][2] = T[2][2];
190 A[0][3] = T[3][3];
191 A[0][4] = T[4][4];
192
193 A[1][0] = T[0][3];
194 A[1][1] = T[1][4];
195 A[1][2] = T[2][0];
196 A[1][3] = T[3][1];
197 A[1][4] = T[4][2];
198
199 A[2][0] = T[0][1];
200 A[2][1] = T[1][2];
201 A[2][2] = T[2][3];
202 A[2][3] = T[3][4];
203 A[2][4] = T[4][0];
204
205 A[3][0] = T[0][4];
206 A[3][1] = T[1][0];
207 A[3][2] = T[2][1];
208 A[3][3] = T[3][2];
209 A[3][4] = T[4][3];
210
211 A[4][0] = T[0][2];
212 A[4][1] = T[1][3];
213 A[4][2] = T[2][4];
214 A[4][3] = T[3][0];
215 A[4][4] = T[4][1];
216}
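
/*
 * Illustrative sketch (not used elsewhere): the unrolled Pi above is
 * exactly this loop, following the A[y][x] = T[x][(3*y+x)%5] rule stated
 * in the comment.  The helper name Pi_looped is illustrative only.
 */
static void Pi_looped(uint64_t A[5][5])
{
    uint64_t T[5][5];
    size_t x, y;

    memcpy(T, A, sizeof(T));
    for (y = 0; y < 5; y++)
        for (x = 0; x < 5; x++)
            A[y][x] = T[x][(3 * y + x) % 5];
}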
217
218static void Chi(uint64_t A[5][5])
219{
220 uint64_t C[5];
221 size_t y;
222
223 for (y = 0; y < 5; y++) {
224 C[0] = A[y][0] ^ (~A[y][1] & A[y][2]);
225 C[1] = A[y][1] ^ (~A[y][2] & A[y][3]);
226 C[2] = A[y][2] ^ (~A[y][3] & A[y][4]);
227 C[3] = A[y][3] ^ (~A[y][4] & A[y][0]);
228 C[4] = A[y][4] ^ (~A[y][0] & A[y][1]);
229
230 A[y][0] = C[0];
231 A[y][1] = C[1];
232 A[y][2] = C[2];
233 A[y][3] = C[3];
234 A[y][4] = C[4];
235 }
236}
237
238static void Iota(uint64_t A[5][5], size_t i)
239{
240 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
241 A[0][0] ^= iotas[i];
242}
243
244static void KeccakF1600(uint64_t A[5][5])
245{
246 size_t i;
247
248 for (i = 0; i < 24; i++) {
249 Theta(A);
250 Rho(A);
251 Pi(A);
252 Chi(A);
253 Iota(A, i);
254 }
255}
256
257#elif defined(KECCAK_1X)
258/*
259 * This implementation is an optimization of the code above, featuring
260 * unrolling of even the y-loops, their fusion and code motion. It also
261 * minimizes temporary storage. A compiler would normally do all these
262 * things for you; the purpose of the manual optimization is to provide
263 * an "unobscured" reference for an assembly implementation [in case
264 * this approach is chosen for some platform]. In a nutshell it is
265 * equivalent to the "plane-per-plane processing" approach discussed in
266 * section 2.4 of the "Keccak implementation overview".
267 */
268static void Round(uint64_t A[5][5], size_t i)
269{
270 uint64_t C[5], E[2]; /* registers */
271 uint64_t D[5], T[2][5]; /* memory */
272
273 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
274
275 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
276 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
277 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
278 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
279 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
280
281#if defined(__arm__)
282 D[1] = E[0] = ROL64(C[2], 1) ^ C[0];
283 D[4] = E[1] = ROL64(C[0], 1) ^ C[3];
284 D[0] = C[0] = ROL64(C[1], 1) ^ C[4];
285 D[2] = C[1] = ROL64(C[3], 1) ^ C[1];
286 D[3] = C[2] = ROL64(C[4], 1) ^ C[2];
287
288 T[0][0] = A[3][0] ^ C[0]; /* borrow T[0][0] */
289 T[0][1] = A[0][1] ^ E[0]; /* D[1] */
290 T[0][2] = A[0][2] ^ C[1]; /* D[2] */
291 T[0][3] = A[0][3] ^ C[2]; /* D[3] */
292 T[0][4] = A[0][4] ^ E[1]; /* D[4] */
293
294 C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
295 C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
296 C[0] = A[0][0] ^ C[0]; /* rotate by 0 */ /* D[0] */
297 C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); /* D[2] */
298 C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); /* D[1] */
299#else
300 D[0] = ROL64(C[1], 1) ^ C[4];
301 D[1] = ROL64(C[2], 1) ^ C[0];
302 D[2] = ROL64(C[3], 1) ^ C[1];
303 D[3] = ROL64(C[4], 1) ^ C[2];
304 D[4] = ROL64(C[0], 1) ^ C[3];
305
306 T[0][0] = A[3][0] ^ D[0]; /* borrow T[0][0] */
307 T[0][1] = A[0][1] ^ D[1];
308 T[0][2] = A[0][2] ^ D[2];
309 T[0][3] = A[0][3] ^ D[3];
310 T[0][4] = A[0][4] ^ D[4];
311
312 C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
313 C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
314 C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
315 C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
316 C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
317#endif
318 A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
319 A[0][1] = C[1] ^ (~C[2] & C[3]);
320 A[0][2] = C[2] ^ (~C[3] & C[4]);
321 A[0][3] = C[3] ^ (~C[4] & C[0]);
322 A[0][4] = C[4] ^ (~C[0] & C[1]);
323
324 T[1][0] = A[1][0] ^ (C[3] = D[0]);
325 T[1][1] = A[2][1] ^ (C[4] = D[1]); /* borrow T[1][1] */
326 T[1][2] = A[1][2] ^ (E[0] = D[2]);
327 T[1][3] = A[1][3] ^ (E[1] = D[3]);
328 T[1][4] = A[2][4] ^ (C[2] = D[4]); /* borrow T[1][4] */
329
330 C[0] = ROL64(T[0][3], rhotates[0][3]);
331 C[1] = ROL64(A[1][4] ^ C[2], rhotates[1][4]); /* D[4] */
332 C[2] = ROL64(A[2][0] ^ C[3], rhotates[2][0]); /* D[0] */
333 C[3] = ROL64(A[3][1] ^ C[4], rhotates[3][1]); /* D[1] */
334 C[4] = ROL64(A[4][2] ^ E[0], rhotates[4][2]); /* D[2] */
335
336 A[1][0] = C[0] ^ (~C[1] & C[2]);
337 A[1][1] = C[1] ^ (~C[2] & C[3]);
338 A[1][2] = C[2] ^ (~C[3] & C[4]);
339 A[1][3] = C[3] ^ (~C[4] & C[0]);
340 A[1][4] = C[4] ^ (~C[0] & C[1]);
341
342 C[0] = ROL64(T[0][1], rhotates[0][1]);
343 C[1] = ROL64(T[1][2], rhotates[1][2]);
344 C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
345 C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
346 C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
347
348 A[2][0] = C[0] ^ (~C[1] & C[2]);
349 A[2][1] = C[1] ^ (~C[2] & C[3]);
350 A[2][2] = C[2] ^ (~C[3] & C[4]);
351 A[2][3] = C[3] ^ (~C[4] & C[0]);
352 A[2][4] = C[4] ^ (~C[0] & C[1]);
353
354 C[0] = ROL64(T[0][4], rhotates[0][4]);
355 C[1] = ROL64(T[1][0], rhotates[1][0]);
356 C[2] = ROL64(T[1][1], rhotates[2][1]); /* originally A[2][1] */
357 C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
358 C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
359
360 A[3][0] = C[0] ^ (~C[1] & C[2]);
361 A[3][1] = C[1] ^ (~C[2] & C[3]);
362 A[3][2] = C[2] ^ (~C[3] & C[4]);
363 A[3][3] = C[3] ^ (~C[4] & C[0]);
364 A[3][4] = C[4] ^ (~C[0] & C[1]);
365
366 C[0] = ROL64(T[0][2], rhotates[0][2]);
367 C[1] = ROL64(T[1][3], rhotates[1][3]);
368 C[2] = ROL64(T[1][4], rhotates[2][4]); /* originally A[2][4] */
369 C[3] = ROL64(T[0][0], rhotates[3][0]); /* originally A[3][0] */
370 C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
371
372 A[4][0] = C[0] ^ (~C[1] & C[2]);
373 A[4][1] = C[1] ^ (~C[2] & C[3]);
374 A[4][2] = C[2] ^ (~C[3] & C[4]);
375 A[4][3] = C[3] ^ (~C[4] & C[0]);
376 A[4][4] = C[4] ^ (~C[0] & C[1]);
377}
378
379static void KeccakF1600(uint64_t A[5][5])
380{
381 size_t i;
382
383 for (i = 0; i < 24; i++) {
384 Round(A, i);
385 }
386}
387
388#elif defined(KECCAK_1X_ALT)
389/*
390 * This is a variant of KECCAK_1X above that reduces the requirement for
391 * temporary storage even further, but at the cost of more updates to A[][].
392 * It's less suitable if A[][] is memory bound, but better if it's
393 * register bound.
394 */
395
396static void Round(uint64_t A[5][5], size_t i)
397{
398 uint64_t C[5], D[5];
399
400 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
401
402 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
403 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
404 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
405 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
406 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
407
408 D[1] = C[0] ^ ROL64(C[2], 1);
409 D[2] = C[1] ^ ROL64(C[3], 1);
410 D[3] = C[2] ^= ROL64(C[4], 1);
411 D[4] = C[3] ^= ROL64(C[0], 1);
412 D[0] = C[4] ^= ROL64(C[1], 1);
413
414 A[0][1] ^= D[1];
415 A[1][1] ^= D[1];
416 A[2][1] ^= D[1];
417 A[3][1] ^= D[1];
418 A[4][1] ^= D[1];
419
420 A[0][2] ^= D[2];
421 A[1][2] ^= D[2];
422 A[2][2] ^= D[2];
423 A[3][2] ^= D[2];
424 A[4][2] ^= D[2];
425
426 A[0][3] ^= C[2];
427 A[1][3] ^= C[2];
428 A[2][3] ^= C[2];
429 A[3][3] ^= C[2];
430 A[4][3] ^= C[2];
431
432 A[0][4] ^= C[3];
433 A[1][4] ^= C[3];
434 A[2][4] ^= C[3];
435 A[3][4] ^= C[3];
436 A[4][4] ^= C[3];
437
438 A[0][0] ^= C[4];
439 A[1][0] ^= C[4];
440 A[2][0] ^= C[4];
441 A[3][0] ^= C[4];
442 A[4][0] ^= C[4];
443
444 C[1] = A[0][1];
445 C[2] = A[0][2];
446 C[3] = A[0][3];
447 C[4] = A[0][4];
448
449 A[0][1] = ROL64(A[1][1], rhotates[1][1]);
450 A[0][2] = ROL64(A[2][2], rhotates[2][2]);
451 A[0][3] = ROL64(A[3][3], rhotates[3][3]);
452 A[0][4] = ROL64(A[4][4], rhotates[4][4]);
453
454 A[1][1] = ROL64(A[1][4], rhotates[1][4]);
455 A[2][2] = ROL64(A[2][3], rhotates[2][3]);
456 A[3][3] = ROL64(A[3][2], rhotates[3][2]);
457 A[4][4] = ROL64(A[4][1], rhotates[4][1]);
458
459 A[1][4] = ROL64(A[4][2], rhotates[4][2]);
460 A[2][3] = ROL64(A[3][4], rhotates[3][4]);
461 A[3][2] = ROL64(A[2][1], rhotates[2][1]);
462 A[4][1] = ROL64(A[1][3], rhotates[1][3]);
463
464 A[4][2] = ROL64(A[2][4], rhotates[2][4]);
465 A[3][4] = ROL64(A[4][3], rhotates[4][3]);
466 A[2][1] = ROL64(A[1][2], rhotates[1][2]);
467 A[1][3] = ROL64(A[3][1], rhotates[3][1]);
468
469 A[2][4] = ROL64(A[4][0], rhotates[4][0]);
470 A[4][3] = ROL64(A[3][0], rhotates[3][0]);
471 A[1][2] = ROL64(A[2][0], rhotates[2][0]);
472 A[3][1] = ROL64(A[1][0], rhotates[1][0]);
473
474 A[1][0] = ROL64(C[3], rhotates[0][3]);
475 A[2][0] = ROL64(C[1], rhotates[0][1]);
476 A[3][0] = ROL64(C[4], rhotates[0][4]);
477 A[4][0] = ROL64(C[2], rhotates[0][2]);
478
479 C[0] = A[0][0];
480 C[1] = A[1][0];
481 D[0] = A[0][1];
482 D[1] = A[1][1];
483
484 A[0][0] ^= (~A[0][1] & A[0][2]);
485 A[1][0] ^= (~A[1][1] & A[1][2]);
486 A[0][1] ^= (~A[0][2] & A[0][3]);
487 A[1][1] ^= (~A[1][2] & A[1][3]);
488 A[0][2] ^= (~A[0][3] & A[0][4]);
489 A[1][2] ^= (~A[1][3] & A[1][4]);
490 A[0][3] ^= (~A[0][4] & C[0]);
491 A[1][3] ^= (~A[1][4] & C[1]);
492 A[0][4] ^= (~C[0] & D[0]);
493 A[1][4] ^= (~C[1] & D[1]);
494
495 C[2] = A[2][0];
496 C[3] = A[3][0];
497 D[2] = A[2][1];
498 D[3] = A[3][1];
499
500 A[2][0] ^= (~A[2][1] & A[2][2]);
501 A[3][0] ^= (~A[3][1] & A[3][2]);
502 A[2][1] ^= (~A[2][2] & A[2][3]);
503 A[3][1] ^= (~A[3][2] & A[3][3]);
504 A[2][2] ^= (~A[2][3] & A[2][4]);
505 A[3][2] ^= (~A[3][3] & A[3][4]);
506 A[2][3] ^= (~A[2][4] & C[2]);
507 A[3][3] ^= (~A[3][4] & C[3]);
508 A[2][4] ^= (~C[2] & D[2]);
509 A[3][4] ^= (~C[3] & D[3]);
510
511 C[4] = A[4][0];
512 D[4] = A[4][1];
513
514 A[4][0] ^= (~A[4][1] & A[4][2]);
515 A[4][1] ^= (~A[4][2] & A[4][3]);
516 A[4][2] ^= (~A[4][3] & A[4][4]);
517 A[4][3] ^= (~A[4][4] & C[4]);
518 A[4][4] ^= (~C[4] & D[4]);
519 A[0][0] ^= iotas[i];
520}
521
522static void KeccakF1600(uint64_t A[5][5])
523{
524 size_t i;
525
526 for (i = 0; i < 24; i++) {
527 Round(A, i);
528 }
529}
530
531#elif defined(KECCAK_2X)
532/*
533 * This implementation is a variant of KECCAK_1X above with the outer-most
534 * round loop unrolled twice. This makes it possible to take the temporary
535 * storage out of the round procedure and to simplify references to it by
536 * alternating it with the actual data (see the round loop below). It was
537 * originally meant rather as a reference for an assembly implementation,
538 * but it seems to play best with compilers [as well as provide the best
539 * instruction per processed byte ratio at a minimal round unroll factor]...
540 */
541static void Round(uint64_t R[5][5], uint64_t A[5][5], size_t i)
542{
543 uint64_t C[5], D[5];
544
545 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
546
547 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
548 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
549 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
550 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
551 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
552
553 D[0] = ROL64(C[1], 1) ^ C[4];
554 D[1] = ROL64(C[2], 1) ^ C[0];
555 D[2] = ROL64(C[3], 1) ^ C[1];
556 D[3] = ROL64(C[4], 1) ^ C[2];
557 D[4] = ROL64(C[0], 1) ^ C[3];
558
559 C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
560 C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
561 C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
562 C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
563 C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
564
565#ifdef KECCAK_COMPLEMENTING_TRANSFORM
566 R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i];
567 R[0][1] = C[1] ^ (~C[2] | C[3]);
568 R[0][2] = C[2] ^ ( C[3] & C[4]);
569 R[0][3] = C[3] ^ ( C[4] | C[0]);
570 R[0][4] = C[4] ^ ( C[0] & C[1]);
571#else
572 R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
573 R[0][1] = C[1] ^ (~C[2] & C[3]);
574 R[0][2] = C[2] ^ (~C[3] & C[4]);
575 R[0][3] = C[3] ^ (~C[4] & C[0]);
576 R[0][4] = C[4] ^ (~C[0] & C[1]);
577#endif
578
579 C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
580 C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
581 C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
582 C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
583 C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
584
585#ifdef KECCAK_COMPLEMENTING_TRANSFORM
586 R[1][0] = C[0] ^ (C[1] | C[2]);
587 R[1][1] = C[1] ^ (C[2] & C[3]);
588 R[1][2] = C[2] ^ (C[3] | ~C[4]);
589 R[1][3] = C[3] ^ (C[4] | C[0]);
590 R[1][4] = C[4] ^ (C[0] & C[1]);
591#else
592 R[1][0] = C[0] ^ (~C[1] & C[2]);
593 R[1][1] = C[1] ^ (~C[2] & C[3]);
594 R[1][2] = C[2] ^ (~C[3] & C[4]);
595 R[1][3] = C[3] ^ (~C[4] & C[0]);
596 R[1][4] = C[4] ^ (~C[0] & C[1]);
597#endif
598
599 C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
600 C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
601 C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
602 C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
603 C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
604
605#ifdef KECCAK_COMPLEMENTING_TRANSFORM
606 R[2][0] = C[0] ^ ( C[1] | C[2]);
607 R[2][1] = C[1] ^ ( C[2] & C[3]);
608 R[2][2] = C[2] ^ (~C[3] & C[4]);
609 R[2][3] = ~C[3] ^ ( C[4] | C[0]);
610 R[2][4] = C[4] ^ ( C[0] & C[1]);
611#else
612 R[2][0] = C[0] ^ (~C[1] & C[2]);
613 R[2][1] = C[1] ^ (~C[2] & C[3]);
614 R[2][2] = C[2] ^ (~C[3] & C[4]);
615 R[2][3] = C[3] ^ (~C[4] & C[0]);
616 R[2][4] = C[4] ^ (~C[0] & C[1]);
617#endif
618
619 C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
620 C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
621 C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
622 C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
623 C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
624
625#ifdef KECCAK_COMPLEMENTING_TRANSFORM
626 R[3][0] = C[0] ^ ( C[1] & C[2]);
627 R[3][1] = C[1] ^ ( C[2] | C[3]);
628 R[3][2] = C[2] ^ (~C[3] | C[4]);
629 R[3][3] = ~C[3] ^ ( C[4] & C[0]);
630 R[3][4] = C[4] ^ ( C[0] | C[1]);
631#else
632 R[3][0] = C[0] ^ (~C[1] & C[2]);
633 R[3][1] = C[1] ^ (~C[2] & C[3]);
634 R[3][2] = C[2] ^ (~C[3] & C[4]);
635 R[3][3] = C[3] ^ (~C[4] & C[0]);
636 R[3][4] = C[4] ^ (~C[0] & C[1]);
637#endif
638
639 C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
640 C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
641 C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
642 C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
643 C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
644
645#ifdef KECCAK_COMPLEMENTING_TRANSFORM
646 R[4][0] = C[0] ^ (~C[1] & C[2]);
647 R[4][1] = ~C[1] ^ ( C[2] | C[3]);
648 R[4][2] = C[2] ^ ( C[3] & C[4]);
649 R[4][3] = C[3] ^ ( C[4] | C[0]);
650 R[4][4] = C[4] ^ ( C[0] & C[1]);
651#else
652 R[4][0] = C[0] ^ (~C[1] & C[2]);
653 R[4][1] = C[1] ^ (~C[2] & C[3]);
654 R[4][2] = C[2] ^ (~C[3] & C[4]);
655 R[4][3] = C[3] ^ (~C[4] & C[0]);
656 R[4][4] = C[4] ^ (~C[0] & C[1]);
657#endif
658}
659
660static void KeccakF1600(uint64_t A[5][5])
661{
662 uint64_t T[5][5];
663 size_t i;
664
665#ifdef KECCAK_COMPLEMENTING_TRANSFORM
666 A[0][1] = ~A[0][1];
667 A[0][2] = ~A[0][2];
668 A[1][3] = ~A[1][3];
669 A[2][2] = ~A[2][2];
670 A[3][2] = ~A[3][2];
671 A[4][0] = ~A[4][0];
672#endif
673
674 for (i = 0; i < 24; i += 2) {
675 Round(T, A, i);
676 Round(A, T, i + 1);
677 }
678
679#ifdef KECCAK_COMPLEMENTING_TRANSFORM
680 A[0][1] = ~A[0][1];
681 A[0][2] = ~A[0][2];
682 A[1][3] = ~A[1][3];
683 A[2][2] = ~A[2][2];
684 A[3][2] = ~A[3][2];
685 A[4][0] = ~A[4][0];
686#endif
687}
688
689#else /* define KECCAK_INPLACE to compile this code path */
690/*
691 * This implementation is KECCAK_1X from above combined 4 times with
692 * a twist that makes it possible to omit temporary storage and perform
693 * in-place processing. It's discussed in section 2.5 of the "Keccak
694 * implementation overview". It's likely to be best suited for processors
695 * with a large register bank... On the other hand, a processor with a
696 * large register bank can just as well use KECCAK_1X_ALT; it would be as
697 * fast but much more compact...
698 */
699static void FourRounds(uint64_t A[5][5], size_t i)
700{
701 uint64_t B[5], C[5], D[5];
702
703 assert(i <= (sizeof(iotas) / sizeof(iotas[0]) - 4));
704
705 /* Round 4*n */
706 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
707 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
708 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
709 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
710 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
711
712 D[0] = ROL64(C[1], 1) ^ C[4];
713 D[1] = ROL64(C[2], 1) ^ C[0];
714 D[2] = ROL64(C[3], 1) ^ C[1];
715 D[3] = ROL64(C[4], 1) ^ C[2];
716 D[4] = ROL64(C[0], 1) ^ C[3];
717
718 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
719 B[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
720 B[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
721 B[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
722 B[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
723
724 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i];
725 C[1] = A[1][1] = B[1] ^ (~B[2] & B[3]);
726 C[2] = A[2][2] = B[2] ^ (~B[3] & B[4]);
727 C[3] = A[3][3] = B[3] ^ (~B[4] & B[0]);
728 C[4] = A[4][4] = B[4] ^ (~B[0] & B[1]);
729
730 B[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
731 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
732 B[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
733 B[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
734 B[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
735
736 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
737 C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
738 C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
739 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
740 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
741
742 B[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
743 B[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
744 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
745 B[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
746 B[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
747
748 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
749 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
750 C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
751 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
752 C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
753
754 B[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
755 B[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
756 B[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
757 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
758 B[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
759
760 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
761 C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
762 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
763 C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
764 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
765
766 B[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
767 B[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
768 B[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
769 B[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
770 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
771
772 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
773 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
774 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
775 C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
776 C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
777
778 /* Round 4*n+1 */
779 D[0] = ROL64(C[1], 1) ^ C[4];
780 D[1] = ROL64(C[2], 1) ^ C[0];
781 D[2] = ROL64(C[3], 1) ^ C[1];
782 D[3] = ROL64(C[4], 1) ^ C[2];
783 D[4] = ROL64(C[0], 1) ^ C[3];
784
785 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
786 B[1] = ROL64(A[3][1] ^ D[1], rhotates[1][1]);
787 B[2] = ROL64(A[1][2] ^ D[2], rhotates[2][2]);
788 B[3] = ROL64(A[4][3] ^ D[3], rhotates[3][3]);
789 B[4] = ROL64(A[2][4] ^ D[4], rhotates[4][4]);
790
791 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 1];
792 C[1] = A[3][1] = B[1] ^ (~B[2] & B[3]);
793 C[2] = A[1][2] = B[2] ^ (~B[3] & B[4]);
794 C[3] = A[4][3] = B[3] ^ (~B[4] & B[0]);
795 C[4] = A[2][4] = B[4] ^ (~B[0] & B[1]);
796
797 B[0] = ROL64(A[3][3] ^ D[3], rhotates[0][3]);
798 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
799 B[2] = ROL64(A[4][0] ^ D[0], rhotates[2][0]);
800 B[3] = ROL64(A[2][1] ^ D[1], rhotates[3][1]);
801 B[4] = ROL64(A[0][2] ^ D[2], rhotates[4][2]);
802
803 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
804 C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
805 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
806 C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
807 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
808
809 B[0] = ROL64(A[1][1] ^ D[1], rhotates[0][1]);
810 B[1] = ROL64(A[4][2] ^ D[2], rhotates[1][2]);
811 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
812 B[3] = ROL64(A[0][4] ^ D[4], rhotates[3][4]);
813 B[4] = ROL64(A[3][0] ^ D[0], rhotates[4][0]);
814
815 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
816 C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
817 C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
818 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
819 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
820
821 B[0] = ROL64(A[4][4] ^ D[4], rhotates[0][4]);
822 B[1] = ROL64(A[2][0] ^ D[0], rhotates[1][0]);
823 B[2] = ROL64(A[0][1] ^ D[1], rhotates[2][1]);
824 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
825 B[4] = ROL64(A[1][3] ^ D[3], rhotates[4][3]);
826
827 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
828 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
829 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
830 C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
831 C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
832
833 B[0] = ROL64(A[2][2] ^ D[2], rhotates[0][2]);
834 B[1] = ROL64(A[0][3] ^ D[3], rhotates[1][3]);
835 B[2] = ROL64(A[3][4] ^ D[4], rhotates[2][4]);
836 B[3] = ROL64(A[1][0] ^ D[0], rhotates[3][0]);
837 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
838
839 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
840 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
841 C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
842 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
843 C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
844
845 /* Round 4*n+2 */
846 D[0] = ROL64(C[1], 1) ^ C[4];
847 D[1] = ROL64(C[2], 1) ^ C[0];
848 D[2] = ROL64(C[3], 1) ^ C[1];
849 D[3] = ROL64(C[4], 1) ^ C[2];
850 D[4] = ROL64(C[0], 1) ^ C[3];
851
852 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
853 B[1] = ROL64(A[2][1] ^ D[1], rhotates[1][1]);
854 B[2] = ROL64(A[4][2] ^ D[2], rhotates[2][2]);
855 B[3] = ROL64(A[1][3] ^ D[3], rhotates[3][3]);
856 B[4] = ROL64(A[3][4] ^ D[4], rhotates[4][4]);
857
858 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 2];
859 C[1] = A[2][1] = B[1] ^ (~B[2] & B[3]);
860 C[2] = A[4][2] = B[2] ^ (~B[3] & B[4]);
861 C[3] = A[1][3] = B[3] ^ (~B[4] & B[0]);
862 C[4] = A[3][4] = B[4] ^ (~B[0] & B[1]);
863
864 B[0] = ROL64(A[4][3] ^ D[3], rhotates[0][3]);
865 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
866 B[2] = ROL64(A[3][0] ^ D[0], rhotates[2][0]);
867 B[3] = ROL64(A[0][1] ^ D[1], rhotates[3][1]);
868 B[4] = ROL64(A[2][2] ^ D[2], rhotates[4][2]);
869
870 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
871 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
872 C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
873 C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
874 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
875
876 B[0] = ROL64(A[3][1] ^ D[1], rhotates[0][1]);
877 B[1] = ROL64(A[0][2] ^ D[2], rhotates[1][2]);
878 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
879 B[3] = ROL64(A[4][4] ^ D[4], rhotates[3][4]);
880 B[4] = ROL64(A[1][0] ^ D[0], rhotates[4][0]);
881
882 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
883 C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
884 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
885 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
886 C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
887
888 B[0] = ROL64(A[2][4] ^ D[4], rhotates[0][4]);
889 B[1] = ROL64(A[4][0] ^ D[0], rhotates[1][0]);
890 B[2] = ROL64(A[1][1] ^ D[1], rhotates[2][1]);
891 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
892 B[4] = ROL64(A[0][3] ^ D[3], rhotates[4][3]);
893
894 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
895 C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
896 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
897 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
898 C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
899
900 B[0] = ROL64(A[1][2] ^ D[2], rhotates[0][2]);
901 B[1] = ROL64(A[3][3] ^ D[3], rhotates[1][3]);
902 B[2] = ROL64(A[0][4] ^ D[4], rhotates[2][4]);
903 B[3] = ROL64(A[2][0] ^ D[0], rhotates[3][0]);
904 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
905
906 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
907 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
908 C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
909 C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
910 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
911
912 /* Round 4*n+3 */
913 D[0] = ROL64(C[1], 1) ^ C[4];
914 D[1] = ROL64(C[2], 1) ^ C[0];
915 D[2] = ROL64(C[3], 1) ^ C[1];
916 D[3] = ROL64(C[4], 1) ^ C[2];
917 D[4] = ROL64(C[0], 1) ^ C[3];
918
919 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
920 B[1] = ROL64(A[0][1] ^ D[1], rhotates[1][1]);
921 B[2] = ROL64(A[0][2] ^ D[2], rhotates[2][2]);
922 B[3] = ROL64(A[0][3] ^ D[3], rhotates[3][3]);
923 B[4] = ROL64(A[0][4] ^ D[4], rhotates[4][4]);
924
925 /* C[0] = */ A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 3];
926 /* C[1] = */ A[0][1] = B[1] ^ (~B[2] & B[3]);
927 /* C[2] = */ A[0][2] = B[2] ^ (~B[3] & B[4]);
928 /* C[3] = */ A[0][3] = B[3] ^ (~B[4] & B[0]);
929 /* C[4] = */ A[0][4] = B[4] ^ (~B[0] & B[1]);
930
931 B[0] = ROL64(A[1][3] ^ D[3], rhotates[0][3]);
932 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
933 B[2] = ROL64(A[1][0] ^ D[0], rhotates[2][0]);
934 B[3] = ROL64(A[1][1] ^ D[1], rhotates[3][1]);
935 B[4] = ROL64(A[1][2] ^ D[2], rhotates[4][2]);
936
937 /* C[0] ^= */ A[1][0] = B[0] ^ (~B[1] & B[2]);
938 /* C[1] ^= */ A[1][1] = B[1] ^ (~B[2] & B[3]);
939 /* C[2] ^= */ A[1][2] = B[2] ^ (~B[3] & B[4]);
940 /* C[3] ^= */ A[1][3] = B[3] ^ (~B[4] & B[0]);
941 /* C[4] ^= */ A[1][4] = B[4] ^ (~B[0] & B[1]);
942
943 B[0] = ROL64(A[2][1] ^ D[1], rhotates[0][1]);
944 B[1] = ROL64(A[2][2] ^ D[2], rhotates[1][2]);
945 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
946 B[3] = ROL64(A[2][4] ^ D[4], rhotates[3][4]);
947 B[4] = ROL64(A[2][0] ^ D[0], rhotates[4][0]);
948
949 /* C[0] ^= */ A[2][0] = B[0] ^ (~B[1] & B[2]);
950 /* C[1] ^= */ A[2][1] = B[1] ^ (~B[2] & B[3]);
951 /* C[2] ^= */ A[2][2] = B[2] ^ (~B[3] & B[4]);
952 /* C[3] ^= */ A[2][3] = B[3] ^ (~B[4] & B[0]);
953 /* C[4] ^= */ A[2][4] = B[4] ^ (~B[0] & B[1]);
954
955 B[0] = ROL64(A[3][4] ^ D[4], rhotates[0][4]);
956 B[1] = ROL64(A[3][0] ^ D[0], rhotates[1][0]);
957 B[2] = ROL64(A[3][1] ^ D[1], rhotates[2][1]);
958 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
959 B[4] = ROL64(A[3][3] ^ D[3], rhotates[4][3]);
960
961 /* C[0] ^= */ A[3][0] = B[0] ^ (~B[1] & B[2]);
962 /* C[1] ^= */ A[3][1] = B[1] ^ (~B[2] & B[3]);
963 /* C[2] ^= */ A[3][2] = B[2] ^ (~B[3] & B[4]);
964 /* C[3] ^= */ A[3][3] = B[3] ^ (~B[4] & B[0]);
965 /* C[4] ^= */ A[3][4] = B[4] ^ (~B[0] & B[1]);
966
967 B[0] = ROL64(A[4][2] ^ D[2], rhotates[0][2]);
968 B[1] = ROL64(A[4][3] ^ D[3], rhotates[1][3]);
969 B[2] = ROL64(A[4][4] ^ D[4], rhotates[2][4]);
970 B[3] = ROL64(A[4][0] ^ D[0], rhotates[3][0]);
971 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
972
973 /* C[0] ^= */ A[4][0] = B[0] ^ (~B[1] & B[2]);
974 /* C[1] ^= */ A[4][1] = B[1] ^ (~B[2] & B[3]);
975 /* C[2] ^= */ A[4][2] = B[2] ^ (~B[3] & B[4]);
976 /* C[3] ^= */ A[4][3] = B[3] ^ (~B[4] & B[0]);
977 /* C[4] ^= */ A[4][4] = B[4] ^ (~B[0] & B[1]);
978}
979
980static void KeccakF1600(uint64_t A[5][5])
981{
982 size_t i;
983
984 for (i = 0; i < 24; i += 4) {
985 FourRounds(A, i);
986 }
987}
988
989#endif
990
991static uint64_t BitInterleave(uint64_t Ai)
992{
993 if (BIT_INTERLEAVE) {
994 uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
995 uint32_t t0, t1;
996
997 t0 = lo & 0x55555555;
998 t0 |= t0 >> 1; t0 &= 0x33333333;
999 t0 |= t0 >> 2; t0 &= 0x0f0f0f0f;
1000 t0 |= t0 >> 4; t0 &= 0x00ff00ff;
1001 t0 |= t0 >> 8; t0 &= 0x0000ffff;
1002
1003 t1 = hi & 0x55555555;
1004 t1 |= t1 >> 1; t1 &= 0x33333333;
1005 t1 |= t1 >> 2; t1 &= 0x0f0f0f0f;
1006 t1 |= t1 >> 4; t1 &= 0x00ff00ff;
1007 t1 |= t1 >> 8; t1 <<= 16;
1008
1009 lo &= 0xaaaaaaaa;
1010 lo |= lo << 1; lo &= 0xcccccccc;
1011 lo |= lo << 2; lo &= 0xf0f0f0f0;
1012 lo |= lo << 4; lo &= 0xff00ff00;
1013 lo |= lo << 8; lo >>= 16;
1014
1015 hi &= 0xaaaaaaaa;
1016 hi |= hi << 1; hi &= 0xcccccccc;
1017 hi |= hi << 2; hi &= 0xf0f0f0f0;
1018 hi |= hi << 4; hi &= 0xff00ff00;
1019 hi |= hi << 8; hi &= 0xffff0000;
1020
1021 Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
1022 }
1023
1024 return Ai;
1025}
1026
1027static uint64_t BitDeinterleave(uint64_t Ai)
1028{
1029 if (BIT_INTERLEAVE) {
1030 uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
1031 uint32_t t0, t1;
1032
1033 t0 = lo & 0x0000ffff;
1034 t0 |= t0 << 8; t0 &= 0x00ff00ff;
1035 t0 |= t0 << 4; t0 &= 0x0f0f0f0f;
1036 t0 |= t0 << 2; t0 &= 0x33333333;
1037 t0 |= t0 << 1; t0 &= 0x55555555;
1038
1039 t1 = hi << 16;
1040 t1 |= t1 >> 8; t1 &= 0xff00ff00;
1041 t1 |= t1 >> 4; t1 &= 0xf0f0f0f0;
1042 t1 |= t1 >> 2; t1 &= 0xcccccccc;
1043 t1 |= t1 >> 1; t1 &= 0xaaaaaaaa;
1044
1045 lo >>= 16;
1046 lo |= lo << 8; lo &= 0x00ff00ff;
1047 lo |= lo << 4; lo &= 0x0f0f0f0f;
1048 lo |= lo << 2; lo &= 0x33333333;
1049 lo |= lo << 1; lo &= 0x55555555;
1050
1051 hi &= 0xffff0000;
1052 hi |= hi >> 8; hi &= 0xff00ff00;
1053 hi |= hi >> 4; hi &= 0xf0f0f0f0;
1054 hi |= hi >> 2; hi &= 0xcccccccc;
1055 hi |= hi >> 1; hi &= 0xaaaaaaaa;
1056
1057 Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
1058 }
1059
1060 return Ai;
1061}
1062
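/*
 * Illustrative consistency checks (a sketch, nothing below calls it):
 * BitDeinterleave() inverts BitInterleave(), and rotating in the
 * interleaved domain with ROL64() corresponds to a plain 64-bit rotation
 * of the de-interleaved value.  The second check assumes the non-VBOX
 * ROL64() above (the VBOX macro is a plain rotate, which matches only
 * when BIT_INTERLEAVE is 0) and 0 <= n < 64.
 */
static void bit_interleave_demo(uint64_t v, int n)
{
    /* plain rotation, written to avoid an undefined shift by 64 */
    uint64_t plain = (n == 0) ? v : (v << n) | (v >> (64 - n));

    assert(BitDeinterleave(BitInterleave(v)) == v);
    assert(BitDeinterleave(ROL64(BitInterleave(v), n)) == plain);
}
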
1063/*
1064 * SHA3_absorb can be called multiple times, but at each invocation the
1065 * largest multiple of |r| out of |len| bytes is processed. The number
1066 * of remaining bytes is then returned. This is done to spare the caller
1067 * the trouble of calculating the largest multiple of |r|. |r| can be
1068 * viewed as the block size. It is commonly (1600 - 256*n)/8, e.g. 168,
1069 * 136, 104, 72, but can also be (1600 - 448)/8 = 144. All this means
1070 * that message padding and intermediate sub-block buffering, byte- or
1071 * bitwise, is the caller's responsibility.
1072 */
1073size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
1074 size_t r)
1075{
1076 uint64_t *A_flat = (uint64_t *)A;
1077 size_t i, w = r / 8;
1078
1079 assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);
1080
1081 while (len >= r) {
1082 for (i = 0; i < w; i++) {
1083 uint64_t Ai = (uint64_t)inp[0] | (uint64_t)inp[1] << 8 |
1084 (uint64_t)inp[2] << 16 | (uint64_t)inp[3] << 24 |
1085 (uint64_t)inp[4] << 32 | (uint64_t)inp[5] << 40 |
1086 (uint64_t)inp[6] << 48 | (uint64_t)inp[7] << 56;
1087 inp += 8;
1088
1089 A_flat[i] ^= BitInterleave(Ai);
1090 }
1091 KeccakF1600(A);
1092 len -= r;
1093 }
1094
1095 return len;
1096}
1097
1098/*
1099 * SHA3_squeeze is called once at the end to generate the |len|-byte
1100 * hash value |out|.
1101 */
1102void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r)
1103{
1104 uint64_t *A_flat = (uint64_t *)A;
1105 size_t i, w = r / 8;
1106
1107 assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);
1108
1109 while (len != 0) {
1110 for (i = 0; i < w && len != 0; i++) {
1111 uint64_t Ai = BitDeinterleave(A_flat[i]);
1112
1113 if (len < 8) {
1114 for (i = 0; i < len; i++) {
1115 *out++ = (unsigned char)Ai;
1116 Ai >>= 8;
1117 }
1118 return;
1119 }
1120
1121 out[0] = (unsigned char)(Ai);
1122 out[1] = (unsigned char)(Ai >> 8);
1123 out[2] = (unsigned char)(Ai >> 16);
1124 out[3] = (unsigned char)(Ai >> 24);
1125 out[4] = (unsigned char)(Ai >> 32);
1126 out[5] = (unsigned char)(Ai >> 40);
1127 out[6] = (unsigned char)(Ai >> 48);
1128 out[7] = (unsigned char)(Ai >> 56);
1129 out += 8;
1130 len -= 8;
1131 }
1132 if (len)
1133 KeccakF1600(A);
1134 }
1135}
1136#endif
1137
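/*
 * Usage sketch (illustrative only, not part of the OpenSSL API): a
 * one-shot SHA3-256 built on SHA3_absorb()/SHA3_squeeze(), showing the
 * padding that the comments above leave as the caller's responsibility.
 * It assumes the standard SHA3-256 rate r = (1600 - 512)/8 = 136 bytes
 * and the SHA-3 pad10*1 padding with domain byte 0x06; the helper name
 * sha3_256_oneshot is illustrative only.
 */
static void sha3_256_oneshot(const unsigned char *msg, size_t len,
                             unsigned char out[32])
{
    uint64_t A[5][5];
    unsigned char block[136];               /* one rate-sized block */
    const size_t r = sizeof(block);
    size_t rem;

    memset(A, 0, sizeof(A));

    /* absorb all whole blocks; SHA3_absorb() returns the leftover length */
    rem = SHA3_absorb(A, msg, len, r);

    /* build and absorb the final, padded block */
    memset(block, 0, sizeof(block));
    memcpy(block, msg + (len - rem), rem);
    block[rem] = 0x06;                      /* SHA-3 domain bits + first pad bit */
    block[r - 1] |= 0x80;                   /* closing pad bit */
    SHA3_absorb(A, block, r, r);

    SHA3_squeeze(A, out, 32, r);
}
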
1138#ifdef SELFTEST
1139/*
1140 * Post-padding one-shot implementations would look as follows:
1141 *
1142 * SHA3_224 SHA3_sponge(inp, len, out, 224/8, (1600-448)/8);
1143 * SHA3_256 SHA3_sponge(inp, len, out, 256/8, (1600-512)/8);
1144 * SHA3_384 SHA3_sponge(inp, len, out, 384/8, (1600-768)/8);
1145 * SHA3_512 SHA3_sponge(inp, len, out, 512/8, (1600-1024)/8);
1146 * SHAKE_128 SHA3_sponge(inp, len, out, d, (1600-256)/8);
1147 * SHAKE_256 SHA3_sponge(inp, len, out, d, (1600-512)/8);
1148 */
1149
1150void SHA3_sponge(const unsigned char *inp, size_t len,
1151 unsigned char *out, size_t d, size_t r)
1152{
1153 uint64_t A[5][5];
1154
1155 memset(A, 0, sizeof(A));
1156 SHA3_absorb(A, inp, len, r);
1157 SHA3_squeeze(A, out, d, r);
1158}
1159
1160# include <stdio.h>
1161
1162int main()
1163{
1164 /*
1165 * This is the 5-bit SHAKE128 test from http://csrc.nist.gov/groups/ST/toolkit/examples.html#aHashing
1166 */
1167 unsigned char test[168] = { '\xf3', '\x3' };
1168 unsigned char out[512];
1169 size_t i;
1170 static const unsigned char result[512] = {
1171 0x2E, 0x0A, 0xBF, 0xBA, 0x83, 0xE6, 0x72, 0x0B,
1172 0xFB, 0xC2, 0x25, 0xFF, 0x6B, 0x7A, 0xB9, 0xFF,
1173 0xCE, 0x58, 0xBA, 0x02, 0x7E, 0xE3, 0xD8, 0x98,
1174 0x76, 0x4F, 0xEF, 0x28, 0x7D, 0xDE, 0xCC, 0xCA,
1175 0x3E, 0x6E, 0x59, 0x98, 0x41, 0x1E, 0x7D, 0xDB,
1176 0x32, 0xF6, 0x75, 0x38, 0xF5, 0x00, 0xB1, 0x8C,
1177 0x8C, 0x97, 0xC4, 0x52, 0xC3, 0x70, 0xEA, 0x2C,
1178 0xF0, 0xAF, 0xCA, 0x3E, 0x05, 0xDE, 0x7E, 0x4D,
1179 0xE2, 0x7F, 0xA4, 0x41, 0xA9, 0xCB, 0x34, 0xFD,
1180 0x17, 0xC9, 0x78, 0xB4, 0x2D, 0x5B, 0x7E, 0x7F,
1181 0x9A, 0xB1, 0x8F, 0xFE, 0xFF, 0xC3, 0xC5, 0xAC,
1182 0x2F, 0x3A, 0x45, 0x5E, 0xEB, 0xFD, 0xC7, 0x6C,
1183 0xEA, 0xEB, 0x0A, 0x2C, 0xCA, 0x22, 0xEE, 0xF6,
1184 0xE6, 0x37, 0xF4, 0xCA, 0xBE, 0x5C, 0x51, 0xDE,
1185 0xD2, 0xE3, 0xFA, 0xD8, 0xB9, 0x52, 0x70, 0xA3,
1186 0x21, 0x84, 0x56, 0x64, 0xF1, 0x07, 0xD1, 0x64,
1187 0x96, 0xBB, 0x7A, 0xBF, 0xBE, 0x75, 0x04, 0xB6,
1188 0xED, 0xE2, 0xE8, 0x9E, 0x4B, 0x99, 0x6F, 0xB5,
1189 0x8E, 0xFD, 0xC4, 0x18, 0x1F, 0x91, 0x63, 0x38,
1190 0x1C, 0xBE, 0x7B, 0xC0, 0x06, 0xA7, 0xA2, 0x05,
1191 0x98, 0x9C, 0x52, 0x6C, 0xD1, 0xBD, 0x68, 0x98,
1192 0x36, 0x93, 0xB4, 0xBD, 0xC5, 0x37, 0x28, 0xB2,
1193 0x41, 0xC1, 0xCF, 0xF4, 0x2B, 0xB6, 0x11, 0x50,
1194 0x2C, 0x35, 0x20, 0x5C, 0xAB, 0xB2, 0x88, 0x75,
1195 0x56, 0x55, 0xD6, 0x20, 0xC6, 0x79, 0x94, 0xF0,
1196 0x64, 0x51, 0x18, 0x7F, 0x6F, 0xD1, 0x7E, 0x04,
1197 0x66, 0x82, 0xBA, 0x12, 0x86, 0x06, 0x3F, 0xF8,
1198 0x8F, 0xE2, 0x50, 0x8D, 0x1F, 0xCA, 0xF9, 0x03,
1199 0x5A, 0x12, 0x31, 0xAD, 0x41, 0x50, 0xA9, 0xC9,
1200 0xB2, 0x4C, 0x9B, 0x2D, 0x66, 0xB2, 0xAD, 0x1B,
1201 0xDE, 0x0B, 0xD0, 0xBB, 0xCB, 0x8B, 0xE0, 0x5B,
1202 0x83, 0x52, 0x29, 0xEF, 0x79, 0x19, 0x73, 0x73,
1203 0x23, 0x42, 0x44, 0x01, 0xE1, 0xD8, 0x37, 0xB6,
1204 0x6E, 0xB4, 0xE6, 0x30, 0xFF, 0x1D, 0xE7, 0x0C,
1205 0xB3, 0x17, 0xC2, 0xBA, 0xCB, 0x08, 0x00, 0x1D,
1206 0x34, 0x77, 0xB7, 0xA7, 0x0A, 0x57, 0x6D, 0x20,
1207 0x86, 0x90, 0x33, 0x58, 0x9D, 0x85, 0xA0, 0x1D,
1208 0xDB, 0x2B, 0x66, 0x46, 0xC0, 0x43, 0xB5, 0x9F,
1209 0xC0, 0x11, 0x31, 0x1D, 0xA6, 0x66, 0xFA, 0x5A,
1210 0xD1, 0xD6, 0x38, 0x7F, 0xA9, 0xBC, 0x40, 0x15,
1211 0xA3, 0x8A, 0x51, 0xD1, 0xDA, 0x1E, 0xA6, 0x1D,
1212 0x64, 0x8D, 0xC8, 0xE3, 0x9A, 0x88, 0xB9, 0xD6,
1213 0x22, 0xBD, 0xE2, 0x07, 0xFD, 0xAB, 0xC6, 0xF2,
1214 0x82, 0x7A, 0x88, 0x0C, 0x33, 0x0B, 0xBF, 0x6D,
1215 0xF7, 0x33, 0x77, 0x4B, 0x65, 0x3E, 0x57, 0x30,
1216 0x5D, 0x78, 0xDC, 0xE1, 0x12, 0xF1, 0x0A, 0x2C,
1217 0x71, 0xF4, 0xCD, 0xAD, 0x92, 0xED, 0x11, 0x3E,
1218 0x1C, 0xEA, 0x63, 0xB9, 0x19, 0x25, 0xED, 0x28,
1219 0x19, 0x1E, 0x6D, 0xBB, 0xB5, 0xAA, 0x5A, 0x2A,
1220 0xFD, 0xA5, 0x1F, 0xC0, 0x5A, 0x3A, 0xF5, 0x25,
1221 0x8B, 0x87, 0x66, 0x52, 0x43, 0x55, 0x0F, 0x28,
1222 0x94, 0x8A, 0xE2, 0xB8, 0xBE, 0xB6, 0xBC, 0x9C,
1223 0x77, 0x0B, 0x35, 0xF0, 0x67, 0xEA, 0xA6, 0x41,
1224 0xEF, 0xE6, 0x5B, 0x1A, 0x44, 0x90, 0x9D, 0x1B,
1225 0x14, 0x9F, 0x97, 0xEE, 0xA6, 0x01, 0x39, 0x1C,
1226 0x60, 0x9E, 0xC8, 0x1D, 0x19, 0x30, 0xF5, 0x7C,
1227 0x18, 0xA4, 0xE0, 0xFA, 0xB4, 0x91, 0xD1, 0xCA,
1228 0xDF, 0xD5, 0x04, 0x83, 0x44, 0x9E, 0xDC, 0x0F,
1229 0x07, 0xFF, 0xB2, 0x4D, 0x2C, 0x6F, 0x9A, 0x9A,
1230 0x3B, 0xFF, 0x39, 0xAE, 0x3D, 0x57, 0xF5, 0x60,
1231 0x65, 0x4D, 0x7D, 0x75, 0xC9, 0x08, 0xAB, 0xE6,
1232 0x25, 0x64, 0x75, 0x3E, 0xAC, 0x39, 0xD7, 0x50,
1233 0x3D, 0xA6, 0xD3, 0x7C, 0x2E, 0x32, 0xE1, 0xAF,
1234 0x3B, 0x8A, 0xEC, 0x8A, 0xE3, 0x06, 0x9C, 0xD9
1235 };
1236
1237 test[167] = '\x80';
1238 SHA3_sponge(test, sizeof(test), out, sizeof(out), sizeof(test));
1239
1240 /*
1241 * The rationale behind keeping the output [formatted as below] is that
1242 * one should be able to redirect it to a file, then copy-and-paste the
1243 * final "output val" from the official example into another file, and
1244 * compare the two with diff(1).
1245 */
1246 for (i = 0; i < sizeof(out);) {
1247 printf("%02X", out[i]);
1248 printf(++i % 16 && i != sizeof(out) ? " " : "\n");
1249 }
1250
1251 if (memcmp(out,result,sizeof(out))) {
1252 fprintf(stderr,"failure\n");
1253 return 1;
1254 } else {
1255 fprintf(stderr,"success\n");
1256 return 0;
1257 }
1258}
1259#endif