VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.1l/crypto/sha/keccak1600.c@ 91772

Last change on this file since 91772 was 91772, checked in by vboxsync, 3 years ago

openssl-1.1.1l: Applied and adjusted our OpenSSL changes to 1.1.1l. bugref:10126

File size: 41.2 KB
Line 
1/*
2 * Copyright 2016-2019 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10#include <openssl/e_os2.h>
11#include <string.h>
12#include <assert.h>
13
14size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
15 size_t r);
16void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r);
17
18#if !defined(KECCAK1600_ASM) || !defined(SELFTEST)
19
20/*
21 * Choose some sensible defaults
22 */
23#if !defined(KECCAK_REF) && !defined(KECCAK_1X) && !defined(KECCAK_1X_ALT) && \
24 !defined(KECCAK_2X) && !defined(KECCAK_INPLACE)
25# define KECCAK_2X /* default to KECCAK_2X variant */
26#endif
27
28#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
29# define KECCAK_COMPLEMENTING_TRANSFORM
30#endif
31
32#if defined(__x86_64__) || defined(__aarch64__) || \
33 defined(__mips64) || defined(__ia64) || \
34 (defined(__VMS) && !defined(__vax))
35/*
36 * These are available even in ILP32 flavours, but even then they are
37 * capable of performing 64-bit operations as efficiently as in *P64.
38 * Since it's not given that we can use sizeof(void *), just shunt it.
39 */
40# define BIT_INTERLEAVE (0)
41#else
42# define BIT_INTERLEAVE (sizeof(void *) < 8)
43#endif
44
45# ifdef VBOX
46# include <iprt/asm.h>
47# define ROL32(val, shift) ASMRotateLeftU32(val, shift)
48# define ROL64(val, shift) ASMRotateLeftU64(val, shift)
49# else /* !VBOX */
50#define ROL32(a, offset) (((a) << (offset)) | ((a) >> ((32 - (offset)) & 31)))
51
52static uint64_t ROL64(uint64_t val, int offset)
53{
54 if (offset == 0) {
55 return val;
56 } else if (!BIT_INTERLEAVE) {
57 return (val << offset) | (val >> (64-offset));
58 } else {
59 uint32_t hi = (uint32_t)(val >> 32), lo = (uint32_t)val;
60
61 if (offset & 1) {
62 uint32_t tmp = hi;
63
64 offset >>= 1;
65 hi = ROL32(lo, offset);
66 lo = ROL32(tmp, offset + 1);
67 } else {
68 offset >>= 1;
69 lo = ROL32(lo, offset);
70 hi = ROL32(hi, offset);
71 }
72
73 return ((uint64_t)hi << 32) | lo;
74 }
75}
76#endif /* !VBOX */
77
78static const unsigned char rhotates[5][5] = {
79 { 0, 1, 62, 28, 27 },
80 { 36, 44, 6, 55, 20 },
81 { 3, 10, 43, 25, 39 },
82 { 41, 45, 15, 21, 8 },
83 { 18, 2, 61, 56, 14 }
84};
85
86static const uint64_t iotas[] = {
87 BIT_INTERLEAVE ? 0x0000000000000001ULL : 0x0000000000000001ULL,
88 BIT_INTERLEAVE ? 0x0000008900000000ULL : 0x0000000000008082ULL,
89 BIT_INTERLEAVE ? 0x8000008b00000000ULL : 0x800000000000808aULL,
90 BIT_INTERLEAVE ? 0x8000808000000000ULL : 0x8000000080008000ULL,
91 BIT_INTERLEAVE ? 0x0000008b00000001ULL : 0x000000000000808bULL,
92 BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
93 BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
94 BIT_INTERLEAVE ? 0x8000008200000001ULL : 0x8000000000008009ULL,
95 BIT_INTERLEAVE ? 0x0000000b00000000ULL : 0x000000000000008aULL,
96 BIT_INTERLEAVE ? 0x0000000a00000000ULL : 0x0000000000000088ULL,
97 BIT_INTERLEAVE ? 0x0000808200000001ULL : 0x0000000080008009ULL,
98 BIT_INTERLEAVE ? 0x0000800300000000ULL : 0x000000008000000aULL,
99 BIT_INTERLEAVE ? 0x0000808b00000001ULL : 0x000000008000808bULL,
100 BIT_INTERLEAVE ? 0x8000000b00000001ULL : 0x800000000000008bULL,
101 BIT_INTERLEAVE ? 0x8000008a00000001ULL : 0x8000000000008089ULL,
102 BIT_INTERLEAVE ? 0x8000008100000001ULL : 0x8000000000008003ULL,
103 BIT_INTERLEAVE ? 0x8000008100000000ULL : 0x8000000000008002ULL,
104 BIT_INTERLEAVE ? 0x8000000800000000ULL : 0x8000000000000080ULL,
105 BIT_INTERLEAVE ? 0x0000008300000000ULL : 0x000000000000800aULL,
106 BIT_INTERLEAVE ? 0x8000800300000000ULL : 0x800000008000000aULL,
107 BIT_INTERLEAVE ? 0x8000808800000001ULL : 0x8000000080008081ULL,
108 BIT_INTERLEAVE ? 0x8000008800000000ULL : 0x8000000000008080ULL,
109 BIT_INTERLEAVE ? 0x0000800000000001ULL : 0x0000000080000001ULL,
110 BIT_INTERLEAVE ? 0x8000808200000000ULL : 0x8000000080008008ULL
111};
112
113#if defined(KECCAK_REF)
114/*
115 * This is straightforward or "maximum clarity" implementation aiming
116 * to resemble section 3.2 of the FIPS PUB 202 "SHA-3 Standard:
117 * Permutation-Based Hash and Extendible-Output Functions" as much as
118 * possible. With one caveat. Because of the way C stores matrices,
119 * references to A[x,y] in the specification are presented as A[y][x].
120 * Implementation unrolls inner x-loops so that modulo 5 operations are
121 * explicitly pre-computed.
122 */
123static void Theta(uint64_t A[5][5])
124{
125 uint64_t C[5], D[5];
126 size_t y;
127
128 C[0] = A[0][0];
129 C[1] = A[0][1];
130 C[2] = A[0][2];
131 C[3] = A[0][3];
132 C[4] = A[0][4];
133
134 for (y = 1; y < 5; y++) {
135 C[0] ^= A[y][0];
136 C[1] ^= A[y][1];
137 C[2] ^= A[y][2];
138 C[3] ^= A[y][3];
139 C[4] ^= A[y][4];
140 }
141
142 D[0] = ROL64(C[1], 1) ^ C[4];
143 D[1] = ROL64(C[2], 1) ^ C[0];
144 D[2] = ROL64(C[3], 1) ^ C[1];
145 D[3] = ROL64(C[4], 1) ^ C[2];
146 D[4] = ROL64(C[0], 1) ^ C[3];
147
148 for (y = 0; y < 5; y++) {
149 A[y][0] ^= D[0];
150 A[y][1] ^= D[1];
151 A[y][2] ^= D[2];
152 A[y][3] ^= D[3];
153 A[y][4] ^= D[4];
154 }
155}
156
157static void Rho(uint64_t A[5][5])
158{
159 size_t y;
160
161 for (y = 0; y < 5; y++) {
162 A[y][0] = ROL64(A[y][0], rhotates[y][0]);
163 A[y][1] = ROL64(A[y][1], rhotates[y][1]);
164 A[y][2] = ROL64(A[y][2], rhotates[y][2]);
165 A[y][3] = ROL64(A[y][3], rhotates[y][3]);
166 A[y][4] = ROL64(A[y][4], rhotates[y][4]);
167 }
168}
169
170static void Pi(uint64_t A[5][5])
171{
172 uint64_t T[5][5];
173
174 /*
175 * T = A
176 * A[y][x] = T[x][(3*y+x)%5]
177 */
178 memcpy(T, A, sizeof(T));
179
180 A[0][0] = T[0][0];
181 A[0][1] = T[1][1];
182 A[0][2] = T[2][2];
183 A[0][3] = T[3][3];
184 A[0][4] = T[4][4];
185
186 A[1][0] = T[0][3];
187 A[1][1] = T[1][4];
188 A[1][2] = T[2][0];
189 A[1][3] = T[3][1];
190 A[1][4] = T[4][2];
191
192 A[2][0] = T[0][1];
193 A[2][1] = T[1][2];
194 A[2][2] = T[2][3];
195 A[2][3] = T[3][4];
196 A[2][4] = T[4][0];
197
198 A[3][0] = T[0][4];
199 A[3][1] = T[1][0];
200 A[3][2] = T[2][1];
201 A[3][3] = T[3][2];
202 A[3][4] = T[4][3];
203
204 A[4][0] = T[0][2];
205 A[4][1] = T[1][3];
206 A[4][2] = T[2][4];
207 A[4][3] = T[3][0];
208 A[4][4] = T[4][1];
209}
210
211static void Chi(uint64_t A[5][5])
212{
213 uint64_t C[5];
214 size_t y;
215
216 for (y = 0; y < 5; y++) {
217 C[0] = A[y][0] ^ (~A[y][1] & A[y][2]);
218 C[1] = A[y][1] ^ (~A[y][2] & A[y][3]);
219 C[2] = A[y][2] ^ (~A[y][3] & A[y][4]);
220 C[3] = A[y][3] ^ (~A[y][4] & A[y][0]);
221 C[4] = A[y][4] ^ (~A[y][0] & A[y][1]);
222
223 A[y][0] = C[0];
224 A[y][1] = C[1];
225 A[y][2] = C[2];
226 A[y][3] = C[3];
227 A[y][4] = C[4];
228 }
229}
230
231static void Iota(uint64_t A[5][5], size_t i)
232{
233 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
234 A[0][0] ^= iotas[i];
235}
236
237static void KeccakF1600(uint64_t A[5][5])
238{
239 size_t i;
240
241 for (i = 0; i < 24; i++) {
242 Theta(A);
243 Rho(A);
244 Pi(A);
245 Chi(A);
246 Iota(A, i);
247 }
248}
249
250#elif defined(KECCAK_1X)
251/*
252 * This implementation is optimization of above code featuring unroll
253 * of even y-loops, their fusion and code motion. It also minimizes
254 * temporary storage. Compiler would normally do all these things for
255 * you, purpose of manual optimization is to provide "unobscured"
256 * reference for assembly implementation [in case this approach is
257 * chosen for implementation on some platform]. In the nutshell it's
258 * equivalent of "plane-per-plane processing" approach discussed in
259 * section 2.4 of "Keccak implementation overview".
260 */
261static void Round(uint64_t A[5][5], size_t i)
262{
263 uint64_t C[5], E[2]; /* registers */
264 uint64_t D[5], T[2][5]; /* memory */
265
266 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
267
268 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
269 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
270 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
271 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
272 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
273
274#if defined(__arm__)
275 D[1] = E[0] = ROL64(C[2], 1) ^ C[0];
276 D[4] = E[1] = ROL64(C[0], 1) ^ C[3];
277 D[0] = C[0] = ROL64(C[1], 1) ^ C[4];
278 D[2] = C[1] = ROL64(C[3], 1) ^ C[1];
279 D[3] = C[2] = ROL64(C[4], 1) ^ C[2];
280
281 T[0][0] = A[3][0] ^ C[0]; /* borrow T[0][0] */
282 T[0][1] = A[0][1] ^ E[0]; /* D[1] */
283 T[0][2] = A[0][2] ^ C[1]; /* D[2] */
284 T[0][3] = A[0][3] ^ C[2]; /* D[3] */
285 T[0][4] = A[0][4] ^ E[1]; /* D[4] */
286
287 C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
288 C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
289 C[0] = A[0][0] ^ C[0]; /* rotate by 0 */ /* D[0] */
290 C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); /* D[2] */
291 C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); /* D[1] */
292#else
293 D[0] = ROL64(C[1], 1) ^ C[4];
294 D[1] = ROL64(C[2], 1) ^ C[0];
295 D[2] = ROL64(C[3], 1) ^ C[1];
296 D[3] = ROL64(C[4], 1) ^ C[2];
297 D[4] = ROL64(C[0], 1) ^ C[3];
298
299 T[0][0] = A[3][0] ^ D[0]; /* borrow T[0][0] */
300 T[0][1] = A[0][1] ^ D[1];
301 T[0][2] = A[0][2] ^ D[2];
302 T[0][3] = A[0][3] ^ D[3];
303 T[0][4] = A[0][4] ^ D[4];
304
305 C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
306 C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
307 C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
308 C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
309 C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
310#endif
311 A[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
312 A[0][1] = C[1] ^ (~C[2] & C[3]);
313 A[0][2] = C[2] ^ (~C[3] & C[4]);
314 A[0][3] = C[3] ^ (~C[4] & C[0]);
315 A[0][4] = C[4] ^ (~C[0] & C[1]);
316
317 T[1][0] = A[1][0] ^ (C[3] = D[0]);
318 T[1][1] = A[2][1] ^ (C[4] = D[1]); /* borrow T[1][1] */
319 T[1][2] = A[1][2] ^ (E[0] = D[2]);
320 T[1][3] = A[1][3] ^ (E[1] = D[3]);
321 T[1][4] = A[2][4] ^ (C[2] = D[4]); /* borrow T[1][4] */
322
323 C[0] = ROL64(T[0][3], rhotates[0][3]);
324 C[1] = ROL64(A[1][4] ^ C[2], rhotates[1][4]); /* D[4] */
325 C[2] = ROL64(A[2][0] ^ C[3], rhotates[2][0]); /* D[0] */
326 C[3] = ROL64(A[3][1] ^ C[4], rhotates[3][1]); /* D[1] */
327 C[4] = ROL64(A[4][2] ^ E[0], rhotates[4][2]); /* D[2] */
328
329 A[1][0] = C[0] ^ (~C[1] & C[2]);
330 A[1][1] = C[1] ^ (~C[2] & C[3]);
331 A[1][2] = C[2] ^ (~C[3] & C[4]);
332 A[1][3] = C[3] ^ (~C[4] & C[0]);
333 A[1][4] = C[4] ^ (~C[0] & C[1]);
334
335 C[0] = ROL64(T[0][1], rhotates[0][1]);
336 C[1] = ROL64(T[1][2], rhotates[1][2]);
337 C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
338 C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
339 C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
340
341 A[2][0] = C[0] ^ (~C[1] & C[2]);
342 A[2][1] = C[1] ^ (~C[2] & C[3]);
343 A[2][2] = C[2] ^ (~C[3] & C[4]);
344 A[2][3] = C[3] ^ (~C[4] & C[0]);
345 A[2][4] = C[4] ^ (~C[0] & C[1]);
346
347 C[0] = ROL64(T[0][4], rhotates[0][4]);
348 C[1] = ROL64(T[1][0], rhotates[1][0]);
349 C[2] = ROL64(T[1][1], rhotates[2][1]); /* originally A[2][1] */
350 C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
351 C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
352
353 A[3][0] = C[0] ^ (~C[1] & C[2]);
354 A[3][1] = C[1] ^ (~C[2] & C[3]);
355 A[3][2] = C[2] ^ (~C[3] & C[4]);
356 A[3][3] = C[3] ^ (~C[4] & C[0]);
357 A[3][4] = C[4] ^ (~C[0] & C[1]);
358
359 C[0] = ROL64(T[0][2], rhotates[0][2]);
360 C[1] = ROL64(T[1][3], rhotates[1][3]);
361 C[2] = ROL64(T[1][4], rhotates[2][4]); /* originally A[2][4] */
362 C[3] = ROL64(T[0][0], rhotates[3][0]); /* originally A[3][0] */
363 C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
364
365 A[4][0] = C[0] ^ (~C[1] & C[2]);
366 A[4][1] = C[1] ^ (~C[2] & C[3]);
367 A[4][2] = C[2] ^ (~C[3] & C[4]);
368 A[4][3] = C[3] ^ (~C[4] & C[0]);
369 A[4][4] = C[4] ^ (~C[0] & C[1]);
370}
371
372static void KeccakF1600(uint64_t A[5][5])
373{
374 size_t i;
375
376 for (i = 0; i < 24; i++) {
377 Round(A, i);
378 }
379}
380
381#elif defined(KECCAK_1X_ALT)
382/*
383 * This is variant of above KECCAK_1X that reduces requirement for
384 * temporary storage even further, but at cost of more updates to A[][].
385 * It's less suitable if A[][] is memory bound, but better if it's
386 * register bound.
387 */
388
389static void Round(uint64_t A[5][5], size_t i)
390{
391 uint64_t C[5], D[5];
392
393 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
394
395 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
396 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
397 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
398 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
399 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
400
401 D[1] = C[0] ^ ROL64(C[2], 1);
402 D[2] = C[1] ^ ROL64(C[3], 1);
403 D[3] = C[2] ^= ROL64(C[4], 1);
404 D[4] = C[3] ^= ROL64(C[0], 1);
405 D[0] = C[4] ^= ROL64(C[1], 1);
406
407 A[0][1] ^= D[1];
408 A[1][1] ^= D[1];
409 A[2][1] ^= D[1];
410 A[3][1] ^= D[1];
411 A[4][1] ^= D[1];
412
413 A[0][2] ^= D[2];
414 A[1][2] ^= D[2];
415 A[2][2] ^= D[2];
416 A[3][2] ^= D[2];
417 A[4][2] ^= D[2];
418
419 A[0][3] ^= C[2];
420 A[1][3] ^= C[2];
421 A[2][3] ^= C[2];
422 A[3][3] ^= C[2];
423 A[4][3] ^= C[2];
424
425 A[0][4] ^= C[3];
426 A[1][4] ^= C[3];
427 A[2][4] ^= C[3];
428 A[3][4] ^= C[3];
429 A[4][4] ^= C[3];
430
431 A[0][0] ^= C[4];
432 A[1][0] ^= C[4];
433 A[2][0] ^= C[4];
434 A[3][0] ^= C[4];
435 A[4][0] ^= C[4];
436
437 C[1] = A[0][1];
438 C[2] = A[0][2];
439 C[3] = A[0][3];
440 C[4] = A[0][4];
441
442 A[0][1] = ROL64(A[1][1], rhotates[1][1]);
443 A[0][2] = ROL64(A[2][2], rhotates[2][2]);
444 A[0][3] = ROL64(A[3][3], rhotates[3][3]);
445 A[0][4] = ROL64(A[4][4], rhotates[4][4]);
446
447 A[1][1] = ROL64(A[1][4], rhotates[1][4]);
448 A[2][2] = ROL64(A[2][3], rhotates[2][3]);
449 A[3][3] = ROL64(A[3][2], rhotates[3][2]);
450 A[4][4] = ROL64(A[4][1], rhotates[4][1]);
451
452 A[1][4] = ROL64(A[4][2], rhotates[4][2]);
453 A[2][3] = ROL64(A[3][4], rhotates[3][4]);
454 A[3][2] = ROL64(A[2][1], rhotates[2][1]);
455 A[4][1] = ROL64(A[1][3], rhotates[1][3]);
456
457 A[4][2] = ROL64(A[2][4], rhotates[2][4]);
458 A[3][4] = ROL64(A[4][3], rhotates[4][3]);
459 A[2][1] = ROL64(A[1][2], rhotates[1][2]);
460 A[1][3] = ROL64(A[3][1], rhotates[3][1]);
461
462 A[2][4] = ROL64(A[4][0], rhotates[4][0]);
463 A[4][3] = ROL64(A[3][0], rhotates[3][0]);
464 A[1][2] = ROL64(A[2][0], rhotates[2][0]);
465 A[3][1] = ROL64(A[1][0], rhotates[1][0]);
466
467 A[1][0] = ROL64(C[3], rhotates[0][3]);
468 A[2][0] = ROL64(C[1], rhotates[0][1]);
469 A[3][0] = ROL64(C[4], rhotates[0][4]);
470 A[4][0] = ROL64(C[2], rhotates[0][2]);
471
472 C[0] = A[0][0];
473 C[1] = A[1][0];
474 D[0] = A[0][1];
475 D[1] = A[1][1];
476
477 A[0][0] ^= (~A[0][1] & A[0][2]);
478 A[1][0] ^= (~A[1][1] & A[1][2]);
479 A[0][1] ^= (~A[0][2] & A[0][3]);
480 A[1][1] ^= (~A[1][2] & A[1][3]);
481 A[0][2] ^= (~A[0][3] & A[0][4]);
482 A[1][2] ^= (~A[1][3] & A[1][4]);
483 A[0][3] ^= (~A[0][4] & C[0]);
484 A[1][3] ^= (~A[1][4] & C[1]);
485 A[0][4] ^= (~C[0] & D[0]);
486 A[1][4] ^= (~C[1] & D[1]);
487
488 C[2] = A[2][0];
489 C[3] = A[3][0];
490 D[2] = A[2][1];
491 D[3] = A[3][1];
492
493 A[2][0] ^= (~A[2][1] & A[2][2]);
494 A[3][0] ^= (~A[3][1] & A[3][2]);
495 A[2][1] ^= (~A[2][2] & A[2][3]);
496 A[3][1] ^= (~A[3][2] & A[3][3]);
497 A[2][2] ^= (~A[2][3] & A[2][4]);
498 A[3][2] ^= (~A[3][3] & A[3][4]);
499 A[2][3] ^= (~A[2][4] & C[2]);
500 A[3][3] ^= (~A[3][4] & C[3]);
501 A[2][4] ^= (~C[2] & D[2]);
502 A[3][4] ^= (~C[3] & D[3]);
503
504 C[4] = A[4][0];
505 D[4] = A[4][1];
506
507 A[4][0] ^= (~A[4][1] & A[4][2]);
508 A[4][1] ^= (~A[4][2] & A[4][3]);
509 A[4][2] ^= (~A[4][3] & A[4][4]);
510 A[4][3] ^= (~A[4][4] & C[4]);
511 A[4][4] ^= (~C[4] & D[4]);
512 A[0][0] ^= iotas[i];
513}
514
515static void KeccakF1600(uint64_t A[5][5])
516{
517 size_t i;
518
519 for (i = 0; i < 24; i++) {
520 Round(A, i);
521 }
522}
523
524#elif defined(KECCAK_2X)
525/*
526 * This implementation is variant of KECCAK_1X above with outer-most
527 * round loop unrolled twice. This allows to take temporary storage
528 * out of round procedure and simplify references to it by alternating
529 * it with actual data (see round loop below). Originally it was meant
530 * rather as reference for an assembly implementation, but it seems to
531 * play best with compilers [as well as provide best instruction per
532 * processed byte ratio at minimal round unroll factor]...
533 */
534static void Round(uint64_t R[5][5], uint64_t A[5][5], size_t i)
535{
536 uint64_t C[5], D[5];
537
538 assert(i < (sizeof(iotas) / sizeof(iotas[0])));
539
540 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
541 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
542 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
543 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
544 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
545
546 D[0] = ROL64(C[1], 1) ^ C[4];
547 D[1] = ROL64(C[2], 1) ^ C[0];
548 D[2] = ROL64(C[3], 1) ^ C[1];
549 D[3] = ROL64(C[4], 1) ^ C[2];
550 D[4] = ROL64(C[0], 1) ^ C[3];
551
552 C[0] = A[0][0] ^ D[0]; /* rotate by 0 */
553 C[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
554 C[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
555 C[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
556 C[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
557
558#ifdef KECCAK_COMPLEMENTING_TRANSFORM
559 R[0][0] = C[0] ^ ( C[1] | C[2]) ^ iotas[i];
560 R[0][1] = C[1] ^ (~C[2] | C[3]);
561 R[0][2] = C[2] ^ ( C[3] & C[4]);
562 R[0][3] = C[3] ^ ( C[4] | C[0]);
563 R[0][4] = C[4] ^ ( C[0] & C[1]);
564#else
565 R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
566 R[0][1] = C[1] ^ (~C[2] & C[3]);
567 R[0][2] = C[2] ^ (~C[3] & C[4]);
568 R[0][3] = C[3] ^ (~C[4] & C[0]);
569 R[0][4] = C[4] ^ (~C[0] & C[1]);
570#endif
571
572 C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
573 C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
574 C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
575 C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
576 C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
577
578#ifdef KECCAK_COMPLEMENTING_TRANSFORM
579 R[1][0] = C[0] ^ (C[1] | C[2]);
580 R[1][1] = C[1] ^ (C[2] & C[3]);
581 R[1][2] = C[2] ^ (C[3] | ~C[4]);
582 R[1][3] = C[3] ^ (C[4] | C[0]);
583 R[1][4] = C[4] ^ (C[0] & C[1]);
584#else
585 R[1][0] = C[0] ^ (~C[1] & C[2]);
586 R[1][1] = C[1] ^ (~C[2] & C[3]);
587 R[1][2] = C[2] ^ (~C[3] & C[4]);
588 R[1][3] = C[3] ^ (~C[4] & C[0]);
589 R[1][4] = C[4] ^ (~C[0] & C[1]);
590#endif
591
592 C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
593 C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
594 C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
595 C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
596 C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
597
598#ifdef KECCAK_COMPLEMENTING_TRANSFORM
599 R[2][0] = C[0] ^ ( C[1] | C[2]);
600 R[2][1] = C[1] ^ ( C[2] & C[3]);
601 R[2][2] = C[2] ^ (~C[3] & C[4]);
602 R[2][3] = ~C[3] ^ ( C[4] | C[0]);
603 R[2][4] = C[4] ^ ( C[0] & C[1]);
604#else
605 R[2][0] = C[0] ^ (~C[1] & C[2]);
606 R[2][1] = C[1] ^ (~C[2] & C[3]);
607 R[2][2] = C[2] ^ (~C[3] & C[4]);
608 R[2][3] = C[3] ^ (~C[4] & C[0]);
609 R[2][4] = C[4] ^ (~C[0] & C[1]);
610#endif
611
612 C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
613 C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
614 C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
615 C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
616 C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
617
618#ifdef KECCAK_COMPLEMENTING_TRANSFORM
619 R[3][0] = C[0] ^ ( C[1] & C[2]);
620 R[3][1] = C[1] ^ ( C[2] | C[3]);
621 R[3][2] = C[2] ^ (~C[3] | C[4]);
622 R[3][3] = ~C[3] ^ ( C[4] & C[0]);
623 R[3][4] = C[4] ^ ( C[0] | C[1]);
624#else
625 R[3][0] = C[0] ^ (~C[1] & C[2]);
626 R[3][1] = C[1] ^ (~C[2] & C[3]);
627 R[3][2] = C[2] ^ (~C[3] & C[4]);
628 R[3][3] = C[3] ^ (~C[4] & C[0]);
629 R[3][4] = C[4] ^ (~C[0] & C[1]);
630#endif
631
632 C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
633 C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
634 C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
635 C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
636 C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
637
638#ifdef KECCAK_COMPLEMENTING_TRANSFORM
639 R[4][0] = C[0] ^ (~C[1] & C[2]);
640 R[4][1] = ~C[1] ^ ( C[2] | C[3]);
641 R[4][2] = C[2] ^ ( C[3] & C[4]);
642 R[4][3] = C[3] ^ ( C[4] | C[0]);
643 R[4][4] = C[4] ^ ( C[0] & C[1]);
644#else
645 R[4][0] = C[0] ^ (~C[1] & C[2]);
646 R[4][1] = C[1] ^ (~C[2] & C[3]);
647 R[4][2] = C[2] ^ (~C[3] & C[4]);
648 R[4][3] = C[3] ^ (~C[4] & C[0]);
649 R[4][4] = C[4] ^ (~C[0] & C[1]);
650#endif
651}
652
653static void KeccakF1600(uint64_t A[5][5])
654{
655 uint64_t T[5][5];
656 size_t i;
657
658#ifdef KECCAK_COMPLEMENTING_TRANSFORM
659 A[0][1] = ~A[0][1];
660 A[0][2] = ~A[0][2];
661 A[1][3] = ~A[1][3];
662 A[2][2] = ~A[2][2];
663 A[3][2] = ~A[3][2];
664 A[4][0] = ~A[4][0];
665#endif
666
667 for (i = 0; i < 24; i += 2) {
668 Round(T, A, i);
669 Round(A, T, i + 1);
670 }
671
672#ifdef KECCAK_COMPLEMENTING_TRANSFORM
673 A[0][1] = ~A[0][1];
674 A[0][2] = ~A[0][2];
675 A[1][3] = ~A[1][3];
676 A[2][2] = ~A[2][2];
677 A[3][2] = ~A[3][2];
678 A[4][0] = ~A[4][0];
679#endif
680}
681
682#else /* define KECCAK_INPLACE to compile this code path */
683/*
684 * This implementation is KECCAK_1X from above combined 4 times with
685 * a twist that allows to omit temporary storage and perform in-place
686 * processing. It's discussed in section 2.5 of "Keccak implementation
687 * overview". It's likely to be best suited for processors with large
688 * register bank... On the other hand processor with large register
689 * bank can as well use KECCAK_1X_ALT, it would be as fast but much
690 * more compact...
691 */
692static void FourRounds(uint64_t A[5][5], size_t i)
693{
694 uint64_t B[5], C[5], D[5];
695
696 assert(i <= (sizeof(iotas) / sizeof(iotas[0]) - 4));
697
698 /* Round 4*n */
699 C[0] = A[0][0] ^ A[1][0] ^ A[2][0] ^ A[3][0] ^ A[4][0];
700 C[1] = A[0][1] ^ A[1][1] ^ A[2][1] ^ A[3][1] ^ A[4][1];
701 C[2] = A[0][2] ^ A[1][2] ^ A[2][2] ^ A[3][2] ^ A[4][2];
702 C[3] = A[0][3] ^ A[1][3] ^ A[2][3] ^ A[3][3] ^ A[4][3];
703 C[4] = A[0][4] ^ A[1][4] ^ A[2][4] ^ A[3][4] ^ A[4][4];
704
705 D[0] = ROL64(C[1], 1) ^ C[4];
706 D[1] = ROL64(C[2], 1) ^ C[0];
707 D[2] = ROL64(C[3], 1) ^ C[1];
708 D[3] = ROL64(C[4], 1) ^ C[2];
709 D[4] = ROL64(C[0], 1) ^ C[3];
710
711 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
712 B[1] = ROL64(A[1][1] ^ D[1], rhotates[1][1]);
713 B[2] = ROL64(A[2][2] ^ D[2], rhotates[2][2]);
714 B[3] = ROL64(A[3][3] ^ D[3], rhotates[3][3]);
715 B[4] = ROL64(A[4][4] ^ D[4], rhotates[4][4]);
716
717 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i];
718 C[1] = A[1][1] = B[1] ^ (~B[2] & B[3]);
719 C[2] = A[2][2] = B[2] ^ (~B[3] & B[4]);
720 C[3] = A[3][3] = B[3] ^ (~B[4] & B[0]);
721 C[4] = A[4][4] = B[4] ^ (~B[0] & B[1]);
722
723 B[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
724 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
725 B[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
726 B[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
727 B[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
728
729 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
730 C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
731 C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
732 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
733 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
734
735 B[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
736 B[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
737 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
738 B[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
739 B[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
740
741 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
742 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
743 C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
744 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
745 C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
746
747 B[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
748 B[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
749 B[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
750 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
751 B[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
752
753 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
754 C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
755 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
756 C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
757 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
758
759 B[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
760 B[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
761 B[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
762 B[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
763 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
764
765 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
766 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
767 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
768 C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
769 C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
770
771 /* Round 4*n+1 */
772 D[0] = ROL64(C[1], 1) ^ C[4];
773 D[1] = ROL64(C[2], 1) ^ C[0];
774 D[2] = ROL64(C[3], 1) ^ C[1];
775 D[3] = ROL64(C[4], 1) ^ C[2];
776 D[4] = ROL64(C[0], 1) ^ C[3];
777
778 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
779 B[1] = ROL64(A[3][1] ^ D[1], rhotates[1][1]);
780 B[2] = ROL64(A[1][2] ^ D[2], rhotates[2][2]);
781 B[3] = ROL64(A[4][3] ^ D[3], rhotates[3][3]);
782 B[4] = ROL64(A[2][4] ^ D[4], rhotates[4][4]);
783
784 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 1];
785 C[1] = A[3][1] = B[1] ^ (~B[2] & B[3]);
786 C[2] = A[1][2] = B[2] ^ (~B[3] & B[4]);
787 C[3] = A[4][3] = B[3] ^ (~B[4] & B[0]);
788 C[4] = A[2][4] = B[4] ^ (~B[0] & B[1]);
789
790 B[0] = ROL64(A[3][3] ^ D[3], rhotates[0][3]);
791 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
792 B[2] = ROL64(A[4][0] ^ D[0], rhotates[2][0]);
793 B[3] = ROL64(A[2][1] ^ D[1], rhotates[3][1]);
794 B[4] = ROL64(A[0][2] ^ D[2], rhotates[4][2]);
795
796 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
797 C[1] ^= A[2][1] = B[1] ^ (~B[2] & B[3]);
798 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
799 C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
800 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
801
802 B[0] = ROL64(A[1][1] ^ D[1], rhotates[0][1]);
803 B[1] = ROL64(A[4][2] ^ D[2], rhotates[1][2]);
804 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
805 B[3] = ROL64(A[0][4] ^ D[4], rhotates[3][4]);
806 B[4] = ROL64(A[3][0] ^ D[0], rhotates[4][0]);
807
808 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
809 C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
810 C[2] ^= A[4][2] = B[2] ^ (~B[3] & B[4]);
811 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
812 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
813
814 B[0] = ROL64(A[4][4] ^ D[4], rhotates[0][4]);
815 B[1] = ROL64(A[2][0] ^ D[0], rhotates[1][0]);
816 B[2] = ROL64(A[0][1] ^ D[1], rhotates[2][1]);
817 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
818 B[4] = ROL64(A[1][3] ^ D[3], rhotates[4][3]);
819
820 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
821 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
822 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
823 C[3] ^= A[1][3] = B[3] ^ (~B[4] & B[0]);
824 C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
825
826 B[0] = ROL64(A[2][2] ^ D[2], rhotates[0][2]);
827 B[1] = ROL64(A[0][3] ^ D[3], rhotates[1][3]);
828 B[2] = ROL64(A[3][4] ^ D[4], rhotates[2][4]);
829 B[3] = ROL64(A[1][0] ^ D[0], rhotates[3][0]);
830 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
831
832 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
833 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
834 C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
835 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
836 C[4] ^= A[3][4] = B[4] ^ (~B[0] & B[1]);
837
838 /* Round 4*n+2 */
839 D[0] = ROL64(C[1], 1) ^ C[4];
840 D[1] = ROL64(C[2], 1) ^ C[0];
841 D[2] = ROL64(C[3], 1) ^ C[1];
842 D[3] = ROL64(C[4], 1) ^ C[2];
843 D[4] = ROL64(C[0], 1) ^ C[3];
844
845 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
846 B[1] = ROL64(A[2][1] ^ D[1], rhotates[1][1]);
847 B[2] = ROL64(A[4][2] ^ D[2], rhotates[2][2]);
848 B[3] = ROL64(A[1][3] ^ D[3], rhotates[3][3]);
849 B[4] = ROL64(A[3][4] ^ D[4], rhotates[4][4]);
850
851 C[0] = A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 2];
852 C[1] = A[2][1] = B[1] ^ (~B[2] & B[3]);
853 C[2] = A[4][2] = B[2] ^ (~B[3] & B[4]);
854 C[3] = A[1][3] = B[3] ^ (~B[4] & B[0]);
855 C[4] = A[3][4] = B[4] ^ (~B[0] & B[1]);
856
857 B[0] = ROL64(A[4][3] ^ D[3], rhotates[0][3]);
858 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
859 B[2] = ROL64(A[3][0] ^ D[0], rhotates[2][0]);
860 B[3] = ROL64(A[0][1] ^ D[1], rhotates[3][1]);
861 B[4] = ROL64(A[2][2] ^ D[2], rhotates[4][2]);
862
863 C[0] ^= A[3][0] = B[0] ^ (~B[1] & B[2]);
864 C[1] ^= A[0][1] = B[1] ^ (~B[2] & B[3]);
865 C[2] ^= A[2][2] = B[2] ^ (~B[3] & B[4]);
866 C[3] ^= A[4][3] = B[3] ^ (~B[4] & B[0]);
867 C[4] ^= A[1][4] = B[4] ^ (~B[0] & B[1]);
868
869 B[0] = ROL64(A[3][1] ^ D[1], rhotates[0][1]);
870 B[1] = ROL64(A[0][2] ^ D[2], rhotates[1][2]);
871 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
872 B[3] = ROL64(A[4][4] ^ D[4], rhotates[3][4]);
873 B[4] = ROL64(A[1][0] ^ D[0], rhotates[4][0]);
874
875 C[0] ^= A[1][0] = B[0] ^ (~B[1] & B[2]);
876 C[1] ^= A[3][1] = B[1] ^ (~B[2] & B[3]);
877 C[2] ^= A[0][2] = B[2] ^ (~B[3] & B[4]);
878 C[3] ^= A[2][3] = B[3] ^ (~B[4] & B[0]);
879 C[4] ^= A[4][4] = B[4] ^ (~B[0] & B[1]);
880
881 B[0] = ROL64(A[2][4] ^ D[4], rhotates[0][4]);
882 B[1] = ROL64(A[4][0] ^ D[0], rhotates[1][0]);
883 B[2] = ROL64(A[1][1] ^ D[1], rhotates[2][1]);
884 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
885 B[4] = ROL64(A[0][3] ^ D[3], rhotates[4][3]);
886
887 C[0] ^= A[4][0] = B[0] ^ (~B[1] & B[2]);
888 C[1] ^= A[1][1] = B[1] ^ (~B[2] & B[3]);
889 C[2] ^= A[3][2] = B[2] ^ (~B[3] & B[4]);
890 C[3] ^= A[0][3] = B[3] ^ (~B[4] & B[0]);
891 C[4] ^= A[2][4] = B[4] ^ (~B[0] & B[1]);
892
893 B[0] = ROL64(A[1][2] ^ D[2], rhotates[0][2]);
894 B[1] = ROL64(A[3][3] ^ D[3], rhotates[1][3]);
895 B[2] = ROL64(A[0][4] ^ D[4], rhotates[2][4]);
896 B[3] = ROL64(A[2][0] ^ D[0], rhotates[3][0]);
897 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
898
899 C[0] ^= A[2][0] = B[0] ^ (~B[1] & B[2]);
900 C[1] ^= A[4][1] = B[1] ^ (~B[2] & B[3]);
901 C[2] ^= A[1][2] = B[2] ^ (~B[3] & B[4]);
902 C[3] ^= A[3][3] = B[3] ^ (~B[4] & B[0]);
903 C[4] ^= A[0][4] = B[4] ^ (~B[0] & B[1]);
904
905 /* Round 4*n+3 */
906 D[0] = ROL64(C[1], 1) ^ C[4];
907 D[1] = ROL64(C[2], 1) ^ C[0];
908 D[2] = ROL64(C[3], 1) ^ C[1];
909 D[3] = ROL64(C[4], 1) ^ C[2];
910 D[4] = ROL64(C[0], 1) ^ C[3];
911
912 B[0] = A[0][0] ^ D[0]; /* rotate by 0 */
913 B[1] = ROL64(A[0][1] ^ D[1], rhotates[1][1]);
914 B[2] = ROL64(A[0][2] ^ D[2], rhotates[2][2]);
915 B[3] = ROL64(A[0][3] ^ D[3], rhotates[3][3]);
916 B[4] = ROL64(A[0][4] ^ D[4], rhotates[4][4]);
917
918 /* C[0] = */ A[0][0] = B[0] ^ (~B[1] & B[2]) ^ iotas[i + 3];
919 /* C[1] = */ A[0][1] = B[1] ^ (~B[2] & B[3]);
920 /* C[2] = */ A[0][2] = B[2] ^ (~B[3] & B[4]);
921 /* C[3] = */ A[0][3] = B[3] ^ (~B[4] & B[0]);
922 /* C[4] = */ A[0][4] = B[4] ^ (~B[0] & B[1]);
923
924 B[0] = ROL64(A[1][3] ^ D[3], rhotates[0][3]);
925 B[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
926 B[2] = ROL64(A[1][0] ^ D[0], rhotates[2][0]);
927 B[3] = ROL64(A[1][1] ^ D[1], rhotates[3][1]);
928 B[4] = ROL64(A[1][2] ^ D[2], rhotates[4][2]);
929
930 /* C[0] ^= */ A[1][0] = B[0] ^ (~B[1] & B[2]);
931 /* C[1] ^= */ A[1][1] = B[1] ^ (~B[2] & B[3]);
932 /* C[2] ^= */ A[1][2] = B[2] ^ (~B[3] & B[4]);
933 /* C[3] ^= */ A[1][3] = B[3] ^ (~B[4] & B[0]);
934 /* C[4] ^= */ A[1][4] = B[4] ^ (~B[0] & B[1]);
935
936 B[0] = ROL64(A[2][1] ^ D[1], rhotates[0][1]);
937 B[1] = ROL64(A[2][2] ^ D[2], rhotates[1][2]);
938 B[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
939 B[3] = ROL64(A[2][4] ^ D[4], rhotates[3][4]);
940 B[4] = ROL64(A[2][0] ^ D[0], rhotates[4][0]);
941
942 /* C[0] ^= */ A[2][0] = B[0] ^ (~B[1] & B[2]);
943 /* C[1] ^= */ A[2][1] = B[1] ^ (~B[2] & B[3]);
944 /* C[2] ^= */ A[2][2] = B[2] ^ (~B[3] & B[4]);
945 /* C[3] ^= */ A[2][3] = B[3] ^ (~B[4] & B[0]);
946 /* C[4] ^= */ A[2][4] = B[4] ^ (~B[0] & B[1]);
947
948 B[0] = ROL64(A[3][4] ^ D[4], rhotates[0][4]);
949 B[1] = ROL64(A[3][0] ^ D[0], rhotates[1][0]);
950 B[2] = ROL64(A[3][1] ^ D[1], rhotates[2][1]);
951 B[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
952 B[4] = ROL64(A[3][3] ^ D[3], rhotates[4][3]);
953
954 /* C[0] ^= */ A[3][0] = B[0] ^ (~B[1] & B[2]);
955 /* C[1] ^= */ A[3][1] = B[1] ^ (~B[2] & B[3]);
956 /* C[2] ^= */ A[3][2] = B[2] ^ (~B[3] & B[4]);
957 /* C[3] ^= */ A[3][3] = B[3] ^ (~B[4] & B[0]);
958 /* C[4] ^= */ A[3][4] = B[4] ^ (~B[0] & B[1]);
959
960 B[0] = ROL64(A[4][2] ^ D[2], rhotates[0][2]);
961 B[1] = ROL64(A[4][3] ^ D[3], rhotates[1][3]);
962 B[2] = ROL64(A[4][4] ^ D[4], rhotates[2][4]);
963 B[3] = ROL64(A[4][0] ^ D[0], rhotates[3][0]);
964 B[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
965
966 /* C[0] ^= */ A[4][0] = B[0] ^ (~B[1] & B[2]);
967 /* C[1] ^= */ A[4][1] = B[1] ^ (~B[2] & B[3]);
968 /* C[2] ^= */ A[4][2] = B[2] ^ (~B[3] & B[4]);
969 /* C[3] ^= */ A[4][3] = B[3] ^ (~B[4] & B[0]);
970 /* C[4] ^= */ A[4][4] = B[4] ^ (~B[0] & B[1]);
971}
972
973static void KeccakF1600(uint64_t A[5][5])
974{
975 size_t i;
976
977 for (i = 0; i < 24; i += 4) {
978 FourRounds(A, i);
979 }
980}
981
982#endif
983
984static uint64_t BitInterleave(uint64_t Ai)
985{
986 if (BIT_INTERLEAVE) {
987 uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
988 uint32_t t0, t1;
989
990 t0 = lo & 0x55555555;
991 t0 |= t0 >> 1; t0 &= 0x33333333;
992 t0 |= t0 >> 2; t0 &= 0x0f0f0f0f;
993 t0 |= t0 >> 4; t0 &= 0x00ff00ff;
994 t0 |= t0 >> 8; t0 &= 0x0000ffff;
995
996 t1 = hi & 0x55555555;
997 t1 |= t1 >> 1; t1 &= 0x33333333;
998 t1 |= t1 >> 2; t1 &= 0x0f0f0f0f;
999 t1 |= t1 >> 4; t1 &= 0x00ff00ff;
1000 t1 |= t1 >> 8; t1 <<= 16;
1001
1002 lo &= 0xaaaaaaaa;
1003 lo |= lo << 1; lo &= 0xcccccccc;
1004 lo |= lo << 2; lo &= 0xf0f0f0f0;
1005 lo |= lo << 4; lo &= 0xff00ff00;
1006 lo |= lo << 8; lo >>= 16;
1007
1008 hi &= 0xaaaaaaaa;
1009 hi |= hi << 1; hi &= 0xcccccccc;
1010 hi |= hi << 2; hi &= 0xf0f0f0f0;
1011 hi |= hi << 4; hi &= 0xff00ff00;
1012 hi |= hi << 8; hi &= 0xffff0000;
1013
1014 Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
1015 }
1016
1017 return Ai;
1018}
1019
1020static uint64_t BitDeinterleave(uint64_t Ai)
1021{
1022 if (BIT_INTERLEAVE) {
1023 uint32_t hi = (uint32_t)(Ai >> 32), lo = (uint32_t)Ai;
1024 uint32_t t0, t1;
1025
1026 t0 = lo & 0x0000ffff;
1027 t0 |= t0 << 8; t0 &= 0x00ff00ff;
1028 t0 |= t0 << 4; t0 &= 0x0f0f0f0f;
1029 t0 |= t0 << 2; t0 &= 0x33333333;
1030 t0 |= t0 << 1; t0 &= 0x55555555;
1031
1032 t1 = hi << 16;
1033 t1 |= t1 >> 8; t1 &= 0xff00ff00;
1034 t1 |= t1 >> 4; t1 &= 0xf0f0f0f0;
1035 t1 |= t1 >> 2; t1 &= 0xcccccccc;
1036 t1 |= t1 >> 1; t1 &= 0xaaaaaaaa;
1037
1038 lo >>= 16;
1039 lo |= lo << 8; lo &= 0x00ff00ff;
1040 lo |= lo << 4; lo &= 0x0f0f0f0f;
1041 lo |= lo << 2; lo &= 0x33333333;
1042 lo |= lo << 1; lo &= 0x55555555;
1043
1044 hi &= 0xffff0000;
1045 hi |= hi >> 8; hi &= 0xff00ff00;
1046 hi |= hi >> 4; hi &= 0xf0f0f0f0;
1047 hi |= hi >> 2; hi &= 0xcccccccc;
1048 hi |= hi >> 1; hi &= 0xaaaaaaaa;
1049
1050 Ai = ((uint64_t)(hi | lo) << 32) | (t1 | t0);
1051 }
1052
1053 return Ai;
1054}
1055
1056/*
1057 * SHA3_absorb can be called multiple times, but at each invocation
1058 * largest multiple of |r| out of |len| bytes are processed. Then
1059 * remaining amount of bytes is returned. This is done to spare caller
1060 * trouble of calculating the largest multiple of |r|. |r| can be viewed
1061 * as blocksize. It is commonly (1600 - 256*n)/8, e.g. 168, 136, 104,
1062 * 72, but can also be (1600 - 448)/8 = 144. All this means that message
1063 * padding and intermediate sub-block buffering, byte- or bitwise, is
1064 * caller's responsibility.
1065 */
1066size_t SHA3_absorb(uint64_t A[5][5], const unsigned char *inp, size_t len,
1067 size_t r)
1068{
1069 uint64_t *A_flat = (uint64_t *)A;
1070 size_t i, w = r / 8;
1071
1072 assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);
1073
1074 while (len >= r) {
1075 for (i = 0; i < w; i++) {
1076 uint64_t Ai = (uint64_t)inp[0] | (uint64_t)inp[1] << 8 |
1077 (uint64_t)inp[2] << 16 | (uint64_t)inp[3] << 24 |
1078 (uint64_t)inp[4] << 32 | (uint64_t)inp[5] << 40 |
1079 (uint64_t)inp[6] << 48 | (uint64_t)inp[7] << 56;
1080 inp += 8;
1081
1082 A_flat[i] ^= BitInterleave(Ai);
1083 }
1084 KeccakF1600(A);
1085 len -= r;
1086 }
1087
1088 return len;
1089}
1090
1091/*
1092 * SHA3_squeeze is called once at the end to generate |out| hash value
1093 * of |len| bytes.
1094 */
1095void SHA3_squeeze(uint64_t A[5][5], unsigned char *out, size_t len, size_t r)
1096{
1097 uint64_t *A_flat = (uint64_t *)A;
1098 size_t i, w = r / 8;
1099
1100 assert(r < (25 * sizeof(A[0][0])) && (r % 8) == 0);
1101
1102 while (len != 0) {
1103 for (i = 0; i < w && len != 0; i++) {
1104 uint64_t Ai = BitDeinterleave(A_flat[i]);
1105
1106 if (len < 8) {
1107 for (i = 0; i < len; i++) {
1108 *out++ = (unsigned char)Ai;
1109 Ai >>= 8;
1110 }
1111 return;
1112 }
1113
1114 out[0] = (unsigned char)(Ai);
1115 out[1] = (unsigned char)(Ai >> 8);
1116 out[2] = (unsigned char)(Ai >> 16);
1117 out[3] = (unsigned char)(Ai >> 24);
1118 out[4] = (unsigned char)(Ai >> 32);
1119 out[5] = (unsigned char)(Ai >> 40);
1120 out[6] = (unsigned char)(Ai >> 48);
1121 out[7] = (unsigned char)(Ai >> 56);
1122 out += 8;
1123 len -= 8;
1124 }
1125 if (len)
1126 KeccakF1600(A);
1127 }
1128}
1129#endif
1130
1131#ifdef SELFTEST
1132/*
1133 * Post-padding one-shot implementations would look as following:
1134 *
1135 * SHA3_224 SHA3_sponge(inp, len, out, 224/8, (1600-448)/8);
1136 * SHA3_256 SHA3_sponge(inp, len, out, 256/8, (1600-512)/8);
1137 * SHA3_384 SHA3_sponge(inp, len, out, 384/8, (1600-768)/8);
1138 * SHA3_512 SHA3_sponge(inp, len, out, 512/8, (1600-1024)/8);
1139 * SHAKE_128 SHA3_sponge(inp, len, out, d, (1600-256)/8);
1140 * SHAKE_256 SHA3_sponge(inp, len, out, d, (1600-512)/8);
1141 */
1142
1143void SHA3_sponge(const unsigned char *inp, size_t len,
1144 unsigned char *out, size_t d, size_t r)
1145{
1146 uint64_t A[5][5];
1147
1148 memset(A, 0, sizeof(A));
1149 SHA3_absorb(A, inp, len, r);
1150 SHA3_squeeze(A, out, d, r);
1151}
1152
1153# include <stdio.h>
1154
1155int main()
1156{
1157 /*
1158 * This is 5-bit SHAKE128 test from http://csrc.nist.gov/groups/ST/toolkit/examples.html#aHashing
1159 */
1160 unsigned char test[168] = { '\xf3', '\x3' };
1161 unsigned char out[512];
1162 size_t i;
1163 static const unsigned char result[512] = {
1164 0x2E, 0x0A, 0xBF, 0xBA, 0x83, 0xE6, 0x72, 0x0B,
1165 0xFB, 0xC2, 0x25, 0xFF, 0x6B, 0x7A, 0xB9, 0xFF,
1166 0xCE, 0x58, 0xBA, 0x02, 0x7E, 0xE3, 0xD8, 0x98,
1167 0x76, 0x4F, 0xEF, 0x28, 0x7D, 0xDE, 0xCC, 0xCA,
1168 0x3E, 0x6E, 0x59, 0x98, 0x41, 0x1E, 0x7D, 0xDB,
1169 0x32, 0xF6, 0x75, 0x38, 0xF5, 0x00, 0xB1, 0x8C,
1170 0x8C, 0x97, 0xC4, 0x52, 0xC3, 0x70, 0xEA, 0x2C,
1171 0xF0, 0xAF, 0xCA, 0x3E, 0x05, 0xDE, 0x7E, 0x4D,
1172 0xE2, 0x7F, 0xA4, 0x41, 0xA9, 0xCB, 0x34, 0xFD,
1173 0x17, 0xC9, 0x78, 0xB4, 0x2D, 0x5B, 0x7E, 0x7F,
1174 0x9A, 0xB1, 0x8F, 0xFE, 0xFF, 0xC3, 0xC5, 0xAC,
1175 0x2F, 0x3A, 0x45, 0x5E, 0xEB, 0xFD, 0xC7, 0x6C,
1176 0xEA, 0xEB, 0x0A, 0x2C, 0xCA, 0x22, 0xEE, 0xF6,
1177 0xE6, 0x37, 0xF4, 0xCA, 0xBE, 0x5C, 0x51, 0xDE,
1178 0xD2, 0xE3, 0xFA, 0xD8, 0xB9, 0x52, 0x70, 0xA3,
1179 0x21, 0x84, 0x56, 0x64, 0xF1, 0x07, 0xD1, 0x64,
1180 0x96, 0xBB, 0x7A, 0xBF, 0xBE, 0x75, 0x04, 0xB6,
1181 0xED, 0xE2, 0xE8, 0x9E, 0x4B, 0x99, 0x6F, 0xB5,
1182 0x8E, 0xFD, 0xC4, 0x18, 0x1F, 0x91, 0x63, 0x38,
1183 0x1C, 0xBE, 0x7B, 0xC0, 0x06, 0xA7, 0xA2, 0x05,
1184 0x98, 0x9C, 0x52, 0x6C, 0xD1, 0xBD, 0x68, 0x98,
1185 0x36, 0x93, 0xB4, 0xBD, 0xC5, 0x37, 0x28, 0xB2,
1186 0x41, 0xC1, 0xCF, 0xF4, 0x2B, 0xB6, 0x11, 0x50,
1187 0x2C, 0x35, 0x20, 0x5C, 0xAB, 0xB2, 0x88, 0x75,
1188 0x56, 0x55, 0xD6, 0x20, 0xC6, 0x79, 0x94, 0xF0,
1189 0x64, 0x51, 0x18, 0x7F, 0x6F, 0xD1, 0x7E, 0x04,
1190 0x66, 0x82, 0xBA, 0x12, 0x86, 0x06, 0x3F, 0xF8,
1191 0x8F, 0xE2, 0x50, 0x8D, 0x1F, 0xCA, 0xF9, 0x03,
1192 0x5A, 0x12, 0x31, 0xAD, 0x41, 0x50, 0xA9, 0xC9,
1193 0xB2, 0x4C, 0x9B, 0x2D, 0x66, 0xB2, 0xAD, 0x1B,
1194 0xDE, 0x0B, 0xD0, 0xBB, 0xCB, 0x8B, 0xE0, 0x5B,
1195 0x83, 0x52, 0x29, 0xEF, 0x79, 0x19, 0x73, 0x73,
1196 0x23, 0x42, 0x44, 0x01, 0xE1, 0xD8, 0x37, 0xB6,
1197 0x6E, 0xB4, 0xE6, 0x30, 0xFF, 0x1D, 0xE7, 0x0C,
1198 0xB3, 0x17, 0xC2, 0xBA, 0xCB, 0x08, 0x00, 0x1D,
1199 0x34, 0x77, 0xB7, 0xA7, 0x0A, 0x57, 0x6D, 0x20,
1200 0x86, 0x90, 0x33, 0x58, 0x9D, 0x85, 0xA0, 0x1D,
1201 0xDB, 0x2B, 0x66, 0x46, 0xC0, 0x43, 0xB5, 0x9F,
1202 0xC0, 0x11, 0x31, 0x1D, 0xA6, 0x66, 0xFA, 0x5A,
1203 0xD1, 0xD6, 0x38, 0x7F, 0xA9, 0xBC, 0x40, 0x15,
1204 0xA3, 0x8A, 0x51, 0xD1, 0xDA, 0x1E, 0xA6, 0x1D,
1205 0x64, 0x8D, 0xC8, 0xE3, 0x9A, 0x88, 0xB9, 0xD6,
1206 0x22, 0xBD, 0xE2, 0x07, 0xFD, 0xAB, 0xC6, 0xF2,
1207 0x82, 0x7A, 0x88, 0x0C, 0x33, 0x0B, 0xBF, 0x6D,
1208 0xF7, 0x33, 0x77, 0x4B, 0x65, 0x3E, 0x57, 0x30,
1209 0x5D, 0x78, 0xDC, 0xE1, 0x12, 0xF1, 0x0A, 0x2C,
1210 0x71, 0xF4, 0xCD, 0xAD, 0x92, 0xED, 0x11, 0x3E,
1211 0x1C, 0xEA, 0x63, 0xB9, 0x19, 0x25, 0xED, 0x28,
1212 0x19, 0x1E, 0x6D, 0xBB, 0xB5, 0xAA, 0x5A, 0x2A,
1213 0xFD, 0xA5, 0x1F, 0xC0, 0x5A, 0x3A, 0xF5, 0x25,
1214 0x8B, 0x87, 0x66, 0x52, 0x43, 0x55, 0x0F, 0x28,
1215 0x94, 0x8A, 0xE2, 0xB8, 0xBE, 0xB6, 0xBC, 0x9C,
1216 0x77, 0x0B, 0x35, 0xF0, 0x67, 0xEA, 0xA6, 0x41,
1217 0xEF, 0xE6, 0x5B, 0x1A, 0x44, 0x90, 0x9D, 0x1B,
1218 0x14, 0x9F, 0x97, 0xEE, 0xA6, 0x01, 0x39, 0x1C,
1219 0x60, 0x9E, 0xC8, 0x1D, 0x19, 0x30, 0xF5, 0x7C,
1220 0x18, 0xA4, 0xE0, 0xFA, 0xB4, 0x91, 0xD1, 0xCA,
1221 0xDF, 0xD5, 0x04, 0x83, 0x44, 0x9E, 0xDC, 0x0F,
1222 0x07, 0xFF, 0xB2, 0x4D, 0x2C, 0x6F, 0x9A, 0x9A,
1223 0x3B, 0xFF, 0x39, 0xAE, 0x3D, 0x57, 0xF5, 0x60,
1224 0x65, 0x4D, 0x7D, 0x75, 0xC9, 0x08, 0xAB, 0xE6,
1225 0x25, 0x64, 0x75, 0x3E, 0xAC, 0x39, 0xD7, 0x50,
1226 0x3D, 0xA6, 0xD3, 0x7C, 0x2E, 0x32, 0xE1, 0xAF,
1227 0x3B, 0x8A, 0xEC, 0x8A, 0xE3, 0x06, 0x9C, 0xD9
1228 };
1229
1230 test[167] = '\x80';
1231 SHA3_sponge(test, sizeof(test), out, sizeof(out), sizeof(test));
1232
1233 /*
1234 * Rationale behind keeping output [formatted as below] is that
1235 * one should be able to redirect it to a file, then copy-n-paste
1236 * final "output val" from official example to another file, and
1237 * compare the two with diff(1).
1238 */
1239 for (i = 0; i < sizeof(out);) {
1240 printf("%02X", out[i]);
1241 printf(++i % 16 && i != sizeof(out) ? " " : "\n");
1242 }
1243
1244 if (memcmp(out,result,sizeof(out))) {
1245 fprintf(stderr,"failure\n");
1246 return 1;
1247 } else {
1248 fprintf(stderr,"success\n");
1249 return 0;
1250 }
1251}
1252#endif
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette