1 | /*
|
---|
2 | * Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
|
---|
3 | * Copyright (c) 2014, Intel Corporation. All Rights Reserved.
|
---|
4 | * Copyright (c) 2015, CloudFlare, Inc.
|
---|
5 | *
|
---|
6 | * Licensed under the OpenSSL license (the "License"). You may not use
|
---|
7 | * this file except in compliance with the License. You can obtain a copy
|
---|
8 | * in the file LICENSE in the source distribution or at
|
---|
9 | * https://www.openssl.org/source/license.html
|
---|
10 | *
|
---|
11 | * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1, 3)
|
---|
12 | * (1) Intel Corporation, Israel Development Center, Haifa, Israel
|
---|
13 | * (2) University of Haifa, Israel
|
---|
14 | * (3) CloudFlare, Inc.
|
---|
15 | *
|
---|
16 | * Reference:
|
---|
17 | * S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with
|
---|
18 | * 256 Bit Primes"
|
---|
19 | */
|
---|
20 |
|
---|
21 | #include <string.h>
|
---|
22 |
|
---|
23 | #include "internal/cryptlib.h"
|
---|
24 | #include "crypto/bn.h"
|
---|
25 | #include "ec_local.h"
|
---|
26 | #include "internal/refcount.h"
|
---|
27 |
|
---|
28 | #if BN_BITS2 != 64
|
---|
29 | # define TOBN(hi,lo) lo,hi
|
---|
30 | #else
|
---|
31 | # define TOBN(hi,lo) ((BN_ULONG)hi<<32|lo)
|
---|
32 | #endif
|
---|
33 |
|
---|
34 | #if defined(__GNUC__)
|
---|
35 | # define ALIGN32 __attribute((aligned(32)))
|
---|
36 | #elif defined(_MSC_VER)
|
---|
37 | # define ALIGN32 __declspec(align(32))
|
---|
38 | #else
|
---|
39 | # define ALIGN32
|
---|
40 | #endif
|
---|
41 |
|
---|
42 | #define ALIGNPTR(p,N) ((unsigned char *)p+N-(size_t)p%N)
|
---|
43 | #define P256_LIMBS (256/BN_BITS2)
|
---|
44 |
|
---|
45 | typedef unsigned short u16;
|
---|
46 |
|
---|
47 | typedef struct {
|
---|
48 | BN_ULONG X[P256_LIMBS];
|
---|
49 | BN_ULONG Y[P256_LIMBS];
|
---|
50 | BN_ULONG Z[P256_LIMBS];
|
---|
51 | } P256_POINT;
|
---|
52 |
|
---|
53 | typedef struct {
|
---|
54 | BN_ULONG X[P256_LIMBS];
|
---|
55 | BN_ULONG Y[P256_LIMBS];
|
---|
56 | } P256_POINT_AFFINE;
|
---|
57 |
|
---|
58 | typedef P256_POINT_AFFINE PRECOMP256_ROW[64];
|
---|
59 |
|
---|
60 | /* structure for precomputed multiples of the generator */
|
---|
61 | struct nistz256_pre_comp_st {
|
---|
62 | const EC_GROUP *group; /* Parent EC_GROUP object */
|
---|
63 | size_t w; /* Window size */
|
---|
64 | /*
|
---|
65 | * Constant time access to the X and Y coordinates of the pre-computed,
|
---|
66 | * generator multiplies, in the Montgomery domain. Pre-calculated
|
---|
67 | * multiplies are stored in affine form.
|
---|
68 | */
|
---|
69 | PRECOMP256_ROW *precomp;
|
---|
70 | void *precomp_storage;
|
---|
71 | CRYPTO_REF_COUNT references;
|
---|
72 | CRYPTO_RWLOCK *lock;
|
---|
73 | };
|
---|
74 |
|
---|
75 | /* Functions implemented in assembly */
|
---|
76 | /*
|
---|
77 | * Most of below mentioned functions *preserve* the property of inputs
|
---|
78 | * being fully reduced, i.e. being in [0, modulus) range. Simply put if
|
---|
79 | * inputs are fully reduced, then output is too. Note that reverse is
|
---|
80 | * not true, in sense that given partially reduced inputs output can be
|
---|
81 | * either, not unlikely reduced. And "most" in first sentence refers to
|
---|
82 | * the fact that given the calculations flow one can tolerate that
|
---|
83 | * addition, 1st function below, produces partially reduced result *if*
|
---|
84 | * multiplications by 2 and 3, which customarily use addition, fully
|
---|
85 | * reduce it. This effectively gives two options: a) addition produces
|
---|
86 | * fully reduced result [as long as inputs are, just like remaining
|
---|
87 | * functions]; b) addition is allowed to produce partially reduced
|
---|
88 | * result, but multiplications by 2 and 3 perform additional reduction
|
---|
89 | * step. Choice between the two can be platform-specific, but it was a)
|
---|
90 | * in all cases so far...
|
---|
91 | */
|
---|
92 | /* Modular add: res = a+b mod P */
|
---|
93 | void ecp_nistz256_add(BN_ULONG res[P256_LIMBS],
|
---|
94 | const BN_ULONG a[P256_LIMBS],
|
---|
95 | const BN_ULONG b[P256_LIMBS]);
|
---|
96 | /* Modular mul by 2: res = 2*a mod P */
|
---|
97 | void ecp_nistz256_mul_by_2(BN_ULONG res[P256_LIMBS],
|
---|
98 | const BN_ULONG a[P256_LIMBS]);
|
---|
99 | /* Modular mul by 3: res = 3*a mod P */
|
---|
100 | void ecp_nistz256_mul_by_3(BN_ULONG res[P256_LIMBS],
|
---|
101 | const BN_ULONG a[P256_LIMBS]);
|
---|
102 |
|
---|
103 | /* Modular div by 2: res = a/2 mod P */
|
---|
104 | void ecp_nistz256_div_by_2(BN_ULONG res[P256_LIMBS],
|
---|
105 | const BN_ULONG a[P256_LIMBS]);
|
---|
106 | /* Modular sub: res = a-b mod P */
|
---|
107 | void ecp_nistz256_sub(BN_ULONG res[P256_LIMBS],
|
---|
108 | const BN_ULONG a[P256_LIMBS],
|
---|
109 | const BN_ULONG b[P256_LIMBS]);
|
---|
110 | /* Modular neg: res = -a mod P */
|
---|
111 | void ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]);
|
---|
112 | /* Montgomery mul: res = a*b*2^-256 mod P */
|
---|
113 | void ecp_nistz256_mul_mont(BN_ULONG res[P256_LIMBS],
|
---|
114 | const BN_ULONG a[P256_LIMBS],
|
---|
115 | const BN_ULONG b[P256_LIMBS]);
|
---|
116 | /* Montgomery sqr: res = a*a*2^-256 mod P */
|
---|
117 | void ecp_nistz256_sqr_mont(BN_ULONG res[P256_LIMBS],
|
---|
118 | const BN_ULONG a[P256_LIMBS]);
|
---|
119 | /* Convert a number from Montgomery domain, by multiplying with 1 */
|
---|
120 | void ecp_nistz256_from_mont(BN_ULONG res[P256_LIMBS],
|
---|
121 | const BN_ULONG in[P256_LIMBS]);
|
---|
122 | /* Convert a number to Montgomery domain, by multiplying with 2^512 mod P*/
|
---|
123 | void ecp_nistz256_to_mont(BN_ULONG res[P256_LIMBS],
|
---|
124 | const BN_ULONG in[P256_LIMBS]);
|
---|
125 | /* Functions that perform constant time access to the precomputed tables */
|
---|
126 | void ecp_nistz256_scatter_w5(P256_POINT *val,
|
---|
127 | const P256_POINT *in_t, int idx);
|
---|
128 | void ecp_nistz256_gather_w5(P256_POINT *val,
|
---|
129 | const P256_POINT *in_t, int idx);
|
---|
130 | void ecp_nistz256_scatter_w7(P256_POINT_AFFINE *val,
|
---|
131 | const P256_POINT_AFFINE *in_t, int idx);
|
---|
132 | void ecp_nistz256_gather_w7(P256_POINT_AFFINE *val,
|
---|
133 | const P256_POINT_AFFINE *in_t, int idx);
|
---|
134 |
|
---|
135 | /* One converted into the Montgomery domain */
|
---|
136 | static const BN_ULONG ONE[P256_LIMBS] = {
|
---|
137 | TOBN(0x00000000, 0x00000001), TOBN(0xffffffff, 0x00000000),
|
---|
138 | TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe)
|
---|
139 | };
|
---|
140 |
|
---|
141 | static NISTZ256_PRE_COMP *ecp_nistz256_pre_comp_new(const EC_GROUP *group);
|
---|
142 |
|
---|
143 | /* Precomputed tables for the default generator */
|
---|
144 | extern const PRECOMP256_ROW ecp_nistz256_precomputed[37];
|
---|
145 |
|
---|
146 | /* Recode window to a signed digit, see ecp_nistputil.c for details */
|
---|
147 | static unsigned int _booth_recode_w5(unsigned int in)
|
---|
148 | {
|
---|
149 | unsigned int s, d;
|
---|
150 |
|
---|
151 | s = ~((in >> 5) - 1);
|
---|
152 | d = (1 << 6) - in - 1;
|
---|
153 | d = (d & s) | (in & ~s);
|
---|
154 | d = (d >> 1) + (d & 1);
|
---|
155 |
|
---|
156 | return (d << 1) + (s & 1);
|
---|
157 | }
|
---|
158 |
|
---|
159 | static unsigned int _booth_recode_w7(unsigned int in)
|
---|
160 | {
|
---|
161 | unsigned int s, d;
|
---|
162 |
|
---|
163 | s = ~((in >> 7) - 1);
|
---|
164 | d = (1 << 8) - in - 1;
|
---|
165 | d = (d & s) | (in & ~s);
|
---|
166 | d = (d >> 1) + (d & 1);
|
---|
167 |
|
---|
168 | return (d << 1) + (s & 1);
|
---|
169 | }
|
---|
170 |
|
---|
171 | static void copy_conditional(BN_ULONG dst[P256_LIMBS],
|
---|
172 | const BN_ULONG src[P256_LIMBS], BN_ULONG move)
|
---|
173 | {
|
---|
174 | BN_ULONG mask1 = 0-move;
|
---|
175 | BN_ULONG mask2 = ~mask1;
|
---|
176 |
|
---|
177 | dst[0] = (src[0] & mask1) ^ (dst[0] & mask2);
|
---|
178 | dst[1] = (src[1] & mask1) ^ (dst[1] & mask2);
|
---|
179 | dst[2] = (src[2] & mask1) ^ (dst[2] & mask2);
|
---|
180 | dst[3] = (src[3] & mask1) ^ (dst[3] & mask2);
|
---|
181 | if (P256_LIMBS == 8) {
|
---|
182 | dst[4] = (src[4] & mask1) ^ (dst[4] & mask2);
|
---|
183 | dst[5] = (src[5] & mask1) ^ (dst[5] & mask2);
|
---|
184 | dst[6] = (src[6] & mask1) ^ (dst[6] & mask2);
|
---|
185 | dst[7] = (src[7] & mask1) ^ (dst[7] & mask2);
|
---|
186 | }
|
---|
187 | }
|
---|
188 |
|
---|
189 | static BN_ULONG is_zero(BN_ULONG in)
|
---|
190 | {
|
---|
191 | in |= (0 - in);
|
---|
192 | in = ~in;
|
---|
193 | in >>= BN_BITS2 - 1;
|
---|
194 | return in;
|
---|
195 | }
|
---|
196 |
|
---|
197 | static BN_ULONG is_equal(const BN_ULONG a[P256_LIMBS],
|
---|
198 | const BN_ULONG b[P256_LIMBS])
|
---|
199 | {
|
---|
200 | BN_ULONG res;
|
---|
201 |
|
---|
202 | res = a[0] ^ b[0];
|
---|
203 | res |= a[1] ^ b[1];
|
---|
204 | res |= a[2] ^ b[2];
|
---|
205 | res |= a[3] ^ b[3];
|
---|
206 | if (P256_LIMBS == 8) {
|
---|
207 | res |= a[4] ^ b[4];
|
---|
208 | res |= a[5] ^ b[5];
|
---|
209 | res |= a[6] ^ b[6];
|
---|
210 | res |= a[7] ^ b[7];
|
---|
211 | }
|
---|
212 |
|
---|
213 | return is_zero(res);
|
---|
214 | }
|
---|
215 |
|
---|
216 | static BN_ULONG is_one(const BIGNUM *z)
|
---|
217 | {
|
---|
218 | BN_ULONG res = 0;
|
---|
219 | BN_ULONG *a = bn_get_words(z);
|
---|
220 |
|
---|
221 | if (bn_get_top(z) == (P256_LIMBS - P256_LIMBS / 8)) {
|
---|
222 | res = a[0] ^ ONE[0];
|
---|
223 | res |= a[1] ^ ONE[1];
|
---|
224 | res |= a[2] ^ ONE[2];
|
---|
225 | res |= a[3] ^ ONE[3];
|
---|
226 | if (P256_LIMBS == 8) {
|
---|
227 | res |= a[4] ^ ONE[4];
|
---|
228 | res |= a[5] ^ ONE[5];
|
---|
229 | res |= a[6] ^ ONE[6];
|
---|
230 | /*
|
---|
231 | * no check for a[7] (being zero) on 32-bit platforms,
|
---|
232 | * because value of "one" takes only 7 limbs.
|
---|
233 | */
|
---|
234 | }
|
---|
235 | res = is_zero(res);
|
---|
236 | }
|
---|
237 |
|
---|
238 | return res;
|
---|
239 | }
|
---|
240 |
|
---|
241 | /*
|
---|
242 | * For reference, this macro is used only when new ecp_nistz256 assembly
|
---|
243 | * module is being developed. For example, configure with
|
---|
244 | * -DECP_NISTZ256_REFERENCE_IMPLEMENTATION and implement only functions
|
---|
245 | * performing simplest arithmetic operations on 256-bit vectors. Then
|
---|
246 | * work on implementation of higher-level functions performing point
|
---|
247 | * operations. Then remove ECP_NISTZ256_REFERENCE_IMPLEMENTATION
|
---|
248 | * and never define it again. (The correct macro denoting presence of
|
---|
249 | * ecp_nistz256 module is ECP_NISTZ256_ASM.)
|
---|
250 | */
|
---|
251 | #ifndef ECP_NISTZ256_REFERENCE_IMPLEMENTATION
|
---|
252 | void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a);
|
---|
253 | void ecp_nistz256_point_add(P256_POINT *r,
|
---|
254 | const P256_POINT *a, const P256_POINT *b);
|
---|
255 | void ecp_nistz256_point_add_affine(P256_POINT *r,
|
---|
256 | const P256_POINT *a,
|
---|
257 | const P256_POINT_AFFINE *b);
|
---|
258 | #else
|
---|
259 | /* Point double: r = 2*a */
|
---|
260 | static void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a)
|
---|
261 | {
|
---|
262 | BN_ULONG S[P256_LIMBS];
|
---|
263 | BN_ULONG M[P256_LIMBS];
|
---|
264 | BN_ULONG Zsqr[P256_LIMBS];
|
---|
265 | BN_ULONG tmp0[P256_LIMBS];
|
---|
266 |
|
---|
267 | const BN_ULONG *in_x = a->X;
|
---|
268 | const BN_ULONG *in_y = a->Y;
|
---|
269 | const BN_ULONG *in_z = a->Z;
|
---|
270 |
|
---|
271 | BN_ULONG *res_x = r->X;
|
---|
272 | BN_ULONG *res_y = r->Y;
|
---|
273 | BN_ULONG *res_z = r->Z;
|
---|
274 |
|
---|
275 | ecp_nistz256_mul_by_2(S, in_y);
|
---|
276 |
|
---|
277 | ecp_nistz256_sqr_mont(Zsqr, in_z);
|
---|
278 |
|
---|
279 | ecp_nistz256_sqr_mont(S, S);
|
---|
280 |
|
---|
281 | ecp_nistz256_mul_mont(res_z, in_z, in_y);
|
---|
282 | ecp_nistz256_mul_by_2(res_z, res_z);
|
---|
283 |
|
---|
284 | ecp_nistz256_add(M, in_x, Zsqr);
|
---|
285 | ecp_nistz256_sub(Zsqr, in_x, Zsqr);
|
---|
286 |
|
---|
287 | ecp_nistz256_sqr_mont(res_y, S);
|
---|
288 | ecp_nistz256_div_by_2(res_y, res_y);
|
---|
289 |
|
---|
290 | ecp_nistz256_mul_mont(M, M, Zsqr);
|
---|
291 | ecp_nistz256_mul_by_3(M, M);
|
---|
292 |
|
---|
293 | ecp_nistz256_mul_mont(S, S, in_x);
|
---|
294 | ecp_nistz256_mul_by_2(tmp0, S);
|
---|
295 |
|
---|
296 | ecp_nistz256_sqr_mont(res_x, M);
|
---|
297 |
|
---|
298 | ecp_nistz256_sub(res_x, res_x, tmp0);
|
---|
299 | ecp_nistz256_sub(S, S, res_x);
|
---|
300 |
|
---|
301 | ecp_nistz256_mul_mont(S, S, M);
|
---|
302 | ecp_nistz256_sub(res_y, S, res_y);
|
---|
303 | }
|
---|
304 |
|
---|
305 | /* Point addition: r = a+b */
|
---|
306 | static void ecp_nistz256_point_add(P256_POINT *r,
|
---|
307 | const P256_POINT *a, const P256_POINT *b)
|
---|
308 | {
|
---|
309 | BN_ULONG U2[P256_LIMBS], S2[P256_LIMBS];
|
---|
310 | BN_ULONG U1[P256_LIMBS], S1[P256_LIMBS];
|
---|
311 | BN_ULONG Z1sqr[P256_LIMBS];
|
---|
312 | BN_ULONG Z2sqr[P256_LIMBS];
|
---|
313 | BN_ULONG H[P256_LIMBS], R[P256_LIMBS];
|
---|
314 | BN_ULONG Hsqr[P256_LIMBS];
|
---|
315 | BN_ULONG Rsqr[P256_LIMBS];
|
---|
316 | BN_ULONG Hcub[P256_LIMBS];
|
---|
317 |
|
---|
318 | BN_ULONG res_x[P256_LIMBS];
|
---|
319 | BN_ULONG res_y[P256_LIMBS];
|
---|
320 | BN_ULONG res_z[P256_LIMBS];
|
---|
321 |
|
---|
322 | BN_ULONG in1infty, in2infty;
|
---|
323 |
|
---|
324 | const BN_ULONG *in1_x = a->X;
|
---|
325 | const BN_ULONG *in1_y = a->Y;
|
---|
326 | const BN_ULONG *in1_z = a->Z;
|
---|
327 |
|
---|
328 | const BN_ULONG *in2_x = b->X;
|
---|
329 | const BN_ULONG *in2_y = b->Y;
|
---|
330 | const BN_ULONG *in2_z = b->Z;
|
---|
331 |
|
---|
332 | /*
|
---|
333 | * Infinity in encoded as (,,0)
|
---|
334 | */
|
---|
335 | in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
|
---|
336 | if (P256_LIMBS == 8)
|
---|
337 | in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);
|
---|
338 |
|
---|
339 | in2infty = (in2_z[0] | in2_z[1] | in2_z[2] | in2_z[3]);
|
---|
340 | if (P256_LIMBS == 8)
|
---|
341 | in2infty |= (in2_z[4] | in2_z[5] | in2_z[6] | in2_z[7]);
|
---|
342 |
|
---|
343 | in1infty = is_zero(in1infty);
|
---|
344 | in2infty = is_zero(in2infty);
|
---|
345 |
|
---|
346 | ecp_nistz256_sqr_mont(Z2sqr, in2_z); /* Z2^2 */
|
---|
347 | ecp_nistz256_sqr_mont(Z1sqr, in1_z); /* Z1^2 */
|
---|
348 |
|
---|
349 | ecp_nistz256_mul_mont(S1, Z2sqr, in2_z); /* S1 = Z2^3 */
|
---|
350 | ecp_nistz256_mul_mont(S2, Z1sqr, in1_z); /* S2 = Z1^3 */
|
---|
351 |
|
---|
352 | ecp_nistz256_mul_mont(S1, S1, in1_y); /* S1 = Y1*Z2^3 */
|
---|
353 | ecp_nistz256_mul_mont(S2, S2, in2_y); /* S2 = Y2*Z1^3 */
|
---|
354 | ecp_nistz256_sub(R, S2, S1); /* R = S2 - S1 */
|
---|
355 |
|
---|
356 | ecp_nistz256_mul_mont(U1, in1_x, Z2sqr); /* U1 = X1*Z2^2 */
|
---|
357 | ecp_nistz256_mul_mont(U2, in2_x, Z1sqr); /* U2 = X2*Z1^2 */
|
---|
358 | ecp_nistz256_sub(H, U2, U1); /* H = U2 - U1 */
|
---|
359 |
|
---|
360 | /*
|
---|
361 | * The formulae are incorrect if the points are equal so we check for
|
---|
362 | * this and do doubling if this happens.
|
---|
363 | *
|
---|
364 | * Points here are in Jacobian projective coordinates (Xi, Yi, Zi)
|
---|
365 | * that are bound to the affine coordinates (xi, yi) by the following
|
---|
366 | * equations:
|
---|
367 | * - xi = Xi / (Zi)^2
|
---|
368 | * - y1 = Yi / (Zi)^3
|
---|
369 | *
|
---|
370 | * For the sake of optimization, the algorithm operates over
|
---|
371 | * intermediate variables U1, U2 and S1, S2 that are derived from
|
---|
372 | * the projective coordinates:
|
---|
373 | * - U1 = X1 * (Z2)^2 ; U2 = X2 * (Z1)^2
|
---|
374 | * - S1 = Y1 * (Z2)^3 ; S2 = Y2 * (Z1)^3
|
---|
375 | *
|
---|
376 | * It is easy to prove that is_equal(U1, U2) implies that the affine
|
---|
377 | * x-coordinates are equal, or either point is at infinity.
|
---|
378 | * Likewise is_equal(S1, S2) implies that the affine y-coordinates are
|
---|
379 | * equal, or either point is at infinity.
|
---|
380 | *
|
---|
381 | * The special case of either point being the point at infinity (Z1 or Z2
|
---|
382 | * is zero), is handled separately later on in this function, so we avoid
|
---|
383 | * jumping to point_double here in those special cases.
|
---|
384 | *
|
---|
385 | * When both points are inverse of each other, we know that the affine
|
---|
386 | * x-coordinates are equal, and the y-coordinates have different sign.
|
---|
387 | * Therefore since U1 = U2, we know H = 0, and therefore Z3 = H*Z1*Z2
|
---|
388 | * will equal 0, thus the result is infinity, if we simply let this
|
---|
389 | * function continue normally.
|
---|
390 | *
|
---|
391 | * We use bitwise operations to avoid potential side-channels introduced by
|
---|
392 | * the short-circuiting behaviour of boolean operators.
|
---|
393 | */
|
---|
394 | if (is_equal(U1, U2) & ~in1infty & ~in2infty & is_equal(S1, S2)) {
|
---|
395 | /*
|
---|
396 | * This is obviously not constant-time but it should never happen during
|
---|
397 | * single point multiplication, so there is no timing leak for ECDH or
|
---|
398 | * ECDSA signing.
|
---|
399 | */
|
---|
400 | ecp_nistz256_point_double(r, a);
|
---|
401 | return;
|
---|
402 | }
|
---|
403 |
|
---|
404 | ecp_nistz256_sqr_mont(Rsqr, R); /* R^2 */
|
---|
405 | ecp_nistz256_mul_mont(res_z, H, in1_z); /* Z3 = H*Z1*Z2 */
|
---|
406 | ecp_nistz256_sqr_mont(Hsqr, H); /* H^2 */
|
---|
407 | ecp_nistz256_mul_mont(res_z, res_z, in2_z); /* Z3 = H*Z1*Z2 */
|
---|
408 | ecp_nistz256_mul_mont(Hcub, Hsqr, H); /* H^3 */
|
---|
409 |
|
---|
410 | ecp_nistz256_mul_mont(U2, U1, Hsqr); /* U1*H^2 */
|
---|
411 | ecp_nistz256_mul_by_2(Hsqr, U2); /* 2*U1*H^2 */
|
---|
412 |
|
---|
413 | ecp_nistz256_sub(res_x, Rsqr, Hsqr);
|
---|
414 | ecp_nistz256_sub(res_x, res_x, Hcub);
|
---|
415 |
|
---|
416 | ecp_nistz256_sub(res_y, U2, res_x);
|
---|
417 |
|
---|
418 | ecp_nistz256_mul_mont(S2, S1, Hcub);
|
---|
419 | ecp_nistz256_mul_mont(res_y, R, res_y);
|
---|
420 | ecp_nistz256_sub(res_y, res_y, S2);
|
---|
421 |
|
---|
422 | copy_conditional(res_x, in2_x, in1infty);
|
---|
423 | copy_conditional(res_y, in2_y, in1infty);
|
---|
424 | copy_conditional(res_z, in2_z, in1infty);
|
---|
425 |
|
---|
426 | copy_conditional(res_x, in1_x, in2infty);
|
---|
427 | copy_conditional(res_y, in1_y, in2infty);
|
---|
428 | copy_conditional(res_z, in1_z, in2infty);
|
---|
429 |
|
---|
430 | memcpy(r->X, res_x, sizeof(res_x));
|
---|
431 | memcpy(r->Y, res_y, sizeof(res_y));
|
---|
432 | memcpy(r->Z, res_z, sizeof(res_z));
|
---|
433 | }
|
---|
434 |
|
---|
435 | /* Point addition when b is known to be affine: r = a+b */
|
---|
436 | static void ecp_nistz256_point_add_affine(P256_POINT *r,
|
---|
437 | const P256_POINT *a,
|
---|
438 | const P256_POINT_AFFINE *b)
|
---|
439 | {
|
---|
440 | BN_ULONG U2[P256_LIMBS], S2[P256_LIMBS];
|
---|
441 | BN_ULONG Z1sqr[P256_LIMBS];
|
---|
442 | BN_ULONG H[P256_LIMBS], R[P256_LIMBS];
|
---|
443 | BN_ULONG Hsqr[P256_LIMBS];
|
---|
444 | BN_ULONG Rsqr[P256_LIMBS];
|
---|
445 | BN_ULONG Hcub[P256_LIMBS];
|
---|
446 |
|
---|
447 | BN_ULONG res_x[P256_LIMBS];
|
---|
448 | BN_ULONG res_y[P256_LIMBS];
|
---|
449 | BN_ULONG res_z[P256_LIMBS];
|
---|
450 |
|
---|
451 | BN_ULONG in1infty, in2infty;
|
---|
452 |
|
---|
453 | const BN_ULONG *in1_x = a->X;
|
---|
454 | const BN_ULONG *in1_y = a->Y;
|
---|
455 | const BN_ULONG *in1_z = a->Z;
|
---|
456 |
|
---|
457 | const BN_ULONG *in2_x = b->X;
|
---|
458 | const BN_ULONG *in2_y = b->Y;
|
---|
459 |
|
---|
460 | /*
|
---|
461 | * Infinity in encoded as (,,0)
|
---|
462 | */
|
---|
463 | in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
|
---|
464 | if (P256_LIMBS == 8)
|
---|
465 | in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);
|
---|
466 |
|
---|
467 | /*
|
---|
468 | * In affine representation we encode infinity as (0,0), which is
|
---|
469 | * not on the curve, so it is OK
|
---|
470 | */
|
---|
471 | in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
|
---|
472 | in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
|
---|
473 | if (P256_LIMBS == 8)
|
---|
474 | in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] |
|
---|
475 | in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]);
|
---|
476 |
|
---|
477 | in1infty = is_zero(in1infty);
|
---|
478 | in2infty = is_zero(in2infty);
|
---|
479 |
|
---|
480 | ecp_nistz256_sqr_mont(Z1sqr, in1_z); /* Z1^2 */
|
---|
481 |
|
---|
482 | ecp_nistz256_mul_mont(U2, in2_x, Z1sqr); /* U2 = X2*Z1^2 */
|
---|
483 | ecp_nistz256_sub(H, U2, in1_x); /* H = U2 - U1 */
|
---|
484 |
|
---|
485 | ecp_nistz256_mul_mont(S2, Z1sqr, in1_z); /* S2 = Z1^3 */
|
---|
486 |
|
---|
487 | ecp_nistz256_mul_mont(res_z, H, in1_z); /* Z3 = H*Z1*Z2 */
|
---|
488 |
|
---|
489 | ecp_nistz256_mul_mont(S2, S2, in2_y); /* S2 = Y2*Z1^3 */
|
---|
490 | ecp_nistz256_sub(R, S2, in1_y); /* R = S2 - S1 */
|
---|
491 |
|
---|
492 | ecp_nistz256_sqr_mont(Hsqr, H); /* H^2 */
|
---|
493 | ecp_nistz256_sqr_mont(Rsqr, R); /* R^2 */
|
---|
494 | ecp_nistz256_mul_mont(Hcub, Hsqr, H); /* H^3 */
|
---|
495 |
|
---|
496 | ecp_nistz256_mul_mont(U2, in1_x, Hsqr); /* U1*H^2 */
|
---|
497 | ecp_nistz256_mul_by_2(Hsqr, U2); /* 2*U1*H^2 */
|
---|
498 |
|
---|
499 | ecp_nistz256_sub(res_x, Rsqr, Hsqr);
|
---|
500 | ecp_nistz256_sub(res_x, res_x, Hcub);
|
---|
501 | ecp_nistz256_sub(H, U2, res_x);
|
---|
502 |
|
---|
503 | ecp_nistz256_mul_mont(S2, in1_y, Hcub);
|
---|
504 | ecp_nistz256_mul_mont(H, H, R);
|
---|
505 | ecp_nistz256_sub(res_y, H, S2);
|
---|
506 |
|
---|
507 | copy_conditional(res_x, in2_x, in1infty);
|
---|
508 | copy_conditional(res_x, in1_x, in2infty);
|
---|
509 |
|
---|
510 | copy_conditional(res_y, in2_y, in1infty);
|
---|
511 | copy_conditional(res_y, in1_y, in2infty);
|
---|
512 |
|
---|
513 | copy_conditional(res_z, ONE, in1infty);
|
---|
514 | copy_conditional(res_z, in1_z, in2infty);
|
---|
515 |
|
---|
516 | memcpy(r->X, res_x, sizeof(res_x));
|
---|
517 | memcpy(r->Y, res_y, sizeof(res_y));
|
---|
518 | memcpy(r->Z, res_z, sizeof(res_z));
|
---|
519 | }
|
---|
520 | #endif
|
---|
521 |
|
---|
522 | /* r = in^-1 mod p */
|
---|
523 | static void ecp_nistz256_mod_inverse(BN_ULONG r[P256_LIMBS],
|
---|
524 | const BN_ULONG in[P256_LIMBS])
|
---|
525 | {
|
---|
526 | /*
|
---|
527 | * The poly is ffffffff 00000001 00000000 00000000 00000000 ffffffff
|
---|
528 | * ffffffff ffffffff We use FLT and used poly-2 as exponent
|
---|
529 | */
|
---|
530 | BN_ULONG p2[P256_LIMBS];
|
---|
531 | BN_ULONG p4[P256_LIMBS];
|
---|
532 | BN_ULONG p8[P256_LIMBS];
|
---|
533 | BN_ULONG p16[P256_LIMBS];
|
---|
534 | BN_ULONG p32[P256_LIMBS];
|
---|
535 | BN_ULONG res[P256_LIMBS];
|
---|
536 | int i;
|
---|
537 |
|
---|
538 | ecp_nistz256_sqr_mont(res, in);
|
---|
539 | ecp_nistz256_mul_mont(p2, res, in); /* 3*p */
|
---|
540 |
|
---|
541 | ecp_nistz256_sqr_mont(res, p2);
|
---|
542 | ecp_nistz256_sqr_mont(res, res);
|
---|
543 | ecp_nistz256_mul_mont(p4, res, p2); /* f*p */
|
---|
544 |
|
---|
545 | ecp_nistz256_sqr_mont(res, p4);
|
---|
546 | ecp_nistz256_sqr_mont(res, res);
|
---|
547 | ecp_nistz256_sqr_mont(res, res);
|
---|
548 | ecp_nistz256_sqr_mont(res, res);
|
---|
549 | ecp_nistz256_mul_mont(p8, res, p4); /* ff*p */
|
---|
550 |
|
---|
551 | ecp_nistz256_sqr_mont(res, p8);
|
---|
552 | for (i = 0; i < 7; i++)
|
---|
553 | ecp_nistz256_sqr_mont(res, res);
|
---|
554 | ecp_nistz256_mul_mont(p16, res, p8); /* ffff*p */
|
---|
555 |
|
---|
556 | ecp_nistz256_sqr_mont(res, p16);
|
---|
557 | for (i = 0; i < 15; i++)
|
---|
558 | ecp_nistz256_sqr_mont(res, res);
|
---|
559 | ecp_nistz256_mul_mont(p32, res, p16); /* ffffffff*p */
|
---|
560 |
|
---|
561 | ecp_nistz256_sqr_mont(res, p32);
|
---|
562 | for (i = 0; i < 31; i++)
|
---|
563 | ecp_nistz256_sqr_mont(res, res);
|
---|
564 | ecp_nistz256_mul_mont(res, res, in);
|
---|
565 |
|
---|
566 | for (i = 0; i < 32 * 4; i++)
|
---|
567 | ecp_nistz256_sqr_mont(res, res);
|
---|
568 | ecp_nistz256_mul_mont(res, res, p32);
|
---|
569 |
|
---|
570 | for (i = 0; i < 32; i++)
|
---|
571 | ecp_nistz256_sqr_mont(res, res);
|
---|
572 | ecp_nistz256_mul_mont(res, res, p32);
|
---|
573 |
|
---|
574 | for (i = 0; i < 16; i++)
|
---|
575 | ecp_nistz256_sqr_mont(res, res);
|
---|
576 | ecp_nistz256_mul_mont(res, res, p16);
|
---|
577 |
|
---|
578 | for (i = 0; i < 8; i++)
|
---|
579 | ecp_nistz256_sqr_mont(res, res);
|
---|
580 | ecp_nistz256_mul_mont(res, res, p8);
|
---|
581 |
|
---|
582 | ecp_nistz256_sqr_mont(res, res);
|
---|
583 | ecp_nistz256_sqr_mont(res, res);
|
---|
584 | ecp_nistz256_sqr_mont(res, res);
|
---|
585 | ecp_nistz256_sqr_mont(res, res);
|
---|
586 | ecp_nistz256_mul_mont(res, res, p4);
|
---|
587 |
|
---|
588 | ecp_nistz256_sqr_mont(res, res);
|
---|
589 | ecp_nistz256_sqr_mont(res, res);
|
---|
590 | ecp_nistz256_mul_mont(res, res, p2);
|
---|
591 |
|
---|
592 | ecp_nistz256_sqr_mont(res, res);
|
---|
593 | ecp_nistz256_sqr_mont(res, res);
|
---|
594 | ecp_nistz256_mul_mont(res, res, in);
|
---|
595 |
|
---|
596 | memcpy(r, res, sizeof(res));
|
---|
597 | }
|
---|
598 |
|
---|
599 | /*
|
---|
600 | * ecp_nistz256_bignum_to_field_elem copies the contents of |in| to |out| and
|
---|
601 | * returns one if it fits. Otherwise it returns zero.
|
---|
602 | */
|
---|
603 | __owur static int ecp_nistz256_bignum_to_field_elem(BN_ULONG out[P256_LIMBS],
|
---|
604 | const BIGNUM *in)
|
---|
605 | {
|
---|
606 | return bn_copy_words(out, in, P256_LIMBS);
|
---|
607 | }
|
---|
608 |
|
---|
609 | /* r = sum(scalar[i]*point[i]) */
|
---|
610 | __owur static int ecp_nistz256_windowed_mul(const EC_GROUP *group,
|
---|
611 | P256_POINT *r,
|
---|
612 | const BIGNUM **scalar,
|
---|
613 | const EC_POINT **point,
|
---|
614 | size_t num, BN_CTX *ctx)
|
---|
615 | {
|
---|
616 | size_t i;
|
---|
617 | int j, ret = 0;
|
---|
618 | unsigned int idx;
|
---|
619 | unsigned char (*p_str)[33] = NULL;
|
---|
620 | const unsigned int window_size = 5;
|
---|
621 | const unsigned int mask = (1 << (window_size + 1)) - 1;
|
---|
622 | unsigned int wvalue;
|
---|
623 | P256_POINT *temp; /* place for 5 temporary points */
|
---|
624 | const BIGNUM **scalars = NULL;
|
---|
625 | P256_POINT (*table)[16] = NULL;
|
---|
626 | void *table_storage = NULL;
|
---|
627 |
|
---|
628 | if ((num * 16 + 6) > OPENSSL_MALLOC_MAX_NELEMS(P256_POINT)
|
---|
629 | || (table_storage =
|
---|
630 | OPENSSL_malloc((num * 16 + 5) * sizeof(P256_POINT) + 64)) == NULL
|
---|
631 | || (p_str =
|
---|
632 | OPENSSL_malloc(num * 33 * sizeof(unsigned char))) == NULL
|
---|
633 | || (scalars = OPENSSL_malloc(num * sizeof(BIGNUM *))) == NULL) {
|
---|
634 | ECerr(EC_F_ECP_NISTZ256_WINDOWED_MUL, ERR_R_MALLOC_FAILURE);
|
---|
635 | goto err;
|
---|
636 | }
|
---|
637 |
|
---|
638 | table = (void *)ALIGNPTR(table_storage, 64);
|
---|
639 | temp = (P256_POINT *)(table + num);
|
---|
640 |
|
---|
641 | for (i = 0; i < num; i++) {
|
---|
642 | P256_POINT *row = table[i];
|
---|
643 |
|
---|
644 | /* This is an unusual input, we don't guarantee constant-timeness. */
|
---|
645 | if ((BN_num_bits(scalar[i]) > 256) || BN_is_negative(scalar[i])) {
|
---|
646 | BIGNUM *mod;
|
---|
647 |
|
---|
648 | if ((mod = BN_CTX_get(ctx)) == NULL)
|
---|
649 | goto err;
|
---|
650 | if (!BN_nnmod(mod, scalar[i], group->order, ctx)) {
|
---|
651 | ECerr(EC_F_ECP_NISTZ256_WINDOWED_MUL, ERR_R_BN_LIB);
|
---|
652 | goto err;
|
---|
653 | }
|
---|
654 | scalars[i] = mod;
|
---|
655 | } else
|
---|
656 | scalars[i] = scalar[i];
|
---|
657 |
|
---|
658 | for (j = 0; j < bn_get_top(scalars[i]) * BN_BYTES; j += BN_BYTES) {
|
---|
659 | BN_ULONG d = bn_get_words(scalars[i])[j / BN_BYTES];
|
---|
660 |
|
---|
661 | p_str[i][j + 0] = (unsigned char)d;
|
---|
662 | p_str[i][j + 1] = (unsigned char)(d >> 8);
|
---|
663 | p_str[i][j + 2] = (unsigned char)(d >> 16);
|
---|
664 | p_str[i][j + 3] = (unsigned char)(d >>= 24);
|
---|
665 | if (BN_BYTES == 8) {
|
---|
666 | d >>= 8;
|
---|
667 | p_str[i][j + 4] = (unsigned char)d;
|
---|
668 | p_str[i][j + 5] = (unsigned char)(d >> 8);
|
---|
669 | p_str[i][j + 6] = (unsigned char)(d >> 16);
|
---|
670 | p_str[i][j + 7] = (unsigned char)(d >> 24);
|
---|
671 | }
|
---|
672 | }
|
---|
673 | for (; j < 33; j++)
|
---|
674 | p_str[i][j] = 0;
|
---|
675 |
|
---|
676 | if (!ecp_nistz256_bignum_to_field_elem(temp[0].X, point[i]->X)
|
---|
677 | || !ecp_nistz256_bignum_to_field_elem(temp[0].Y, point[i]->Y)
|
---|
678 | || !ecp_nistz256_bignum_to_field_elem(temp[0].Z, point[i]->Z)) {
|
---|
679 | ECerr(EC_F_ECP_NISTZ256_WINDOWED_MUL,
|
---|
680 | EC_R_COORDINATES_OUT_OF_RANGE);
|
---|
681 | goto err;
|
---|
682 | }
|
---|
683 |
|
---|
684 | /*
|
---|
685 | * row[0] is implicitly (0,0,0) (the point at infinity), therefore it
|
---|
686 | * is not stored. All other values are actually stored with an offset
|
---|
687 | * of -1 in table.
|
---|
688 | */
|
---|
689 |
|
---|
690 | ecp_nistz256_scatter_w5 (row, &temp[0], 1);
|
---|
691 | ecp_nistz256_point_double(&temp[1], &temp[0]); /*1+1=2 */
|
---|
692 | ecp_nistz256_scatter_w5 (row, &temp[1], 2);
|
---|
693 | ecp_nistz256_point_add (&temp[2], &temp[1], &temp[0]); /*2+1=3 */
|
---|
694 | ecp_nistz256_scatter_w5 (row, &temp[2], 3);
|
---|
695 | ecp_nistz256_point_double(&temp[1], &temp[1]); /*2*2=4 */
|
---|
696 | ecp_nistz256_scatter_w5 (row, &temp[1], 4);
|
---|
697 | ecp_nistz256_point_double(&temp[2], &temp[2]); /*2*3=6 */
|
---|
698 | ecp_nistz256_scatter_w5 (row, &temp[2], 6);
|
---|
699 | ecp_nistz256_point_add (&temp[3], &temp[1], &temp[0]); /*4+1=5 */
|
---|
700 | ecp_nistz256_scatter_w5 (row, &temp[3], 5);
|
---|
701 | ecp_nistz256_point_add (&temp[4], &temp[2], &temp[0]); /*6+1=7 */
|
---|
702 | ecp_nistz256_scatter_w5 (row, &temp[4], 7);
|
---|
703 | ecp_nistz256_point_double(&temp[1], &temp[1]); /*2*4=8 */
|
---|
704 | ecp_nistz256_scatter_w5 (row, &temp[1], 8);
|
---|
705 | ecp_nistz256_point_double(&temp[2], &temp[2]); /*2*6=12 */
|
---|
706 | ecp_nistz256_scatter_w5 (row, &temp[2], 12);
|
---|
707 | ecp_nistz256_point_double(&temp[3], &temp[3]); /*2*5=10 */
|
---|
708 | ecp_nistz256_scatter_w5 (row, &temp[3], 10);
|
---|
709 | ecp_nistz256_point_double(&temp[4], &temp[4]); /*2*7=14 */
|
---|
710 | ecp_nistz256_scatter_w5 (row, &temp[4], 14);
|
---|
711 | ecp_nistz256_point_add (&temp[2], &temp[2], &temp[0]); /*12+1=13*/
|
---|
712 | ecp_nistz256_scatter_w5 (row, &temp[2], 13);
|
---|
713 | ecp_nistz256_point_add (&temp[3], &temp[3], &temp[0]); /*10+1=11*/
|
---|
714 | ecp_nistz256_scatter_w5 (row, &temp[3], 11);
|
---|
715 | ecp_nistz256_point_add (&temp[4], &temp[4], &temp[0]); /*14+1=15*/
|
---|
716 | ecp_nistz256_scatter_w5 (row, &temp[4], 15);
|
---|
717 | ecp_nistz256_point_add (&temp[2], &temp[1], &temp[0]); /*8+1=9 */
|
---|
718 | ecp_nistz256_scatter_w5 (row, &temp[2], 9);
|
---|
719 | ecp_nistz256_point_double(&temp[1], &temp[1]); /*2*8=16 */
|
---|
720 | ecp_nistz256_scatter_w5 (row, &temp[1], 16);
|
---|
721 | }
|
---|
722 |
|
---|
723 | idx = 255;
|
---|
724 |
|
---|
725 | wvalue = p_str[0][(idx - 1) / 8];
|
---|
726 | wvalue = (wvalue >> ((idx - 1) % 8)) & mask;
|
---|
727 |
|
---|
728 | /*
|
---|
729 | * We gather to temp[0], because we know it's position relative
|
---|
730 | * to table
|
---|
731 | */
|
---|
732 | ecp_nistz256_gather_w5(&temp[0], table[0], _booth_recode_w5(wvalue) >> 1);
|
---|
733 | memcpy(r, &temp[0], sizeof(temp[0]));
|
---|
734 |
|
---|
735 | while (idx >= 5) {
|
---|
736 | for (i = (idx == 255 ? 1 : 0); i < num; i++) {
|
---|
737 | unsigned int off = (idx - 1) / 8;
|
---|
738 |
|
---|
739 | wvalue = p_str[i][off] | p_str[i][off + 1] << 8;
|
---|
740 | wvalue = (wvalue >> ((idx - 1) % 8)) & mask;
|
---|
741 |
|
---|
742 | wvalue = _booth_recode_w5(wvalue);
|
---|
743 |
|
---|
744 | ecp_nistz256_gather_w5(&temp[0], table[i], wvalue >> 1);
|
---|
745 |
|
---|
746 | ecp_nistz256_neg(temp[1].Y, temp[0].Y);
|
---|
747 | copy_conditional(temp[0].Y, temp[1].Y, (wvalue & 1));
|
---|
748 |
|
---|
749 | ecp_nistz256_point_add(r, r, &temp[0]);
|
---|
750 | }
|
---|
751 |
|
---|
752 | idx -= window_size;
|
---|
753 |
|
---|
754 | ecp_nistz256_point_double(r, r);
|
---|
755 | ecp_nistz256_point_double(r, r);
|
---|
756 | ecp_nistz256_point_double(r, r);
|
---|
757 | ecp_nistz256_point_double(r, r);
|
---|
758 | ecp_nistz256_point_double(r, r);
|
---|
759 | }
|
---|
760 |
|
---|
761 | /* Final window */
|
---|
762 | for (i = 0; i < num; i++) {
|
---|
763 | wvalue = p_str[i][0];
|
---|
764 | wvalue = (wvalue << 1) & mask;
|
---|
765 |
|
---|
766 | wvalue = _booth_recode_w5(wvalue);
|
---|
767 |
|
---|
768 | ecp_nistz256_gather_w5(&temp[0], table[i], wvalue >> 1);
|
---|
769 |
|
---|
770 | ecp_nistz256_neg(temp[1].Y, temp[0].Y);
|
---|
771 | copy_conditional(temp[0].Y, temp[1].Y, wvalue & 1);
|
---|
772 |
|
---|
773 | ecp_nistz256_point_add(r, r, &temp[0]);
|
---|
774 | }
|
---|
775 |
|
---|
776 | ret = 1;
|
---|
777 | err:
|
---|
778 | OPENSSL_free(table_storage);
|
---|
779 | OPENSSL_free(p_str);
|
---|
780 | OPENSSL_free(scalars);
|
---|
781 | return ret;
|
---|
782 | }
|
---|
783 |
|
---|
784 | /* Coordinates of G, for which we have precomputed tables */
|
---|
785 | static const BN_ULONG def_xG[P256_LIMBS] = {
|
---|
786 | TOBN(0x79e730d4, 0x18a9143c), TOBN(0x75ba95fc, 0x5fedb601),
|
---|
787 | TOBN(0x79fb732b, 0x77622510), TOBN(0x18905f76, 0xa53755c6)
|
---|
788 | };
|
---|
789 |
|
---|
790 | static const BN_ULONG def_yG[P256_LIMBS] = {
|
---|
791 | TOBN(0xddf25357, 0xce95560a), TOBN(0x8b4ab8e4, 0xba19e45c),
|
---|
792 | TOBN(0xd2e88688, 0xdd21f325), TOBN(0x8571ff18, 0x25885d85)
|
---|
793 | };
|
---|
794 |
|
---|
795 | /*
|
---|
796 | * ecp_nistz256_is_affine_G returns one if |generator| is the standard, P-256
|
---|
797 | * generator.
|
---|
798 | */
|
---|
799 | static int ecp_nistz256_is_affine_G(const EC_POINT *generator)
|
---|
800 | {
|
---|
801 | return (bn_get_top(generator->X) == P256_LIMBS) &&
|
---|
802 | (bn_get_top(generator->Y) == P256_LIMBS) &&
|
---|
803 | is_equal(bn_get_words(generator->X), def_xG) &&
|
---|
804 | is_equal(bn_get_words(generator->Y), def_yG) &&
|
---|
805 | is_one(generator->Z);
|
---|
806 | }
|
---|
807 |
|
---|
808 | __owur static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx)
|
---|
809 | {
|
---|
810 | /*
|
---|
811 | * We precompute a table for a Booth encoded exponent (wNAF) based
|
---|
812 | * computation. Each table holds 64 values for safe access, with an
|
---|
813 | * implicit value of infinity at index zero. We use window of size 7, and
|
---|
814 | * therefore require ceil(256/7) = 37 tables.
|
---|
815 | */
|
---|
816 | const BIGNUM *order;
|
---|
817 | EC_POINT *P = NULL, *T = NULL;
|
---|
818 | const EC_POINT *generator;
|
---|
819 | NISTZ256_PRE_COMP *pre_comp;
|
---|
820 | BN_CTX *new_ctx = NULL;
|
---|
821 | int i, j, k, ret = 0;
|
---|
822 | size_t w;
|
---|
823 |
|
---|
824 | PRECOMP256_ROW *preComputedTable = NULL;
|
---|
825 | unsigned char *precomp_storage = NULL;
|
---|
826 |
|
---|
827 | /* if there is an old NISTZ256_PRE_COMP object, throw it away */
|
---|
828 | EC_pre_comp_free(group);
|
---|
829 | generator = EC_GROUP_get0_generator(group);
|
---|
830 | if (generator == NULL) {
|
---|
831 | ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, EC_R_UNDEFINED_GENERATOR);
|
---|
832 | return 0;
|
---|
833 | }
|
---|
834 |
|
---|
835 | if (ecp_nistz256_is_affine_G(generator)) {
|
---|
836 | /*
|
---|
837 | * No need to calculate tables for the standard generator because we
|
---|
838 | * have them statically.
|
---|
839 | */
|
---|
840 | return 1;
|
---|
841 | }
|
---|
842 |
|
---|
843 | if ((pre_comp = ecp_nistz256_pre_comp_new(group)) == NULL)
|
---|
844 | return 0;
|
---|
845 |
|
---|
846 | if (ctx == NULL) {
|
---|
847 | ctx = new_ctx = BN_CTX_new();
|
---|
848 | if (ctx == NULL)
|
---|
849 | goto err;
|
---|
850 | }
|
---|
851 |
|
---|
852 | BN_CTX_start(ctx);
|
---|
853 |
|
---|
854 | order = EC_GROUP_get0_order(group);
|
---|
855 | if (order == NULL)
|
---|
856 | goto err;
|
---|
857 |
|
---|
858 | if (BN_is_zero(order)) {
|
---|
859 | ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, EC_R_UNKNOWN_ORDER);
|
---|
860 | goto err;
|
---|
861 | }
|
---|
862 |
|
---|
863 | w = 7;
|
---|
864 |
|
---|
865 | if ((precomp_storage =
|
---|
866 | OPENSSL_malloc(37 * 64 * sizeof(P256_POINT_AFFINE) + 64)) == NULL) {
|
---|
867 | ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, ERR_R_MALLOC_FAILURE);
|
---|
868 | goto err;
|
---|
869 | }
|
---|
870 |
|
---|
871 | preComputedTable = (void *)ALIGNPTR(precomp_storage, 64);
|
---|
872 |
|
---|
873 | P = EC_POINT_new(group);
|
---|
874 | T = EC_POINT_new(group);
|
---|
875 | if (P == NULL || T == NULL)
|
---|
876 | goto err;
|
---|
877 |
|
---|
878 | /*
|
---|
879 | * The zero entry is implicitly infinity, and we skip it, storing other
|
---|
880 | * values with -1 offset.
|
---|
881 | */
|
---|
882 | if (!EC_POINT_copy(T, generator))
|
---|
883 | goto err;
|
---|
884 |
|
---|
885 | for (k = 0; k < 64; k++) {
|
---|
886 | if (!EC_POINT_copy(P, T))
|
---|
887 | goto err;
|
---|
888 | for (j = 0; j < 37; j++) {
|
---|
889 | P256_POINT_AFFINE temp;
|
---|
890 | /*
|
---|
891 | * It would be faster to use EC_POINTs_make_affine and
|
---|
892 | * make multiple points affine at the same time.
|
---|
893 | */
|
---|
894 | if (!EC_POINT_make_affine(group, P, ctx))
|
---|
895 | goto err;
|
---|
896 | if (!ecp_nistz256_bignum_to_field_elem(temp.X, P->X) ||
|
---|
897 | !ecp_nistz256_bignum_to_field_elem(temp.Y, P->Y)) {
|
---|
898 | ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE,
|
---|
899 | EC_R_COORDINATES_OUT_OF_RANGE);
|
---|
900 | goto err;
|
---|
901 | }
|
---|
902 | ecp_nistz256_scatter_w7(preComputedTable[j], &temp, k);
|
---|
903 | for (i = 0; i < 7; i++) {
|
---|
904 | if (!EC_POINT_dbl(group, P, P, ctx))
|
---|
905 | goto err;
|
---|
906 | }
|
---|
907 | }
|
---|
908 | if (!EC_POINT_add(group, T, T, generator, ctx))
|
---|
909 | goto err;
|
---|
910 | }
|
---|
911 |
|
---|
912 | pre_comp->group = group;
|
---|
913 | pre_comp->w = w;
|
---|
914 | pre_comp->precomp = preComputedTable;
|
---|
915 | pre_comp->precomp_storage = precomp_storage;
|
---|
916 | precomp_storage = NULL;
|
---|
917 | SETPRECOMP(group, nistz256, pre_comp);
|
---|
918 | pre_comp = NULL;
|
---|
919 | ret = 1;
|
---|
920 |
|
---|
921 | err:
|
---|
922 | BN_CTX_end(ctx);
|
---|
923 | BN_CTX_free(new_ctx);
|
---|
924 |
|
---|
925 | EC_nistz256_pre_comp_free(pre_comp);
|
---|
926 | OPENSSL_free(precomp_storage);
|
---|
927 | EC_POINT_free(P);
|
---|
928 | EC_POINT_free(T);
|
---|
929 | return ret;
|
---|
930 | }
|
---|
931 |
|
---|
932 | __owur static int ecp_nistz256_set_from_affine(EC_POINT *out, const EC_GROUP *group,
|
---|
933 | const P256_POINT_AFFINE *in,
|
---|
934 | BN_CTX *ctx)
|
---|
935 | {
|
---|
936 | int ret = 0;
|
---|
937 |
|
---|
938 | if ((ret = bn_set_words(out->X, in->X, P256_LIMBS))
|
---|
939 | && (ret = bn_set_words(out->Y, in->Y, P256_LIMBS))
|
---|
940 | && (ret = bn_set_words(out->Z, ONE, P256_LIMBS)))
|
---|
941 | out->Z_is_one = 1;
|
---|
942 |
|
---|
943 | return ret;
|
---|
944 | }
|
---|
945 |
|
---|
946 | /* r = scalar*G + sum(scalars[i]*points[i]) */
|
---|
947 | __owur static int ecp_nistz256_points_mul(const EC_GROUP *group,
|
---|
948 | EC_POINT *r,
|
---|
949 | const BIGNUM *scalar,
|
---|
950 | size_t num,
|
---|
951 | const EC_POINT *points[],
|
---|
952 | const BIGNUM *scalars[], BN_CTX *ctx)
|
---|
953 | {
|
---|
954 | int i = 0, ret = 0, no_precomp_for_generator = 0, p_is_infinity = 0;
|
---|
955 | unsigned char p_str[33] = { 0 };
|
---|
956 | const PRECOMP256_ROW *preComputedTable = NULL;
|
---|
957 | const NISTZ256_PRE_COMP *pre_comp = NULL;
|
---|
958 | const EC_POINT *generator = NULL;
|
---|
959 | const BIGNUM **new_scalars = NULL;
|
---|
960 | const EC_POINT **new_points = NULL;
|
---|
961 | unsigned int idx = 0;
|
---|
962 | const unsigned int window_size = 7;
|
---|
963 | const unsigned int mask = (1 << (window_size + 1)) - 1;
|
---|
964 | unsigned int wvalue;
|
---|
965 | ALIGN32 union {
|
---|
966 | P256_POINT p;
|
---|
967 | P256_POINT_AFFINE a;
|
---|
968 | } t, p;
|
---|
969 | BIGNUM *tmp_scalar;
|
---|
970 |
|
---|
971 | if ((num + 1) == 0 || (num + 1) > OPENSSL_MALLOC_MAX_NELEMS(void *)) {
|
---|
972 | ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
|
---|
973 | return 0;
|
---|
974 | }
|
---|
975 |
|
---|
976 | BN_CTX_start(ctx);
|
---|
977 |
|
---|
978 | if (scalar) {
|
---|
979 | generator = EC_GROUP_get0_generator(group);
|
---|
980 | if (generator == NULL) {
|
---|
981 | ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, EC_R_UNDEFINED_GENERATOR);
|
---|
982 | goto err;
|
---|
983 | }
|
---|
984 |
|
---|
985 | /* look if we can use precomputed multiples of generator */
|
---|
986 | pre_comp = group->pre_comp.nistz256;
|
---|
987 |
|
---|
988 | if (pre_comp) {
|
---|
989 | /*
|
---|
990 | * If there is a precomputed table for the generator, check that
|
---|
991 | * it was generated with the same generator.
|
---|
992 | */
|
---|
993 | EC_POINT *pre_comp_generator = EC_POINT_new(group);
|
---|
994 | if (pre_comp_generator == NULL)
|
---|
995 | goto err;
|
---|
996 |
|
---|
997 | ecp_nistz256_gather_w7(&p.a, pre_comp->precomp[0], 1);
|
---|
998 | if (!ecp_nistz256_set_from_affine(pre_comp_generator,
|
---|
999 | group, &p.a, ctx)) {
|
---|
1000 | EC_POINT_free(pre_comp_generator);
|
---|
1001 | goto err;
|
---|
1002 | }
|
---|
1003 |
|
---|
1004 | if (0 == EC_POINT_cmp(group, generator, pre_comp_generator, ctx))
|
---|
1005 | preComputedTable = (const PRECOMP256_ROW *)pre_comp->precomp;
|
---|
1006 |
|
---|
1007 | EC_POINT_free(pre_comp_generator);
|
---|
1008 | }
|
---|
1009 |
|
---|
1010 | if (preComputedTable == NULL && ecp_nistz256_is_affine_G(generator)) {
|
---|
1011 | /*
|
---|
1012 | * If there is no precomputed data, but the generator is the
|
---|
1013 | * default, a hardcoded table of precomputed data is used. This
|
---|
1014 | * is because applications, such as Apache, do not use
|
---|
1015 | * EC_KEY_precompute_mult.
|
---|
1016 | */
|
---|
1017 | preComputedTable = ecp_nistz256_precomputed;
|
---|
1018 | }
|
---|
1019 |
|
---|
1020 | if (preComputedTable) {
|
---|
1021 | BN_ULONG infty;
|
---|
1022 |
|
---|
1023 | if ((BN_num_bits(scalar) > 256)
|
---|
1024 | || BN_is_negative(scalar)) {
|
---|
1025 | if ((tmp_scalar = BN_CTX_get(ctx)) == NULL)
|
---|
1026 | goto err;
|
---|
1027 |
|
---|
1028 | if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
|
---|
1029 | ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_BN_LIB);
|
---|
1030 | goto err;
|
---|
1031 | }
|
---|
1032 | scalar = tmp_scalar;
|
---|
1033 | }
|
---|
1034 |
|
---|
1035 | for (i = 0; i < bn_get_top(scalar) * BN_BYTES; i += BN_BYTES) {
|
---|
1036 | BN_ULONG d = bn_get_words(scalar)[i / BN_BYTES];
|
---|
1037 |
|
---|
1038 | p_str[i + 0] = (unsigned char)d;
|
---|
1039 | p_str[i + 1] = (unsigned char)(d >> 8);
|
---|
1040 | p_str[i + 2] = (unsigned char)(d >> 16);
|
---|
1041 | p_str[i + 3] = (unsigned char)(d >>= 24);
|
---|
1042 | if (BN_BYTES == 8) {
|
---|
1043 | d >>= 8;
|
---|
1044 | p_str[i + 4] = (unsigned char)d;
|
---|
1045 | p_str[i + 5] = (unsigned char)(d >> 8);
|
---|
1046 | p_str[i + 6] = (unsigned char)(d >> 16);
|
---|
1047 | p_str[i + 7] = (unsigned char)(d >> 24);
|
---|
1048 | }
|
---|
1049 | }
|
---|
1050 |
|
---|
1051 | for (; i < 33; i++)
|
---|
1052 | p_str[i] = 0;
|
---|
1053 |
|
---|
1054 | /* First window */
|
---|
1055 | wvalue = (p_str[0] << 1) & mask;
|
---|
1056 | idx += window_size;
|
---|
1057 |
|
---|
1058 | wvalue = _booth_recode_w7(wvalue);
|
---|
1059 |
|
---|
1060 | ecp_nistz256_gather_w7(&p.a, preComputedTable[0],
|
---|
1061 | wvalue >> 1);
|
---|
1062 |
|
---|
1063 | ecp_nistz256_neg(p.p.Z, p.p.Y);
|
---|
1064 | copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
|
---|
1065 |
|
---|
1066 | /*
|
---|
1067 | * Since affine infinity is encoded as (0,0) and
|
---|
1068 | * Jacobian is (,,0), we need to harmonize them
|
---|
1069 | * by assigning "one" or zero to Z.
|
---|
1070 | */
|
---|
1071 | infty = (p.p.X[0] | p.p.X[1] | p.p.X[2] | p.p.X[3] |
|
---|
1072 | p.p.Y[0] | p.p.Y[1] | p.p.Y[2] | p.p.Y[3]);
|
---|
1073 | #if !defined(_MSC_VER) || P256_LIMBS != 4 /* vbox: VC++ 2010 complains about out of bounds accesses here in debug builds. */
|
---|
1074 | if (P256_LIMBS == 8)
|
---|
1075 | infty |= (p.p.X[4] | p.p.X[5] | p.p.X[6] | p.p.X[7] |
|
---|
1076 | p.p.Y[4] | p.p.Y[5] | p.p.Y[6] | p.p.Y[7]);
|
---|
1077 | #endif
|
---|
1078 |
|
---|
1079 | infty = 0 - is_zero(infty);
|
---|
1080 | infty = ~infty;
|
---|
1081 |
|
---|
1082 | p.p.Z[0] = ONE[0] & infty;
|
---|
1083 | p.p.Z[1] = ONE[1] & infty;
|
---|
1084 | p.p.Z[2] = ONE[2] & infty;
|
---|
1085 | p.p.Z[3] = ONE[3] & infty;
|
---|
1086 | #if !defined(_MSC_VER) || P256_LIMBS != 4 /* vbox: VC++ 2010 complains about out of bounds accesses here in debug builds. */
|
---|
1087 | if (P256_LIMBS == 8) {
|
---|
1088 | p.p.Z[4] = ONE[4] & infty;
|
---|
1089 | p.p.Z[5] = ONE[5] & infty;
|
---|
1090 | p.p.Z[6] = ONE[6] & infty;
|
---|
1091 | p.p.Z[7] = ONE[7] & infty;
|
---|
1092 | }
|
---|
1093 | #endif
|
---|
1094 |
|
---|
1095 | for (i = 1; i < 37; i++) {
|
---|
1096 | unsigned int off = (idx - 1) / 8;
|
---|
1097 | wvalue = p_str[off] | p_str[off + 1] << 8;
|
---|
1098 | wvalue = (wvalue >> ((idx - 1) % 8)) & mask;
|
---|
1099 | idx += window_size;
|
---|
1100 |
|
---|
1101 | wvalue = _booth_recode_w7(wvalue);
|
---|
1102 |
|
---|
1103 | ecp_nistz256_gather_w7(&t.a,
|
---|
1104 | preComputedTable[i], wvalue >> 1);
|
---|
1105 |
|
---|
1106 | ecp_nistz256_neg(t.p.Z, t.a.Y);
|
---|
1107 | copy_conditional(t.a.Y, t.p.Z, wvalue & 1);
|
---|
1108 |
|
---|
1109 | ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a);
|
---|
1110 | }
|
---|
1111 | } else {
|
---|
1112 | p_is_infinity = 1;
|
---|
1113 | no_precomp_for_generator = 1;
|
---|
1114 | }
|
---|
1115 | } else
|
---|
1116 | p_is_infinity = 1;
|
---|
1117 |
|
---|
1118 | if (no_precomp_for_generator) {
|
---|
1119 | /*
|
---|
1120 | * Without a precomputed table for the generator, it has to be
|
---|
1121 | * handled like a normal point.
|
---|
1122 | */
|
---|
1123 | new_scalars = OPENSSL_malloc((num + 1) * sizeof(BIGNUM *));
|
---|
1124 | if (new_scalars == NULL) {
|
---|
1125 | ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
|
---|
1126 | goto err;
|
---|
1127 | }
|
---|
1128 |
|
---|
1129 | new_points = OPENSSL_malloc((num + 1) * sizeof(EC_POINT *));
|
---|
1130 | if (new_points == NULL) {
|
---|
1131 | ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_MALLOC_FAILURE);
|
---|
1132 | goto err;
|
---|
1133 | }
|
---|
1134 |
|
---|
1135 | memcpy(new_scalars, scalars, num * sizeof(BIGNUM *));
|
---|
1136 | new_scalars[num] = scalar;
|
---|
1137 | memcpy(new_points, points, num * sizeof(EC_POINT *));
|
---|
1138 | new_points[num] = generator;
|
---|
1139 |
|
---|
1140 | scalars = new_scalars;
|
---|
1141 | points = new_points;
|
---|
1142 | num++;
|
---|
1143 | }
|
---|
1144 |
|
---|
1145 | if (num) {
|
---|
1146 | P256_POINT *out = &t.p;
|
---|
1147 | if (p_is_infinity)
|
---|
1148 | out = &p.p;
|
---|
1149 |
|
---|
1150 | if (!ecp_nistz256_windowed_mul(group, out, scalars, points, num, ctx))
|
---|
1151 | goto err;
|
---|
1152 |
|
---|
1153 | if (!p_is_infinity)
|
---|
1154 | ecp_nistz256_point_add(&p.p, &p.p, out);
|
---|
1155 | }
|
---|
1156 |
|
---|
1157 | /* Not constant-time, but we're only operating on the public output. */
|
---|
1158 | if (!bn_set_words(r->X, p.p.X, P256_LIMBS) ||
|
---|
1159 | !bn_set_words(r->Y, p.p.Y, P256_LIMBS) ||
|
---|
1160 | !bn_set_words(r->Z, p.p.Z, P256_LIMBS)) {
|
---|
1161 | goto err;
|
---|
1162 | }
|
---|
1163 | r->Z_is_one = is_one(r->Z) & 1;
|
---|
1164 |
|
---|
1165 | ret = 1;
|
---|
1166 |
|
---|
1167 | err:
|
---|
1168 | BN_CTX_end(ctx);
|
---|
1169 | OPENSSL_free(new_points);
|
---|
1170 | OPENSSL_free(new_scalars);
|
---|
1171 | return ret;
|
---|
1172 | }
|
---|
1173 |
|
---|
1174 | __owur static int ecp_nistz256_get_affine(const EC_GROUP *group,
|
---|
1175 | const EC_POINT *point,
|
---|
1176 | BIGNUM *x, BIGNUM *y, BN_CTX *ctx)
|
---|
1177 | {
|
---|
1178 | BN_ULONG z_inv2[P256_LIMBS];
|
---|
1179 | BN_ULONG z_inv3[P256_LIMBS];
|
---|
1180 | BN_ULONG x_aff[P256_LIMBS];
|
---|
1181 | BN_ULONG y_aff[P256_LIMBS];
|
---|
1182 | BN_ULONG point_x[P256_LIMBS], point_y[P256_LIMBS], point_z[P256_LIMBS];
|
---|
1183 | BN_ULONG x_ret[P256_LIMBS], y_ret[P256_LIMBS];
|
---|
1184 |
|
---|
1185 | if (EC_POINT_is_at_infinity(group, point)) {
|
---|
1186 | ECerr(EC_F_ECP_NISTZ256_GET_AFFINE, EC_R_POINT_AT_INFINITY);
|
---|
1187 | return 0;
|
---|
1188 | }
|
---|
1189 |
|
---|
1190 | if (!ecp_nistz256_bignum_to_field_elem(point_x, point->X) ||
|
---|
1191 | !ecp_nistz256_bignum_to_field_elem(point_y, point->Y) ||
|
---|
1192 | !ecp_nistz256_bignum_to_field_elem(point_z, point->Z)) {
|
---|
1193 | ECerr(EC_F_ECP_NISTZ256_GET_AFFINE, EC_R_COORDINATES_OUT_OF_RANGE);
|
---|
1194 | return 0;
|
---|
1195 | }
|
---|
1196 |
|
---|
1197 | ecp_nistz256_mod_inverse(z_inv3, point_z);
|
---|
1198 | ecp_nistz256_sqr_mont(z_inv2, z_inv3);
|
---|
1199 | ecp_nistz256_mul_mont(x_aff, z_inv2, point_x);
|
---|
1200 |
|
---|
1201 | if (x != NULL) {
|
---|
1202 | ecp_nistz256_from_mont(x_ret, x_aff);
|
---|
1203 | if (!bn_set_words(x, x_ret, P256_LIMBS))
|
---|
1204 | return 0;
|
---|
1205 | }
|
---|
1206 |
|
---|
1207 | if (y != NULL) {
|
---|
1208 | ecp_nistz256_mul_mont(z_inv3, z_inv3, z_inv2);
|
---|
1209 | ecp_nistz256_mul_mont(y_aff, z_inv3, point_y);
|
---|
1210 | ecp_nistz256_from_mont(y_ret, y_aff);
|
---|
1211 | if (!bn_set_words(y, y_ret, P256_LIMBS))
|
---|
1212 | return 0;
|
---|
1213 | }
|
---|
1214 |
|
---|
1215 | return 1;
|
---|
1216 | }
|
---|
1217 |
|
---|
1218 | static NISTZ256_PRE_COMP *ecp_nistz256_pre_comp_new(const EC_GROUP *group)
|
---|
1219 | {
|
---|
1220 | NISTZ256_PRE_COMP *ret = NULL;
|
---|
1221 |
|
---|
1222 | if (!group)
|
---|
1223 | return NULL;
|
---|
1224 |
|
---|
1225 | ret = OPENSSL_zalloc(sizeof(*ret));
|
---|
1226 |
|
---|
1227 | if (ret == NULL) {
|
---|
1228 | ECerr(EC_F_ECP_NISTZ256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
|
---|
1229 | return ret;
|
---|
1230 | }
|
---|
1231 |
|
---|
1232 | ret->group = group;
|
---|
1233 | ret->w = 6; /* default */
|
---|
1234 | ret->references = 1;
|
---|
1235 |
|
---|
1236 | ret->lock = CRYPTO_THREAD_lock_new();
|
---|
1237 | if (ret->lock == NULL) {
|
---|
1238 | ECerr(EC_F_ECP_NISTZ256_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
|
---|
1239 | OPENSSL_free(ret);
|
---|
1240 | return NULL;
|
---|
1241 | }
|
---|
1242 | return ret;
|
---|
1243 | }
|
---|
1244 |
|
---|
1245 | NISTZ256_PRE_COMP *EC_nistz256_pre_comp_dup(NISTZ256_PRE_COMP *p)
|
---|
1246 | {
|
---|
1247 | int i;
|
---|
1248 | if (p != NULL)
|
---|
1249 | CRYPTO_UP_REF(&p->references, &i, p->lock);
|
---|
1250 | return p;
|
---|
1251 | }
|
---|
1252 |
|
---|
1253 | void EC_nistz256_pre_comp_free(NISTZ256_PRE_COMP *pre)
|
---|
1254 | {
|
---|
1255 | int i;
|
---|
1256 |
|
---|
1257 | if (pre == NULL)
|
---|
1258 | return;
|
---|
1259 |
|
---|
1260 | CRYPTO_DOWN_REF(&pre->references, &i, pre->lock);
|
---|
1261 | REF_PRINT_COUNT("EC_nistz256", x);
|
---|
1262 | if (i > 0)
|
---|
1263 | return;
|
---|
1264 | REF_ASSERT_ISNT(i < 0);
|
---|
1265 |
|
---|
1266 | OPENSSL_free(pre->precomp_storage);
|
---|
1267 | CRYPTO_THREAD_lock_free(pre->lock);
|
---|
1268 | OPENSSL_free(pre);
|
---|
1269 | }
|
---|
1270 |
|
---|
1271 |
|
---|
1272 | static int ecp_nistz256_window_have_precompute_mult(const EC_GROUP *group)
|
---|
1273 | {
|
---|
1274 | /* There is a hard-coded table for the default generator. */
|
---|
1275 | const EC_POINT *generator = EC_GROUP_get0_generator(group);
|
---|
1276 |
|
---|
1277 | if (generator != NULL && ecp_nistz256_is_affine_G(generator)) {
|
---|
1278 | /* There is a hard-coded table for the default generator. */
|
---|
1279 | return 1;
|
---|
1280 | }
|
---|
1281 |
|
---|
1282 | return HAVEPRECOMP(group, nistz256);
|
---|
1283 | }
|
---|
1284 |
|
---|
1285 | #if defined(__x86_64) || defined(__x86_64__) || \
|
---|
1286 | defined(_M_AMD64) || defined(_M_X64) || \
|
---|
1287 | defined(__powerpc64__) || defined(_ARCH_PP64) || \
|
---|
1288 | defined(__aarch64__)
|
---|
1289 | /*
|
---|
1290 | * Montgomery mul modulo Order(P): res = a*b*2^-256 mod Order(P)
|
---|
1291 | */
|
---|
1292 | void ecp_nistz256_ord_mul_mont(BN_ULONG res[P256_LIMBS],
|
---|
1293 | const BN_ULONG a[P256_LIMBS],
|
---|
1294 | const BN_ULONG b[P256_LIMBS]);
|
---|
1295 | void ecp_nistz256_ord_sqr_mont(BN_ULONG res[P256_LIMBS],
|
---|
1296 | const BN_ULONG a[P256_LIMBS],
|
---|
1297 | int rep);
|
---|
1298 |
|
---|
1299 | static int ecp_nistz256_inv_mod_ord(const EC_GROUP *group, BIGNUM *r,
|
---|
1300 | const BIGNUM *x, BN_CTX *ctx)
|
---|
1301 | {
|
---|
1302 | /* RR = 2^512 mod ord(p256) */
|
---|
1303 | static const BN_ULONG RR[P256_LIMBS] = {
|
---|
1304 | TOBN(0x83244c95,0xbe79eea2), TOBN(0x4699799c,0x49bd6fa6),
|
---|
1305 | TOBN(0x2845b239,0x2b6bec59), TOBN(0x66e12d94,0xf3d95620)
|
---|
1306 | };
|
---|
1307 | /* The constant 1 (unlike ONE that is one in Montgomery representation) */
|
---|
1308 | static const BN_ULONG one[P256_LIMBS] = {
|
---|
1309 | TOBN(0,1), TOBN(0,0), TOBN(0,0), TOBN(0,0)
|
---|
1310 | };
|
---|
1311 | /*
|
---|
1312 | * We don't use entry 0 in the table, so we omit it and address
|
---|
1313 | * with -1 offset.
|
---|
1314 | */
|
---|
1315 | BN_ULONG table[15][P256_LIMBS];
|
---|
1316 | BN_ULONG out[P256_LIMBS], t[P256_LIMBS];
|
---|
1317 | int i, ret = 0;
|
---|
1318 | enum {
|
---|
1319 | i_1 = 0, i_10, i_11, i_101, i_111, i_1010, i_1111,
|
---|
1320 | i_10101, i_101010, i_101111, i_x6, i_x8, i_x16, i_x32
|
---|
1321 | };
|
---|
1322 |
|
---|
1323 | /*
|
---|
1324 | * Catch allocation failure early.
|
---|
1325 | */
|
---|
1326 | if (bn_wexpand(r, P256_LIMBS) == NULL) {
|
---|
1327 | ECerr(EC_F_ECP_NISTZ256_INV_MOD_ORD, ERR_R_BN_LIB);
|
---|
1328 | goto err;
|
---|
1329 | }
|
---|
1330 |
|
---|
1331 | if ((BN_num_bits(x) > 256) || BN_is_negative(x)) {
|
---|
1332 | BIGNUM *tmp;
|
---|
1333 |
|
---|
1334 | if ((tmp = BN_CTX_get(ctx)) == NULL
|
---|
1335 | || !BN_nnmod(tmp, x, group->order, ctx)) {
|
---|
1336 | ECerr(EC_F_ECP_NISTZ256_INV_MOD_ORD, ERR_R_BN_LIB);
|
---|
1337 | goto err;
|
---|
1338 | }
|
---|
1339 | x = tmp;
|
---|
1340 | }
|
---|
1341 |
|
---|
1342 | if (!ecp_nistz256_bignum_to_field_elem(t, x)) {
|
---|
1343 | ECerr(EC_F_ECP_NISTZ256_INV_MOD_ORD, EC_R_COORDINATES_OUT_OF_RANGE);
|
---|
1344 | goto err;
|
---|
1345 | }
|
---|
1346 |
|
---|
1347 | ecp_nistz256_ord_mul_mont(table[0], t, RR);
|
---|
1348 | #if 0
|
---|
1349 | /*
|
---|
1350 | * Original sparse-then-fixed-window algorithm, retained for reference.
|
---|
1351 | */
|
---|
1352 | for (i = 2; i < 16; i += 2) {
|
---|
1353 | ecp_nistz256_ord_sqr_mont(table[i-1], table[i/2-1], 1);
|
---|
1354 | ecp_nistz256_ord_mul_mont(table[i], table[i-1], table[0]);
|
---|
1355 | }
|
---|
1356 |
|
---|
1357 | /*
|
---|
1358 | * The top 128bit of the exponent are highly redudndant, so we
|
---|
1359 | * perform an optimized flow
|
---|
1360 | */
|
---|
1361 | ecp_nistz256_ord_sqr_mont(t, table[15-1], 4); /* f0 */
|
---|
1362 | ecp_nistz256_ord_mul_mont(t, t, table[15-1]); /* ff */
|
---|
1363 |
|
---|
1364 | ecp_nistz256_ord_sqr_mont(out, t, 8); /* ff00 */
|
---|
1365 | ecp_nistz256_ord_mul_mont(out, out, t); /* ffff */
|
---|
1366 |
|
---|
1367 | ecp_nistz256_ord_sqr_mont(t, out, 16); /* ffff0000 */
|
---|
1368 | ecp_nistz256_ord_mul_mont(t, t, out); /* ffffffff */
|
---|
1369 |
|
---|
1370 | ecp_nistz256_ord_sqr_mont(out, t, 64); /* ffffffff0000000000000000 */
|
---|
1371 | ecp_nistz256_ord_mul_mont(out, out, t); /* ffffffff00000000ffffffff */
|
---|
1372 |
|
---|
1373 | ecp_nistz256_ord_sqr_mont(out, out, 32); /* ffffffff00000000ffffffff00000000 */
|
---|
1374 | ecp_nistz256_ord_mul_mont(out, out, t); /* ffffffff00000000ffffffffffffffff */
|
---|
1375 |
|
---|
1376 | /*
|
---|
1377 | * The bottom 128 bit of the exponent are processed with fixed 4-bit window
|
---|
1378 | */
|
---|
1379 | for(i = 0; i < 32; i++) {
|
---|
1380 | /* expLo - the low 128 bits of the exponent we use (ord(p256) - 2),
|
---|
1381 | * split into nibbles */
|
---|
1382 | static const unsigned char expLo[32] = {
|
---|
1383 | 0xb,0xc,0xe,0x6,0xf,0xa,0xa,0xd,0xa,0x7,0x1,0x7,0x9,0xe,0x8,0x4,
|
---|
1384 | 0xf,0x3,0xb,0x9,0xc,0xa,0xc,0x2,0xf,0xc,0x6,0x3,0x2,0x5,0x4,0xf
|
---|
1385 | };
|
---|
1386 |
|
---|
1387 | ecp_nistz256_ord_sqr_mont(out, out, 4);
|
---|
1388 | /* The exponent is public, no need in constant-time access */
|
---|
1389 | ecp_nistz256_ord_mul_mont(out, out, table[expLo[i]-1]);
|
---|
1390 | }
|
---|
1391 | #else
|
---|
1392 | /*
|
---|
1393 | * https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion
|
---|
1394 | *
|
---|
1395 | * Even though this code path spares 12 squarings, 4.5%, and 13
|
---|
1396 | * multiplications, 25%, on grand scale sign operation is not that
|
---|
1397 | * much faster, not more that 2%...
|
---|
1398 | */
|
---|
1399 |
|
---|
1400 | /* pre-calculate powers */
|
---|
1401 | ecp_nistz256_ord_sqr_mont(table[i_10], table[i_1], 1);
|
---|
1402 |
|
---|
1403 | ecp_nistz256_ord_mul_mont(table[i_11], table[i_1], table[i_10]);
|
---|
1404 |
|
---|
1405 | ecp_nistz256_ord_mul_mont(table[i_101], table[i_11], table[i_10]);
|
---|
1406 |
|
---|
1407 | ecp_nistz256_ord_mul_mont(table[i_111], table[i_101], table[i_10]);
|
---|
1408 |
|
---|
1409 | ecp_nistz256_ord_sqr_mont(table[i_1010], table[i_101], 1);
|
---|
1410 |
|
---|
1411 | ecp_nistz256_ord_mul_mont(table[i_1111], table[i_1010], table[i_101]);
|
---|
1412 |
|
---|
1413 | ecp_nistz256_ord_sqr_mont(table[i_10101], table[i_1010], 1);
|
---|
1414 | ecp_nistz256_ord_mul_mont(table[i_10101], table[i_10101], table[i_1]);
|
---|
1415 |
|
---|
1416 | ecp_nistz256_ord_sqr_mont(table[i_101010], table[i_10101], 1);
|
---|
1417 |
|
---|
1418 | ecp_nistz256_ord_mul_mont(table[i_101111], table[i_101010], table[i_101]);
|
---|
1419 |
|
---|
1420 | ecp_nistz256_ord_mul_mont(table[i_x6], table[i_101010], table[i_10101]);
|
---|
1421 |
|
---|
1422 | ecp_nistz256_ord_sqr_mont(table[i_x8], table[i_x6], 2);
|
---|
1423 | ecp_nistz256_ord_mul_mont(table[i_x8], table[i_x8], table[i_11]);
|
---|
1424 |
|
---|
1425 | ecp_nistz256_ord_sqr_mont(table[i_x16], table[i_x8], 8);
|
---|
1426 | ecp_nistz256_ord_mul_mont(table[i_x16], table[i_x16], table[i_x8]);
|
---|
1427 |
|
---|
1428 | ecp_nistz256_ord_sqr_mont(table[i_x32], table[i_x16], 16);
|
---|
1429 | ecp_nistz256_ord_mul_mont(table[i_x32], table[i_x32], table[i_x16]);
|
---|
1430 |
|
---|
1431 | /* calculations */
|
---|
1432 | ecp_nistz256_ord_sqr_mont(out, table[i_x32], 64);
|
---|
1433 | ecp_nistz256_ord_mul_mont(out, out, table[i_x32]);
|
---|
1434 |
|
---|
1435 | for (i = 0; i < 27; i++) {
|
---|
1436 | static const struct { unsigned char p, i; } chain[27] = {
|
---|
1437 | { 32, i_x32 }, { 6, i_101111 }, { 5, i_111 },
|
---|
1438 | { 4, i_11 }, { 5, i_1111 }, { 5, i_10101 },
|
---|
1439 | { 4, i_101 }, { 3, i_101 }, { 3, i_101 },
|
---|
1440 | { 5, i_111 }, { 9, i_101111 }, { 6, i_1111 },
|
---|
1441 | { 2, i_1 }, { 5, i_1 }, { 6, i_1111 },
|
---|
1442 | { 5, i_111 }, { 4, i_111 }, { 5, i_111 },
|
---|
1443 | { 5, i_101 }, { 3, i_11 }, { 10, i_101111 },
|
---|
1444 | { 2, i_11 }, { 5, i_11 }, { 5, i_11 },
|
---|
1445 | { 3, i_1 }, { 7, i_10101 }, { 6, i_1111 }
|
---|
1446 | };
|
---|
1447 |
|
---|
1448 | ecp_nistz256_ord_sqr_mont(out, out, chain[i].p);
|
---|
1449 | ecp_nistz256_ord_mul_mont(out, out, table[chain[i].i]);
|
---|
1450 | }
|
---|
1451 | #endif
|
---|
1452 | ecp_nistz256_ord_mul_mont(out, out, one);
|
---|
1453 |
|
---|
1454 | /*
|
---|
1455 | * Can't fail, but check return code to be consistent anyway.
|
---|
1456 | */
|
---|
1457 | if (!bn_set_words(r, out, P256_LIMBS))
|
---|
1458 | goto err;
|
---|
1459 |
|
---|
1460 | ret = 1;
|
---|
1461 | err:
|
---|
1462 | return ret;
|
---|
1463 | }
|
---|
1464 | #else
|
---|
1465 | # define ecp_nistz256_inv_mod_ord NULL
|
---|
1466 | #endif
|
---|
1467 |
|
---|
1468 | const EC_METHOD *EC_GFp_nistz256_method(void)
|
---|
1469 | {
|
---|
1470 | static const EC_METHOD ret = {
|
---|
1471 | EC_FLAGS_DEFAULT_OCT,
|
---|
1472 | NID_X9_62_prime_field,
|
---|
1473 | ec_GFp_mont_group_init,
|
---|
1474 | ec_GFp_mont_group_finish,
|
---|
1475 | ec_GFp_mont_group_clear_finish,
|
---|
1476 | ec_GFp_mont_group_copy,
|
---|
1477 | ec_GFp_mont_group_set_curve,
|
---|
1478 | ec_GFp_simple_group_get_curve,
|
---|
1479 | ec_GFp_simple_group_get_degree,
|
---|
1480 | ec_group_simple_order_bits,
|
---|
1481 | ec_GFp_simple_group_check_discriminant,
|
---|
1482 | ec_GFp_simple_point_init,
|
---|
1483 | ec_GFp_simple_point_finish,
|
---|
1484 | ec_GFp_simple_point_clear_finish,
|
---|
1485 | ec_GFp_simple_point_copy,
|
---|
1486 | ec_GFp_simple_point_set_to_infinity,
|
---|
1487 | ec_GFp_simple_set_Jprojective_coordinates_GFp,
|
---|
1488 | ec_GFp_simple_get_Jprojective_coordinates_GFp,
|
---|
1489 | ec_GFp_simple_point_set_affine_coordinates,
|
---|
1490 | ecp_nistz256_get_affine,
|
---|
1491 | 0, 0, 0,
|
---|
1492 | ec_GFp_simple_add,
|
---|
1493 | ec_GFp_simple_dbl,
|
---|
1494 | ec_GFp_simple_invert,
|
---|
1495 | ec_GFp_simple_is_at_infinity,
|
---|
1496 | ec_GFp_simple_is_on_curve,
|
---|
1497 | ec_GFp_simple_cmp,
|
---|
1498 | ec_GFp_simple_make_affine,
|
---|
1499 | ec_GFp_simple_points_make_affine,
|
---|
1500 | ecp_nistz256_points_mul, /* mul */
|
---|
1501 | ecp_nistz256_mult_precompute, /* precompute_mult */
|
---|
1502 | ecp_nistz256_window_have_precompute_mult, /* have_precompute_mult */
|
---|
1503 | ec_GFp_mont_field_mul,
|
---|
1504 | ec_GFp_mont_field_sqr,
|
---|
1505 | 0, /* field_div */
|
---|
1506 | ec_GFp_mont_field_inv,
|
---|
1507 | ec_GFp_mont_field_encode,
|
---|
1508 | ec_GFp_mont_field_decode,
|
---|
1509 | ec_GFp_mont_field_set_to_one,
|
---|
1510 | ec_key_simple_priv2oct,
|
---|
1511 | ec_key_simple_oct2priv,
|
---|
1512 | 0, /* set private */
|
---|
1513 | ec_key_simple_generate_key,
|
---|
1514 | ec_key_simple_check_key,
|
---|
1515 | ec_key_simple_generate_public_key,
|
---|
1516 | 0, /* keycopy */
|
---|
1517 | 0, /* keyfinish */
|
---|
1518 | ecdh_simple_compute_key,
|
---|
1519 | ecp_nistz256_inv_mod_ord, /* can be #define-d NULL */
|
---|
1520 | 0, /* blind_coordinates */
|
---|
1521 | 0, /* ladder_pre */
|
---|
1522 | 0, /* ladder_step */
|
---|
1523 | 0 /* ladder_post */
|
---|
1524 | };
|
---|
1525 |
|
---|
1526 | return &ret;
|
---|
1527 | }
|
---|