ecp_nistp224.c@ 83531

Last change on this file since 83531 was 83531, checked in by vboxsync, 5 years ago
setting svn:sync-process=export for openssl-1.1.1f, all files except tests
File size: 59.3 KB

Line
1	/*
2	* Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
3	*
4	* Licensed under the OpenSSL license (the "License"). You may not use
5	* this file except in compliance with the License. You can obtain a copy
6	* in the file LICENSE in the source distribution or at
7	* https://www.openssl.org/source/license.html
8	*/
9
10	/* Copyright 2011 Google Inc.
11	*
12	* Licensed under the Apache License, Version 2.0 (the "License");
13	*
14	* you may not use this file except in compliance with the License.
15	* You may obtain a copy of the License at
16	*
17	* http://www.apache.org/licenses/LICENSE-2.0
18	*
19	* Unless required by applicable law or agreed to in writing, software
20	* distributed under the License is distributed on an "AS IS" BASIS,
21	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22	* See the License for the specific language governing permissions and
23	* limitations under the License.
24	*/
25
26	/*
27	* A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
28	*
29	* Inspired by Daniel J. Bernstein's public domain nistp224 implementation
30	* and Adam Langley's public domain 64-bit C implementation of curve25519
31	*/
32
33	#include <openssl/opensslconf.h>
34	#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
35	NON_EMPTY_TRANSLATION_UNIT
36	#else
37
38	# include <stdint.h>
39	# include <string.h>
40	# include <openssl/err.h>
41	# include "ec_local.h"
42
# if !defined(__SIZEOF_INT128__) || __SIZEOF_INT128__ != 16
#  error "Your compiler doesn't appear to support 128-bit integer types"
# else
/*
 * __uint128_t is nonstandard; gcc implements it on 64-bit platforms.  Even
 * with gcc, this typedef won't work for 32-bit platforms.
 */
typedef __uint128_t uint128_t;
# endif

/* Short aliases for the byte and 64-bit word types used in this file. */
typedef uint8_t u8;
typedef uint64_t u64;
53
54	/******************************************************************************/
55	/*-
56	* INTERNAL REPRESENTATION OF FIELD ELEMENTS
57	*
58	* Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
59	* using 64-bit coefficients called 'limbs',
60	* and sometimes (for multiplication results) as
61	* b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
62	* using 128-bit coefficients called 'widelimbs'.
63	* A 4-limb representation is an 'felem';
64	* a 7-widelimb representation is a 'widefelem'.
65	* Even within felems, bits of adjacent limbs overlap, and we don't always
66	* reduce the representations: we ensure that inputs to each felem
67	* multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
68	* and fit into a 128-bit word without overflow. The coefficients are then
69	* again partially reduced to obtain an felem satisfying a_i < 2^57.
70	* We only reduce to the unique minimal representation at the end of the
71	* computation.
72	*/
73
74	typedef uint64_t limb;
75	typedef uint128_t widelimb;
76
77	typedef limb felem[4];
78	typedef widelimb widefelem[7];
79
80	/*
81	* Field element represented as a byte array. 28*8 = 224 bits is also the
82	* group order size for the elliptic curve, and we also use this type for
83	* scalars for point multiplication.
84	*/
85	typedef u8 felem_bytearray[28];
86
87	static const felem_bytearray nistp224_curve_params[5] = {
88	{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
89	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
90	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
91	{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
92	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
93	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
94	{0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
95	0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
96	0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
97	{0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
98	0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
99	0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
100	{0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
101	0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
102	0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
103	};
104
105	/*-
106	* Precomputed multiples of the standard generator
107	* Points are given in coordinates (X, Y, Z) where Z normally is 1
108	* (0 for the point at infinity).
109	* For each field element, slice a_0 is word 0, etc.
110	*
111	* The table has 2 * 16 elements, starting with the following:
112	* index | bits | point
113	* ------+---------+------------------------------
114	* 0 | 0 0 0 0 | 0G
115	* 1 | 0 0 0 1 | 1G
116	* 2 | 0 0 1 0 | 2^56G
117	* 3 | 0 0 1 1 | (2^56 + 1)G
118	* 4 | 0 1 0 0 | 2^112G
119	* 5 | 0 1 0 1 | (2^112 + 1)G
120	* 6 | 0 1 1 0 | (2^112 + 2^56)G
121	* 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
122	* 8 | 1 0 0 0 | 2^168G
123	* 9 | 1 0 0 1 | (2^168 + 1)G
124	* 10 | 1 0 1 0 | (2^168 + 2^56)G
125	* 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
126	* 12 | 1 1 0 0 | (2^168 + 2^112)G
127	* 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
128	* 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
129	* 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
130	* followed by a copy of this with each element multiplied by 2^28.
131	*
132	* The reason for this is so that we can clock bits into four different
133	* locations when doing simple scalar multiplies against the base point,
134	* and then another four locations using the second 16 elements.
135	*/
136	static const felem gmul[2][16][3] = {
137	{{{0, 0, 0, 0},
138	{0, 0, 0, 0},
139	{0, 0, 0, 0}},
140	{{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
141	{0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
142	{1, 0, 0, 0}},
143	{{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
144	{0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
145	{1, 0, 0, 0}},
146	{{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
147	{0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
148	{1, 0, 0, 0}},
149	{{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
150	{0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
151	{1, 0, 0, 0}},
152	{{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
153	{0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
154	{1, 0, 0, 0}},
155	{{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
156	{0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
157	{1, 0, 0, 0}},
158	{{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
159	{0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
160	{1, 0, 0, 0}},
161	{{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
162	{0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
163	{1, 0, 0, 0}},
164	{{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
165	{0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
166	{1, 0, 0, 0}},
167	{{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
168	{0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
169	{1, 0, 0, 0}},
170	{{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
171	{0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
172	{1, 0, 0, 0}},
173	{{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
174	{0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
175	{1, 0, 0, 0}},
176	{{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
177	{0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
178	{1, 0, 0, 0}},
179	{{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
180	{0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
181	{1, 0, 0, 0}},
182	{{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
183	{0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
184	{1, 0, 0, 0}}},
185	{{{0, 0, 0, 0},
186	{0, 0, 0, 0},
187	{0, 0, 0, 0}},
188	{{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
189	{0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
190	{1, 0, 0, 0}},
191	{{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
192	{0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
193	{1, 0, 0, 0}},
194	{{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
195	{0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
196	{1, 0, 0, 0}},
197	{{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
198	{0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
199	{1, 0, 0, 0}},
200	{{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
201	{0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
202	{1, 0, 0, 0}},
203	{{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
204	{0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
205	{1, 0, 0, 0}},
206	{{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
207	{0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
208	{1, 0, 0, 0}},
209	{{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
210	{0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
211	{1, 0, 0, 0}},
212	{{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
213	{0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
214	{1, 0, 0, 0}},
215	{{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
216	{0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
217	{1, 0, 0, 0}},
218	{{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
219	{0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
220	{1, 0, 0, 0}},
221	{{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
222	{0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
223	{1, 0, 0, 0}},
224	{{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
225	{0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
226	{1, 0, 0, 0}},
227	{{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
228	{0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
229	{1, 0, 0, 0}},
230	{{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
231	{0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
232	{1, 0, 0, 0}}}
233	};
234
235	/* Precomputation for the group generator. */
236	struct nistp224_pre_comp_st {
237	felem g_pre_comp[2][16][3];
238	CRYPTO_REF_COUNT references;
239	CRYPTO_RWLOCK *lock;
240	};
241
242	const EC_METHOD *EC_GFp_nistp224_method(void)
243	{
244	static const EC_METHOD ret = {
245	EC_FLAGS_DEFAULT_OCT,
246	NID_X9_62_prime_field,
247	ec_GFp_nistp224_group_init,
248	ec_GFp_simple_group_finish,
249	ec_GFp_simple_group_clear_finish,
250	ec_GFp_nist_group_copy,
251	ec_GFp_nistp224_group_set_curve,
252	ec_GFp_simple_group_get_curve,
253	ec_GFp_simple_group_get_degree,
254	ec_group_simple_order_bits,
255	ec_GFp_simple_group_check_discriminant,
256	ec_GFp_simple_point_init,
257	ec_GFp_simple_point_finish,
258	ec_GFp_simple_point_clear_finish,
259	ec_GFp_simple_point_copy,
260	ec_GFp_simple_point_set_to_infinity,
261	ec_GFp_simple_set_Jprojective_coordinates_GFp,
262	ec_GFp_simple_get_Jprojective_coordinates_GFp,
263	ec_GFp_simple_point_set_affine_coordinates,
264	ec_GFp_nistp224_point_get_affine_coordinates,
265	0 /* point_set_compressed_coordinates */ ,
266	0 /* point2oct */ ,
267	0 /* oct2point */ ,
268	ec_GFp_simple_add,
269	ec_GFp_simple_dbl,
270	ec_GFp_simple_invert,
271	ec_GFp_simple_is_at_infinity,
272	ec_GFp_simple_is_on_curve,
273	ec_GFp_simple_cmp,
274	ec_GFp_simple_make_affine,
275	ec_GFp_simple_points_make_affine,
276	ec_GFp_nistp224_points_mul,
277	ec_GFp_nistp224_precompute_mult,
278	ec_GFp_nistp224_have_precompute_mult,
279	ec_GFp_nist_field_mul,
280	ec_GFp_nist_field_sqr,
281	0 /* field_div */ ,
282	ec_GFp_simple_field_inv,
283	0 /* field_encode */ ,
284	0 /* field_decode */ ,
285	0, /* field_set_to_one */
286	ec_key_simple_priv2oct,
287	ec_key_simple_oct2priv,
288	0, /* set private */
289	ec_key_simple_generate_key,
290	ec_key_simple_check_key,
291	ec_key_simple_generate_public_key,
292	0, /* keycopy */
293	0, /* keyfinish */
294	ecdh_simple_compute_key,
295	0, /* field_inverse_mod_ord */
296	0, /* blind_coordinates */
297	0, /* ladder_pre */
298	0, /* ladder_step */
299	0 /* ladder_post */
300	};
301
302	return &ret;
303	}
304
305	/*
306	* Helper functions to convert field elements to/from internal representation
307	*/
308	static void bin28_to_felem(felem out, const u8 in[28])
309	{
310	out[0] = ((const uint64_t )(in)) & 0x00ffffffffffffff;
311	out[1] = (((const uint64_t )(in + 7))) & 0x00ffffffffffffff;
312	out[2] = (((const uint64_t )(in + 14))) & 0x00ffffffffffffff;
313	out[3] = (((const uint64_t )(in+20))) >> 8;
314	}
315
316	static void felem_to_bin28(u8 out[28], const felem in)
317	{
318	unsigned i;
319	for (i = 0; i < 7; ++i) {
320	out[i] = in[0] >> (8 * i);
321	out[i + 7] = in[1] >> (8 * i);
322	out[i + 14] = in[2] >> (8 * i);
323	out[i + 21] = in[3] >> (8 * i);
324	}
325	}
326
327	/* From OpenSSL BIGNUM to internal representation */
328	static int BN_to_felem(felem out, const BIGNUM *bn)
329	{
330	felem_bytearray b_out;
331	int num_bytes;
332
333	if (BN_is_negative(bn)) {
334	ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
335	return 0;
336	}
337	num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
338	if (num_bytes < 0) {
339	ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
340	return 0;
341	}
342	bin28_to_felem(out, b_out);
343	return 1;
344	}
345
346	/* From internal representation to OpenSSL BIGNUM */
347	static BIGNUM felem_to_BN(BIGNUM out, const felem in)
348	{
349	felem_bytearray b_out;
350	felem_to_bin28(b_out, in);
351	return BN_lebin2bn(b_out, sizeof(b_out), out);
352	}
353
354	/******************************************************************************/
355	/*-
356	* FIELD OPERATIONS
357	*
358	* Field operations, using the internal representation of field elements.
359	* NB! These operations are specific to our point multiplication and cannot be
360	* expected to be correct in general - e.g., multiplication with a large scalar
361	* will cause an overflow.
362	*
363	*/
364
365	static void felem_one(felem out)
366	{
367	out[0] = 1;
368	out[1] = 0;
369	out[2] = 0;
370	out[3] = 0;
371	}
372
373	static void felem_assign(felem out, const felem in)
374	{
375	out[0] = in[0];
376	out[1] = in[1];
377	out[2] = in[2];
378	out[3] = in[3];
379	}
380
381	/* Sum two field elements: out += in */
382	static void felem_sum(felem out, const felem in)
383	{
384	out[0] += in[0];
385	out[1] += in[1];
386	out[2] += in[2];
387	out[3] += in[3];
388	}
389
390	/* Subtract field elements: out -= in */
391	/* Assumes in[i] < 2^57 */
392	static void felem_diff(felem out, const felem in)
393	{
394	static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
395	static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
396	static const limb two58m42m2 = (((limb) 1) << 58) -
397	(((limb) 1) << 42) - (((limb) 1) << 2);
398
399	/* Add 0 mod 2^224-2^96+1 to ensure out > in */
400	out[0] += two58p2;
401	out[1] += two58m42m2;
402	out[2] += two58m2;
403	out[3] += two58m2;
404
405	out[0] -= in[0];
406	out[1] -= in[1];
407	out[2] -= in[2];
408	out[3] -= in[3];
409	}
410
411	/* Subtract in unreduced 128-bit mode: out -= in */
412	/* Assumes in[i] < 2^119 */
413	static void widefelem_diff(widefelem out, const widefelem in)
414	{
415	static const widelimb two120 = ((widelimb) 1) << 120;
416	static const widelimb two120m64 = (((widelimb) 1) << 120) -
417	(((widelimb) 1) << 64);
418	static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
419	(((widelimb) 1) << 104) - (((widelimb) 1) << 64);
420
421	/* Add 0 mod 2^224-2^96+1 to ensure out > in */
422	out[0] += two120;
423	out[1] += two120m64;
424	out[2] += two120m64;
425	out[3] += two120;
426	out[4] += two120m104m64;
427	out[5] += two120m64;
428	out[6] += two120m64;
429
430	out[0] -= in[0];
431	out[1] -= in[1];
432	out[2] -= in[2];
433	out[3] -= in[3];
434	out[4] -= in[4];
435	out[5] -= in[5];
436	out[6] -= in[6];
437	}
438
439	/* Subtract in mixed mode: out128 -= in64 */
440	/* in[i] < 2^63 */
441	static void felem_diff_128_64(widefelem out, const felem in)
442	{
443	static const widelimb two64p8 = (((widelimb) 1) << 64) +
444	(((widelimb) 1) << 8);
445	static const widelimb two64m8 = (((widelimb) 1) << 64) -
446	(((widelimb) 1) << 8);
447	static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
448	(((widelimb) 1) << 48) - (((widelimb) 1) << 8);
449
450	/* Add 0 mod 2^224-2^96+1 to ensure out > in */
451	out[0] += two64p8;
452	out[1] += two64m48m8;
453	out[2] += two64m8;
454	out[3] += two64m8;
455
456	out[0] -= in[0];
457	out[1] -= in[1];
458	out[2] -= in[2];
459	out[3] -= in[3];
460	}
461
462	/*
463	* Multiply a field element by a scalar: out = out * scalar The scalars we
464	* actually use are small, so results fit without overflow
465	*/
466	static void felem_scalar(felem out, const limb scalar)
467	{
468	out[0] *= scalar;
469	out[1] *= scalar;
470	out[2] *= scalar;
471	out[3] *= scalar;
472	}
473
474	/*
475	* Multiply an unreduced field element by a scalar: out = out * scalar The
476	* scalars we actually use are small, so results fit without overflow
477	*/
478	static void widefelem_scalar(widefelem out, const widelimb scalar)
479	{
480	out[0] *= scalar;
481	out[1] *= scalar;
482	out[2] *= scalar;
483	out[3] *= scalar;
484	out[4] *= scalar;
485	out[5] *= scalar;
486	out[6] *= scalar;
487	}
488
489	/* Square a field element: out = in^2 */
490	static void felem_square(widefelem out, const felem in)
491	{
492	limb tmp0, tmp1, tmp2;
493	tmp0 = 2 * in[0];
494	tmp1 = 2 * in[1];
495	tmp2 = 2 * in[2];
496	out[0] = ((widelimb) in[0]) * in[0];
497	out[1] = ((widelimb) in[0]) * tmp1;
498	out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
499	out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
500	out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
501	out[5] = ((widelimb) in[3]) * tmp2;
502	out[6] = ((widelimb) in[3]) * in[3];
503	}
504
505	/* Multiply two field elements: out = in1 * in2 */
506	static void felem_mul(widefelem out, const felem in1, const felem in2)
507	{
508	out[0] = ((widelimb) in1[0]) * in2[0];
509	out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
510	out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
511	((widelimb) in1[2]) * in2[0];
512	out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
513	((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
514	out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
515	((widelimb) in1[3]) * in2[1];
516	out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
517	out[6] = ((widelimb) in1[3]) * in2[3];
518	}
519
520	/*-
521	* Reduce seven 128-bit coefficients to four 64-bit coefficients.
522	* Requires in[i] < 2^126,
523	* ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
524	static void felem_reduce(felem out, const widefelem in)
525	{
526	static const widelimb two127p15 = (((widelimb) 1) << 127) +
527	(((widelimb) 1) << 15);
528	static const widelimb two127m71 = (((widelimb) 1) << 127) -
529	(((widelimb) 1) << 71);
530	static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
531	(((widelimb) 1) << 71) - (((widelimb) 1) << 55);
532	widelimb output[5];
533
534	/* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
535	output[0] = in[0] + two127p15;
536	output[1] = in[1] + two127m71m55;
537	output[2] = in[2] + two127m71;
538	output[3] = in[3];
539	output[4] = in[4];
540
541	/* Eliminate in[4], in[5], in[6] */
542	output[4] += in[6] >> 16;
543	output[3] += (in[6] & 0xffff) << 40;
544	output[2] -= in[6];
545
546	output[3] += in[5] >> 16;
547	output[2] += (in[5] & 0xffff) << 40;
548	output[1] -= in[5];
549
550	output[2] += output[4] >> 16;
551	output[1] += (output[4] & 0xffff) << 40;
552	output[0] -= output[4];
553
554	/* Carry 2 -> 3 -> 4 */
555	output[3] += output[2] >> 56;
556	output[2] &= 0x00ffffffffffffff;
557
558	output[4] = output[3] >> 56;
559	output[3] &= 0x00ffffffffffffff;
560
561	/* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
562
563	/* Eliminate output[4] */
564	output[2] += output[4] >> 16;
565	/* output[2] < 2^56 + 2^56 = 2^57 */
566	output[1] += (output[4] & 0xffff) << 40;
567	output[0] -= output[4];
568
569	/* Carry 0 -> 1 -> 2 -> 3 */
570	output[1] += output[0] >> 56;
571	out[0] = output[0] & 0x00ffffffffffffff;
572
573	output[2] += output[1] >> 56;
574	/* output[2] < 2^57 + 2^72 */
575	out[1] = output[1] & 0x00ffffffffffffff;
576	output[3] += output[2] >> 56;
577	/* output[3] <= 2^56 + 2^16 */
578	out[2] = output[2] & 0x00ffffffffffffff;
579
580	/*-
581	* out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
582	* out[3] <= 2^56 + 2^16 (due to final carry),
583	* so out < 2*p
584	*/
585	out[3] = output[3];
586	}
587
588	static void felem_square_reduce(felem out, const felem in)
589	{
590	widefelem tmp;
591	felem_square(tmp, in);
592	felem_reduce(out, tmp);
593	}
594
595	static void felem_mul_reduce(felem out, const felem in1, const felem in2)
596	{
597	widefelem tmp;
598	felem_mul(tmp, in1, in2);
599	felem_reduce(out, tmp);
600	}
601
602	/*
603	* Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
604	* call felem_reduce first)
605	*/
606	static void felem_contract(felem out, const felem in)
607	{
608	static const int64_t two56 = ((limb) 1) << 56;
609	/* 0 <= in < 2p, p = 2^224 - 2^96 + 1 /
610	/* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
611	int64_t tmp[4], a;
612	tmp[0] = in[0];
613	tmp[1] = in[1];
614	tmp[2] = in[2];
615	tmp[3] = in[3];
616	/* Case 1: a = 1 iff in >= 2^224 */
617	a = (in[3] >> 56);
618	tmp[0] -= a;
619	tmp[1] += a << 40;
620	tmp[3] &= 0x00ffffffffffffff;
621	/*
622	* Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
623	* and the lower part is non-zero
624	*/
625	a = ((in[3] & in[2] & (in[1] \| 0x000000ffffffffff)) + 1) \|
626	(((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
627	a &= 0x00ffffffffffffff;
628	/* turn a into an all-one mask (if a = 0) or an all-zero mask */
629	a = (a - 1) >> 63;
630	/* subtract 2^224 - 2^96 + 1 if a is all-one */
631	tmp[3] &= a ^ 0xffffffffffffffff;
632	tmp[2] &= a ^ 0xffffffffffffffff;
633	tmp[1] &= (a ^ 0xffffffffffffffff) \| 0x000000ffffffffff;
634	tmp[0] -= 1 & a;
635
636	/*
637	* eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
638	* non-zero, so we only need one step
639	*/
640	a = tmp[0] >> 63;
641	tmp[0] += two56 & a;
642	tmp[1] -= 1 & a;
643
644	/* carry 1 -> 2 -> 3 */
645	tmp[2] += tmp[1] >> 56;
646	tmp[1] &= 0x00ffffffffffffff;
647
648	tmp[3] += tmp[2] >> 56;
649	tmp[2] &= 0x00ffffffffffffff;
650
651	/* Now 0 <= out < p */
652	out[0] = tmp[0];
653	out[1] = tmp[1];
654	out[2] = tmp[2];
655	out[3] = tmp[3];
656	}
657
658	/*
659	* Get negative value: out = -in
660	* Requires in[i] < 2^63,
661	* ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
662	*/
663	static void felem_neg(felem out, const felem in)
664	{
665	widefelem tmp = {0};
666	felem_diff_128_64(tmp, in);
667	felem_reduce(out, tmp);
668	}
669
670	/*
671	* Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
672	* elements are reduced to in < 2^225, so we only need to check three cases:
673	* 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
674	*/
675	static limb felem_is_zero(const felem in)
676	{
677	limb zero, two224m96p1, two225m97p2;
678
679	zero = in[0] \| in[1] \| in[2] \| in[3];
680	zero = (((int64_t) (zero) - 1) >> 63) & 1;
681	two224m96p1 = (in[0] ^ 1) \| (in[1] ^ 0x00ffff0000000000)
682	\| (in[2] ^ 0x00ffffffffffffff) \| (in[3] ^ 0x00ffffffffffffff);
683	two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
684	two225m97p2 = (in[0] ^ 2) \| (in[1] ^ 0x00fffe0000000000)
685	\| (in[2] ^ 0x00ffffffffffffff) \| (in[3] ^ 0x01ffffffffffffff);
686	two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
687	return (zero \| two224m96p1 \| two225m97p2);
688	}
689
690	static int felem_is_zero_int(const void *in)
691	{
692	return (int)(felem_is_zero(in) & ((limb) 1));
693	}
694
695	/* Invert a field element */
696	/* Computation chain copied from djb's code */
697	static void felem_inv(felem out, const felem in)
698	{
699	felem ftmp, ftmp2, ftmp3, ftmp4;
700	widefelem tmp;
701	unsigned i;
702
703	felem_square(tmp, in);
704	felem_reduce(ftmp, tmp); /* 2 */
705	felem_mul(tmp, in, ftmp);
706	felem_reduce(ftmp, tmp); /* 2^2 - 1 */
707	felem_square(tmp, ftmp);
708	felem_reduce(ftmp, tmp); /* 2^3 - 2 */
709	felem_mul(tmp, in, ftmp);
710	felem_reduce(ftmp, tmp); /* 2^3 - 1 */
711	felem_square(tmp, ftmp);
712	felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
713	felem_square(tmp, ftmp2);
714	felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
715	felem_square(tmp, ftmp2);
716	felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
717	felem_mul(tmp, ftmp2, ftmp);
718	felem_reduce(ftmp, tmp); /* 2^6 - 1 */
719	felem_square(tmp, ftmp);
720	felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
721	for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
722	felem_square(tmp, ftmp2);
723	felem_reduce(ftmp2, tmp);
724	}
725	felem_mul(tmp, ftmp2, ftmp);
726	felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
727	felem_square(tmp, ftmp2);
728	felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
729	for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
730	felem_square(tmp, ftmp3);
731	felem_reduce(ftmp3, tmp);
732	}
733	felem_mul(tmp, ftmp3, ftmp2);
734	felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
735	felem_square(tmp, ftmp2);
736	felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
737	for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
738	felem_square(tmp, ftmp3);
739	felem_reduce(ftmp3, tmp);
740	}
741	felem_mul(tmp, ftmp3, ftmp2);
742	felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
743	felem_square(tmp, ftmp3);
744	felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
745	for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
746	felem_square(tmp, ftmp4);
747	felem_reduce(ftmp4, tmp);
748	}
749	felem_mul(tmp, ftmp3, ftmp4);
750	felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
751	felem_square(tmp, ftmp3);
752	felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
753	for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
754	felem_square(tmp, ftmp4);
755	felem_reduce(ftmp4, tmp);
756	}
757	felem_mul(tmp, ftmp2, ftmp4);
758	felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
759	for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
760	felem_square(tmp, ftmp2);
761	felem_reduce(ftmp2, tmp);
762	}
763	felem_mul(tmp, ftmp2, ftmp);
764	felem_reduce(ftmp, tmp); /* 2^126 - 1 */
765	felem_square(tmp, ftmp);
766	felem_reduce(ftmp, tmp); /* 2^127 - 2 */
767	felem_mul(tmp, ftmp, in);
768	felem_reduce(ftmp, tmp); /* 2^127 - 1 */
769	for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
770	felem_square(tmp, ftmp);
771	felem_reduce(ftmp, tmp);
772	}
773	felem_mul(tmp, ftmp, ftmp3);
774	felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
775	}
776
777	/*
778	* Copy in constant time: if icopy == 1, copy in to out, if icopy == 0, copy
779	* out to itself.
780	*/
781	static void copy_conditional(felem out, const felem in, limb icopy)
782	{
783	unsigned i;
784	/*
785	* icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
786	*/
787	const limb copy = -icopy;
788	for (i = 0; i < 4; ++i) {
789	const limb tmp = copy & (in[i] ^ out[i]);
790	out[i] ^= tmp;
791	}
792	}
793
794	/******************************************************************************/
795	/*-
796	* ELLIPTIC CURVE POINT OPERATIONS
797	*
798	* Points are represented in Jacobian projective coordinates:
799	* (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
800	* or to the point at infinity if Z == 0.
801	*
802	*/
803
804	/*-
805	* Double an elliptic curve point:
806	* (X', Y', Z') = 2 * (X, Y, Z), where
807	* X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
808	* Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
809	* Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
810	* Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
811	* while x_out == y_in is not (maybe this works, but it's not tested).
812	*/
813	static void
814	point_double(felem x_out, felem y_out, felem z_out,
815	const felem x_in, const felem y_in, const felem z_in)
816	{
817	widefelem tmp, tmp2;
818	felem delta, gamma, beta, alpha, ftmp, ftmp2;
819
820	felem_assign(ftmp, x_in);
821	felem_assign(ftmp2, x_in);
822
823	/* delta = z^2 */
824	felem_square(tmp, z_in);
825	felem_reduce(delta, tmp);
826
827	/* gamma = y^2 */
828	felem_square(tmp, y_in);
829	felem_reduce(gamma, tmp);
830
831	/* beta = xgamma /
832	felem_mul(tmp, x_in, gamma);
833	felem_reduce(beta, tmp);
834
835	/* alpha = 3(x-delta)(x+delta) */
836	felem_diff(ftmp, delta);
837	/* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
838	felem_sum(ftmp2, delta);
839	/* ftmp2[i] < 2^57 + 2^57 = 2^58 */
840	felem_scalar(ftmp2, 3);
841	/* ftmp2[i] < 3 * 2^58 < 2^60 */
842	felem_mul(tmp, ftmp, ftmp2);
843	/* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
844	felem_reduce(alpha, tmp);
845
846	/* x' = alpha^2 - 8beta /
847	felem_square(tmp, alpha);
848	/* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
849	felem_assign(ftmp, beta);
850	felem_scalar(ftmp, 8);
851	/* ftmp[i] < 8 * 2^57 = 2^60 */
852	felem_diff_128_64(tmp, ftmp);
853	/* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
854	felem_reduce(x_out, tmp);
855
856	/* z' = (y + z)^2 - gamma - delta */
857	felem_sum(delta, gamma);
858	/* delta[i] < 2^57 + 2^57 = 2^58 */
859	felem_assign(ftmp, y_in);
860	felem_sum(ftmp, z_in);
861	/* ftmp[i] < 2^57 + 2^57 = 2^58 */
862	felem_square(tmp, ftmp);
863	/* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
864	felem_diff_128_64(tmp, delta);
865	/* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
866	felem_reduce(z_out, tmp);
867
868	/* y' = alpha(4beta - x') - 8gamma^2 /
869	felem_scalar(beta, 4);
870	/* beta[i] < 4 * 2^57 = 2^59 */
871	felem_diff(beta, x_out);
872	/* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
873	felem_mul(tmp, alpha, beta);
874	/* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
875	felem_square(tmp2, gamma);
876	/* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
877	widefelem_scalar(tmp2, 8);
878	/* tmp2[i] < 8 * 2^116 = 2^119 */
879	widefelem_diff(tmp, tmp2);
880	/* tmp[i] < 2^119 + 2^120 < 2^121 */
881	felem_reduce(y_out, tmp);
882	}
883
884	/*-
885	* Add two elliptic curve points:
886	* (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
887	* X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
888	* 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
889	* Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
890	* Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
891	* Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
892	*
893	* This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
894	*/
895
896	/*
897	* This function is not entirely constant-time: it includes a branch for
898	* checking whether the two input points are equal, (while not equal to the
899	* point at infinity). This case never happens during single point
900	* multiplication, so there is no timing leak for ECDH or ECDSA signing.
901	*/
902	static void point_add(felem x3, felem y3, felem z3,
903	const felem x1, const felem y1, const felem z1,
904	const int mixed, const felem x2, const felem y2,
905	const felem z2)
906	{
907	felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
908	widefelem tmp, tmp2;
909	limb z1_is_zero, z2_is_zero, x_equal, y_equal;
910	limb points_equal;
911
912	if (!mixed) {
913	/* ftmp2 = z2^2 */
914	felem_square(tmp, z2);
915	felem_reduce(ftmp2, tmp);
916
917	/* ftmp4 = z2^3 */
918	felem_mul(tmp, ftmp2, z2);
919	felem_reduce(ftmp4, tmp);
920
921	/* ftmp4 = z2^3y1 /
922	felem_mul(tmp2, ftmp4, y1);
923	felem_reduce(ftmp4, tmp2);
924
925	/* ftmp2 = z2^2x1 /
926	felem_mul(tmp2, ftmp2, x1);
927	felem_reduce(ftmp2, tmp2);
928	} else {
929	/*
930	* We'll assume z2 = 1 (special case z2 = 0 is handled later)
931	*/
932
933	/* ftmp4 = z2^3y1 /
934	felem_assign(ftmp4, y1);
935
936	/* ftmp2 = z2^2x1 /
937	felem_assign(ftmp2, x1);
938	}
939
940	/* ftmp = z1^2 */
941	felem_square(tmp, z1);
942	felem_reduce(ftmp, tmp);
943
944	/* ftmp3 = z1^3 */
945	felem_mul(tmp, ftmp, z1);
946	felem_reduce(ftmp3, tmp);
947
948	/* tmp = z1^3y2 /
949	felem_mul(tmp, ftmp3, y2);
950	/* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
951
952	/* ftmp3 = z1^3y2 - z2^3y1 */
953	felem_diff_128_64(tmp, ftmp4);
954	/* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
955	felem_reduce(ftmp3, tmp);
956
957	/* tmp = z1^2x2 /
958	felem_mul(tmp, ftmp, x2);
959	/* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
960
961	/* ftmp = z1^2x2 - z2^2x1 */
962	felem_diff_128_64(tmp, ftmp2);
963	/* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
964	felem_reduce(ftmp, tmp);
965
966	/*
967	* The formulae are incorrect if the points are equal, in affine coordinates
968	* (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
969	* happens.
970	*
971	* We use bitwise operations to avoid potential side-channels introduced by
972	* the short-circuiting behaviour of boolean operators.
973	*/
974	x_equal = felem_is_zero(ftmp);
975	y_equal = felem_is_zero(ftmp3);
976	/*
977	* The special case of either point being the point at infinity (z1 and/or
978	* z2 are zero), is handled separately later on in this function, so we
979	* avoid jumping to point_double here in those special cases.
980	*/
981	z1_is_zero = felem_is_zero(z1);
982	z2_is_zero = felem_is_zero(z2);
983
984	/*
985	* Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
986	* specific implementation `felem_is_zero()` returns truth as `0x1`
987	* (rather than `0xff..ff`).
988	*
989	* This implies that `~true` in this implementation becomes
990	* `0xff..fe` (rather than `0x0`): for this reason, to be used in
991	* the if expression, we mask out only the last bit in the next
992	* line.
993	*/
994	points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
995
996	if (points_equal) {
997	/*
998	* This is obviously not constant-time but, as mentioned before, this
999	* case never happens during single point multiplication, so there is no
1000	* timing leak for ECDH or ECDSA signing.
1001	*/
1002	point_double(x3, y3, z3, x1, y1, z1);
1003	return;
1004	}
1005
1006	/* ftmp5 = z1z2 /
1007	if (!mixed) {
1008	felem_mul(tmp, z1, z2);
1009	felem_reduce(ftmp5, tmp);
1010	} else {
1011	/* special case z2 = 0 is handled later */
1012	felem_assign(ftmp5, z1);
1013	}
1014
1015	/* z_out = (z1^2x2 - z2^2x1)(z1z2) */
1016	felem_mul(tmp, ftmp, ftmp5);
1017	felem_reduce(z_out, tmp);
1018
1019	/* ftmp = (z1^2x2 - z2^2x1)^2 */
1020	felem_assign(ftmp5, ftmp);
1021	felem_square(tmp, ftmp);
1022	felem_reduce(ftmp, tmp);
1023
1024	/* ftmp5 = (z1^2x2 - z2^2x1)^3 */
1025	felem_mul(tmp, ftmp, ftmp5);
1026	felem_reduce(ftmp5, tmp);
1027
1028	/* ftmp2 = z2^2x1(z1^2x2 - z2^2x1)^2 */
1029	felem_mul(tmp, ftmp2, ftmp);
1030	felem_reduce(ftmp2, tmp);
1031
1032	/* tmp = z2^3y1(z1^2x2 - z2^2x1)^3 */
1033	felem_mul(tmp, ftmp4, ftmp5);
1034	/* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1035
1036	/* tmp2 = (z1^3y2 - z2^3y1)^2 */
1037	felem_square(tmp2, ftmp3);
1038	/* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1039
1040	/* tmp2 = (z1^3y2 - z2^3y1)^2 - (z1^2x2 - z2^2x1)^3 */
1041	felem_diff_128_64(tmp2, ftmp5);
1042	/* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1043
1044	/* ftmp5 = 2z2^2x1(z1^2x2 - z2^2x1)^2 /
1045	felem_assign(ftmp5, ftmp2);
1046	felem_scalar(ftmp5, 2);
1047	/* ftmp5[i] < 2 * 2^57 = 2^58 */
1048
1049	/*-
1050	* x_out = (z1^3y2 - z2^3y1)^2 - (z1^2x2 - z2^2x1)^3 -
1051	* 2z2^2x1(z1^2x2 - z2^2*x1)^2
1052	*/
1053	felem_diff_128_64(tmp2, ftmp5);
1054	/* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1055	felem_reduce(x_out, tmp2);
1056
1057	/* ftmp2 = z2^2x1(z1^2x2 - z2^2x1)^2 - x_out */
1058	felem_diff(ftmp2, x_out);
1059	/* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1060
1061	/*
1062	* tmp2 = (z1^3y2 - z2^3y1)(z2^2x1(z1^2x2 - z2^2*x1)^2 - x_out)
1063	*/
1064	felem_mul(tmp2, ftmp3, ftmp2);
1065	/* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1066
1067	/*-
1068	* y_out = (z1^3y2 - z2^3y1)(z2^2x1(z1^2x2 - z2^2*x1)^2 - x_out) -
1069	* z2^3y1(z1^2x2 - z2^2x1)^3
1070	*/
1071	widefelem_diff(tmp2, tmp);
1072	/* tmp2[i] < 2^118 + 2^120 < 2^121 */
1073	felem_reduce(y_out, tmp2);
1074
1075	/*
1076	* the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1077	* the point at infinity, so we need to check for this separately
1078	*/
1079
1080	/*
1081	* if point 1 is at infinity, copy point 2 to output, and vice versa
1082	*/
1083	copy_conditional(x_out, x2, z1_is_zero);
1084	copy_conditional(x_out, x1, z2_is_zero);
1085	copy_conditional(y_out, y2, z1_is_zero);
1086	copy_conditional(y_out, y1, z2_is_zero);
1087	copy_conditional(z_out, z2, z1_is_zero);
1088	copy_conditional(z_out, z1, z2_is_zero);
1089	felem_assign(x3, x_out);
1090	felem_assign(y3, y_out);
1091	felem_assign(z3, z_out);
1092	}
1093
1094	/*
1095	* select_point selects the \|idx\|th point from a precomputation table and
1096	* copies it to out.
1097	* The pre_comp array argument should be size of \|size\| argument
1098	*/
1099	static void select_point(const u64 idx, unsigned int size,
1100	const felem pre_comp[][3], felem out[3])
1101	{
1102	unsigned i, j;
1103	limb *outlimbs = &out[0][0];
1104
1105	memset(out, 0, sizeof(out) 3);
1106	for (i = 0; i < size; i++) {
1107	const limb *inlimbs = &pre_comp[i][0][0];
1108	u64 mask = i ^ idx;
1109	mask \|= mask >> 4;
1110	mask \|= mask >> 2;
1111	mask \|= mask >> 1;
1112	mask &= 1;
1113	mask--;
1114	for (j = 0; j < 4 * 3; j++)
1115	outlimbs[j] \|= inlimbs[j] & mask;
1116	}
1117	}
1118
1119	/* get_bit returns the \|i\|th bit in \|in\| */
1120	static char get_bit(const felem_bytearray in, unsigned i)
1121	{
1122	if (i >= 224)
1123	return 0;
1124	return (in[i >> 3] >> (i & 7)) & 1;
1125	}
1126
1127	/*
1128	* Interleaved point multiplication using precomputed point multiples: The
1129	* small point multiples 0P, 1P, ..., 16*P are in pre_comp[], the scalars
1130	* in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1131	* generator, using certain (large) precomputed multiples in g_pre_comp.
1132	* Output point (X, Y, Z) is stored in x_out, y_out, z_out
1133	*/
1134	static void batch_mul(felem x_out, felem y_out, felem z_out,
1135	const felem_bytearray scalars[],
1136	const unsigned num_points, const u8 *g_scalar,
1137	const int mixed, const felem pre_comp[][17][3],
1138	const felem g_pre_comp[2][16][3])
1139	{
1140	int i, skip;
1141	unsigned num;
1142	unsigned gen_mul = (g_scalar != NULL);
1143	felem nq[3], tmp[4];
1144	u64 bits;
1145	u8 sign, digit;
1146
1147	/* set nq to the point at infinity */
1148	memset(nq, 0, sizeof(nq));
1149
1150	/*
1151	* Loop over all scalars msb-to-lsb, interleaving additions of multiples
1152	* of the generator (two in each of the last 28 rounds) and additions of
1153	* other points multiples (every 5th round).
1154	*/
1155	skip = 1; /* save two point operations in the first
1156	* round */
1157	for (i = (num_points ? 220 : 27); i >= 0; --i) {
1158	/* double */
1159	if (!skip)
1160	point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1161
1162	/* add multiples of the generator */
1163	if (gen_mul && (i <= 27)) {
1164	/* first, look 28 bits upwards */
1165	bits = get_bit(g_scalar, i + 196) << 3;
1166	bits \|= get_bit(g_scalar, i + 140) << 2;
1167	bits \|= get_bit(g_scalar, i + 84) << 1;
1168	bits \|= get_bit(g_scalar, i + 28);
1169	/* select the point to add, in constant time */
1170	select_point(bits, 16, g_pre_comp[1], tmp);
1171
1172	if (!skip) {
1173	/* value 1 below is argument for "mixed" */
1174	point_add(nq[0], nq[1], nq[2],
1175	nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1176	} else {
1177	memcpy(nq, tmp, 3 * sizeof(felem));
1178	skip = 0;
1179	}
1180
1181	/* second, look at the current position */
1182	bits = get_bit(g_scalar, i + 168) << 3;
1183	bits \|= get_bit(g_scalar, i + 112) << 2;
1184	bits \|= get_bit(g_scalar, i + 56) << 1;
1185	bits \|= get_bit(g_scalar, i);
1186	/* select the point to add, in constant time */
1187	select_point(bits, 16, g_pre_comp[0], tmp);
1188	point_add(nq[0], nq[1], nq[2],
1189	nq[0], nq[1], nq[2],
1190	1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1191	}
1192
1193	/* do other additions every 5 doublings */
1194	if (num_points && (i % 5 == 0)) {
1195	/* loop over all scalars */
1196	for (num = 0; num < num_points; ++num) {
1197	bits = get_bit(scalars[num], i + 4) << 5;
1198	bits \|= get_bit(scalars[num], i + 3) << 4;
1199	bits \|= get_bit(scalars[num], i + 2) << 3;
1200	bits \|= get_bit(scalars[num], i + 1) << 2;
1201	bits \|= get_bit(scalars[num], i) << 1;
1202	bits \|= get_bit(scalars[num], i - 1);
1203	ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1204
1205	/* select the point to add or subtract */
1206	select_point(digit, 17, pre_comp[num], tmp);
1207	felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1208	* point */
1209	copy_conditional(tmp[1], tmp[3], sign);
1210
1211	if (!skip) {
1212	point_add(nq[0], nq[1], nq[2],
1213	nq[0], nq[1], nq[2],
1214	mixed, tmp[0], tmp[1], tmp[2]);
1215	} else {
1216	memcpy(nq, tmp, 3 * sizeof(felem));
1217	skip = 0;
1218	}
1219	}
1220	}
1221	}
1222	felem_assign(x_out, nq[0]);
1223	felem_assign(y_out, nq[1]);
1224	felem_assign(z_out, nq[2]);
1225	}
1226
1227	/******************************************************************************/
1228	/*
1229	* FUNCTIONS TO MANAGE PRECOMPUTATION
1230	*/
1231
1232	static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1233	{
1234	NISTP224_PRE_COMP ret = OPENSSL_zalloc(sizeof(ret));
1235
1236	if (!ret) {
1237	ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1238	return ret;
1239	}
1240
1241	ret->references = 1;
1242
1243	ret->lock = CRYPTO_THREAD_lock_new();
1244	if (ret->lock == NULL) {
1245	ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1246	OPENSSL_free(ret);
1247	return NULL;
1248	}
1249	return ret;
1250	}
1251
1252	NISTP224_PRE_COMP EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP p)
1253	{
1254	int i;
1255	if (p != NULL)
1256	CRYPTO_UP_REF(&p->references, &i, p->lock);
1257	return p;
1258	}
1259
1260	void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1261	{
1262	int i;
1263
1264	if (p == NULL)
1265	return;
1266
1267	CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1268	REF_PRINT_COUNT("EC_nistp224", x);
1269	if (i > 0)
1270	return;
1271	REF_ASSERT_ISNT(i < 0);
1272
1273	CRYPTO_THREAD_lock_free(p->lock);
1274	OPENSSL_free(p);
1275	}
1276
1277	/******************************************************************************/
1278	/*
1279	* OPENSSL EC_METHOD FUNCTIONS
1280	*/
1281
1282	int ec_GFp_nistp224_group_init(EC_GROUP *group)
1283	{
1284	int ret;
1285	ret = ec_GFp_simple_group_init(group);
1286	group->a_is_minus3 = 1;
1287	return ret;
1288	}
1289
1290	int ec_GFp_nistp224_group_set_curve(EC_GROUP group, const BIGNUM p,
1291	const BIGNUM a, const BIGNUM b,
1292	BN_CTX *ctx)
1293	{
1294	int ret = 0;
1295	BN_CTX *new_ctx = NULL;
1296	BIGNUM curve_p, curve_a, *curve_b;
1297
1298	if (ctx == NULL)
1299	if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1300	return 0;
1301	BN_CTX_start(ctx);
1302	curve_p = BN_CTX_get(ctx);
1303	curve_a = BN_CTX_get(ctx);
1304	curve_b = BN_CTX_get(ctx);
1305	if (curve_b == NULL)
1306	goto err;
1307	BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1308	BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1309	BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1310	if ((BN_cmp(curve_p, p)) \|\| (BN_cmp(curve_a, a)) \|\| (BN_cmp(curve_b, b))) {
1311	ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1312	EC_R_WRONG_CURVE_PARAMETERS);
1313	goto err;
1314	}
1315	group->field_mod_func = BN_nist_mod_224;
1316	ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1317	err:
1318	BN_CTX_end(ctx);
1319	BN_CTX_free(new_ctx);
1320	return ret;
1321	}
1322
1323	/*
1324	* Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1325	* (X/Z^2, Y/Z^3)
1326	*/
1327	int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1328	const EC_POINT *point,
1329	BIGNUM x, BIGNUM y,
1330	BN_CTX *ctx)
1331	{
1332	felem z1, z2, x_in, y_in, x_out, y_out;
1333	widefelem tmp;
1334
1335	if (EC_POINT_is_at_infinity(group, point)) {
1336	ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1337	EC_R_POINT_AT_INFINITY);
1338	return 0;
1339	}
1340	if ((!BN_to_felem(x_in, point->X)) \|\| (!BN_to_felem(y_in, point->Y)) \|\|
1341	(!BN_to_felem(z1, point->Z)))
1342	return 0;
1343	felem_inv(z2, z1);
1344	felem_square(tmp, z2);
1345	felem_reduce(z1, tmp);
1346	felem_mul(tmp, x_in, z1);
1347	felem_reduce(x_in, tmp);
1348	felem_contract(x_out, x_in);
1349	if (x != NULL) {
1350	if (!felem_to_BN(x, x_out)) {
1351	ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1352	ERR_R_BN_LIB);
1353	return 0;
1354	}
1355	}
1356	felem_mul(tmp, z1, z2);
1357	felem_reduce(z1, tmp);
1358	felem_mul(tmp, y_in, z1);
1359	felem_reduce(y_in, tmp);
1360	felem_contract(y_out, y_in);
1361	if (y != NULL) {
1362	if (!felem_to_BN(y, y_out)) {
1363	ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1364	ERR_R_BN_LIB);
1365	return 0;
1366	}
1367	}
1368	return 1;
1369	}
1370
1371	static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1372	felem tmp_felems[ /* num+1 */ ])
1373	{
1374	/*
1375	* Runs in constant time, unless an input is the point at infinity (which
1376	* normally shouldn't happen).
1377	*/
1378	ec_GFp_nistp_points_make_affine_internal(num,
1379	points,
1380	sizeof(felem),
1381	tmp_felems,
1382	(void ()(void ))felem_one,
1383	felem_is_zero_int,
1384	(void ()(void , const void *))
1385	felem_assign,
1386	(void ()(void , const void *))
1387	felem_square_reduce, (void (*)
1388	(void *,
1389	const void
1390	*,
1391	const void
1392	*))
1393	felem_mul_reduce,
1394	(void ()(void , const void *))
1395	felem_inv,
1396	(void ()(void , const void *))
1397	felem_contract);
1398	}
1399
1400	/*
1401	* Computes scalargenerator + \sum scalars[i]points[i], ignoring NULL
1402	* values Result is stored in r (r can equal one of the inputs).
1403	*/
1404	int ec_GFp_nistp224_points_mul(const EC_GROUP group, EC_POINT r,
1405	const BIGNUM *scalar, size_t num,
1406	const EC_POINT *points[],
1407	const BIGNUM scalars[], BN_CTX ctx)
1408	{
1409	int ret = 0;
1410	int j;
1411	unsigned i;
1412	int mixed = 0;
1413	BIGNUM x, y, z, tmp_scalar;
1414	felem_bytearray g_secret;
1415	felem_bytearray *secrets = NULL;
1416	felem (*pre_comp)[17][3] = NULL;
1417	felem *tmp_felems = NULL;
1418	int num_bytes;
1419	int have_pre_comp = 0;
1420	size_t num_points = num;
1421	felem x_in, y_in, z_in, x_out, y_out, z_out;
1422	NISTP224_PRE_COMP *pre = NULL;
1423	const felem(*g_pre_comp)[16][3] = NULL;
1424	EC_POINT *generator = NULL;
1425	const EC_POINT *p = NULL;
1426	const BIGNUM *p_scalar = NULL;
1427
1428	BN_CTX_start(ctx);
1429	x = BN_CTX_get(ctx);
1430	y = BN_CTX_get(ctx);
1431	z = BN_CTX_get(ctx);
1432	tmp_scalar = BN_CTX_get(ctx);
1433	if (tmp_scalar == NULL)
1434	goto err;
1435
1436	if (scalar != NULL) {
1437	pre = group->pre_comp.nistp224;
1438	if (pre)
1439	/* we have precomputation, try to use it */
1440	g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1441	else
1442	/* try to use the standard precomputation */
1443	g_pre_comp = &gmul[0];
1444	generator = EC_POINT_new(group);
1445	if (generator == NULL)
1446	goto err;
1447	/* get the generator from precomputation */
1448	if (!felem_to_BN(x, g_pre_comp[0][1][0]) \|\|
1449	!felem_to_BN(y, g_pre_comp[0][1][1]) \|\|
1450	!felem_to_BN(z, g_pre_comp[0][1][2])) {
1451	ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1452	goto err;
1453	}
1454	if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1455	generator, x, y, z,
1456	ctx))
1457	goto err;
1458	if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1459	/* precomputation matches generator */
1460	have_pre_comp = 1;
1461	else
1462	/*
1463	* we don't have valid precomputation: treat the generator as a
1464	* random point
1465	*/
1466	num_points = num_points + 1;
1467	}
1468
1469	if (num_points > 0) {
1470	if (num_points >= 3) {
1471	/*
1472	* unless we precompute multiples for just one or two points,
1473	* converting those into affine form is time well spent
1474	*/
1475	mixed = 1;
1476	}
1477	secrets = OPENSSL_zalloc(sizeof(secrets) num_points);
1478	pre_comp = OPENSSL_zalloc(sizeof(pre_comp) num_points);
1479	if (mixed)
1480	tmp_felems =
1481	OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1482	if ((secrets == NULL) \|\| (pre_comp == NULL)
1483	\|\| (mixed && (tmp_felems == NULL))) {
1484	ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1485	goto err;
1486	}
1487
1488	/*
1489	* we treat NULL scalars as 0, and NULL points as points at infinity,
1490	* i.e., they contribute nothing to the linear combination
1491	*/
1492	for (i = 0; i < num_points; ++i) {
1493	if (i == num) {
1494	/* the generator */
1495	p = EC_GROUP_get0_generator(group);
1496	p_scalar = scalar;
1497	} else {
1498	/* the i^th point */
1499	p = points[i];
1500	p_scalar = scalars[i];
1501	}
1502	if ((p_scalar != NULL) && (p != NULL)) {
1503	/* reduce scalar to 0 <= scalar < 2^224 */
1504	if ((BN_num_bits(p_scalar) > 224)
1505	\|\| (BN_is_negative(p_scalar))) {
1506	/*
1507	* this is an unusual input, and we don't guarantee
1508	* constant-timeness
1509	*/
1510	if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1511	ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1512	goto err;
1513	}
1514	num_bytes = BN_bn2lebinpad(tmp_scalar,
1515	secrets[i], sizeof(secrets[i]));
1516	} else {
1517	num_bytes = BN_bn2lebinpad(p_scalar,
1518	secrets[i], sizeof(secrets[i]));
1519	}
1520	if (num_bytes < 0) {
1521	ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1522	goto err;
1523	}
1524	/* precompute multiples */
1525	if ((!BN_to_felem(x_out, p->X)) \|\|
1526	(!BN_to_felem(y_out, p->Y)) \|\|
1527	(!BN_to_felem(z_out, p->Z)))
1528	goto err;
1529	felem_assign(pre_comp[i][1][0], x_out);
1530	felem_assign(pre_comp[i][1][1], y_out);
1531	felem_assign(pre_comp[i][1][2], z_out);
1532	for (j = 2; j <= 16; ++j) {
1533	if (j & 1) {
1534	point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1535	pre_comp[i][j][2], pre_comp[i][1][0],
1536	pre_comp[i][1][1], pre_comp[i][1][2], 0,
1537	pre_comp[i][j - 1][0],
1538	pre_comp[i][j - 1][1],
1539	pre_comp[i][j - 1][2]);
1540	} else {
1541	point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1542	pre_comp[i][j][2], pre_comp[i][j / 2][0],
1543	pre_comp[i][j / 2][1],
1544	pre_comp[i][j / 2][2]);
1545	}
1546	}
1547	}
1548	}
1549	if (mixed)
1550	make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1551	}
1552
1553	/* the scalar for the generator */
1554	if ((scalar != NULL) && (have_pre_comp)) {
1555	memset(g_secret, 0, sizeof(g_secret));
1556	/* reduce scalar to 0 <= scalar < 2^224 */
1557	if ((BN_num_bits(scalar) > 224) \|\| (BN_is_negative(scalar))) {
1558	/*
1559	* this is an unusual input, and we don't guarantee
1560	* constant-timeness
1561	*/
1562	if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1563	ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1564	goto err;
1565	}
1566	num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1567	} else {
1568	num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1569	}
1570	/* do the multiplication with generator precomputation */
1571	batch_mul(x_out, y_out, z_out,
1572	(const felem_bytearray(*))secrets, num_points,
1573	g_secret,
1574	mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1575	} else {
1576	/* do the multiplication without generator precomputation */
1577	batch_mul(x_out, y_out, z_out,
1578	(const felem_bytearray(*))secrets, num_points,
1579	NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1580	}
1581	/* reduce the output to its unique minimal representation */
1582	felem_contract(x_in, x_out);
1583	felem_contract(y_in, y_out);
1584	felem_contract(z_in, z_out);
1585	if ((!felem_to_BN(x, x_in)) \|\| (!felem_to_BN(y, y_in)) \|\|
1586	(!felem_to_BN(z, z_in))) {
1587	ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1588	goto err;
1589	}
1590	ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1591
1592	err:
1593	BN_CTX_end(ctx);
1594	EC_POINT_free(generator);
1595	OPENSSL_free(secrets);
1596	OPENSSL_free(pre_comp);
1597	OPENSSL_free(tmp_felems);
1598	return ret;
1599	}
1600
1601	int ec_GFp_nistp224_precompute_mult(EC_GROUP group, BN_CTX ctx)
1602	{
1603	int ret = 0;
1604	NISTP224_PRE_COMP *pre = NULL;
1605	int i, j;
1606	BN_CTX *new_ctx = NULL;
1607	BIGNUM x, y;
1608	EC_POINT *generator = NULL;
1609	felem tmp_felems[32];
1610
1611	/* throw away old precomputation */
1612	EC_pre_comp_free(group);
1613	if (ctx == NULL)
1614	if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1615	return 0;
1616	BN_CTX_start(ctx);
1617	x = BN_CTX_get(ctx);
1618	y = BN_CTX_get(ctx);
1619	if (y == NULL)
1620	goto err;
1621	/* get the generator */
1622	if (group->generator == NULL)
1623	goto err;
1624	generator = EC_POINT_new(group);
1625	if (generator == NULL)
1626	goto err;
1627	BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1628	BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1629	if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1630	goto err;
1631	if ((pre = nistp224_pre_comp_new()) == NULL)
1632	goto err;
1633	/*
1634	* if the generator is the standard one, use built-in precomputation
1635	*/
1636	if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1637	memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1638	goto done;
1639	}
1640	if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) \|\|
1641	(!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) \|\|
1642	(!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1643	goto err;
1644	/*
1645	* compute 2^56G, 2^112G, 2^168G for the first table, 2^28G, 2^84*G,
1646	* 2^140G, 2^196G for the second one
1647	*/
1648	for (i = 1; i <= 8; i <<= 1) {
1649	point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1650	pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1651	pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1652	for (j = 0; j < 27; ++j) {
1653	point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1654	pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1655	pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1656	}
1657	if (i == 8)
1658	break;
1659	point_double(pre->g_pre_comp[0][2 * i][0],
1660	pre->g_pre_comp[0][2 * i][1],
1661	pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1662	pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1663	for (j = 0; j < 27; ++j) {
1664	point_double(pre->g_pre_comp[0][2 * i][0],
1665	pre->g_pre_comp[0][2 * i][1],
1666	pre->g_pre_comp[0][2 * i][2],
1667	pre->g_pre_comp[0][2 * i][0],
1668	pre->g_pre_comp[0][2 * i][1],
1669	pre->g_pre_comp[0][2 * i][2]);
1670	}
1671	}
1672	for (i = 0; i < 2; i++) {
1673	/* g_pre_comp[i][0] is the point at infinity */
1674	memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1675	/* the remaining multiples */
1676	/* 2^56G + 2^112G resp. 2^84G + 2^140G */
1677	point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1678	pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1679	pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1680	0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1681	pre->g_pre_comp[i][2][2]);
1682	/* 2^56G + 2^168G resp. 2^84G + 2^196G */
1683	point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1684	pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1685	pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1686	0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1687	pre->g_pre_comp[i][2][2]);
1688	/* 2^112G + 2^168G resp. 2^140G + 2^196G */
1689	point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1690	pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1691	pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1692	0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1693	pre->g_pre_comp[i][4][2]);
1694	/*
1695	* 2^56G + 2^112G + 2^168G resp. 2^84G + 2^140G + 2^196G
1696	*/
1697	point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1698	pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1699	pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1700	0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1701	pre->g_pre_comp[i][2][2]);
1702	for (j = 1; j < 8; ++j) {
1703	/* odd multiples: add G resp. 2^28G /
1704	point_add(pre->g_pre_comp[i][2 * j + 1][0],
1705	pre->g_pre_comp[i][2 * j + 1][1],
1706	pre->g_pre_comp[i][2 * j + 1][2],
1707	pre->g_pre_comp[i][2 * j][0],
1708	pre->g_pre_comp[i][2 * j][1],
1709	pre->g_pre_comp[i][2 * j][2], 0,
1710	pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1711	pre->g_pre_comp[i][1][2]);
1712	}
1713	}
1714	make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1715
1716	done:
1717	SETPRECOMP(group, nistp224, pre);
1718	pre = NULL;
1719	ret = 1;
1720	err:
1721	BN_CTX_end(ctx);
1722	EC_POINT_free(generator);
1723	BN_CTX_free(new_ctx);
1724	EC_nistp224_pre_comp_free(pre);
1725	return ret;
1726	}
1727
1728	int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1729	{
1730	return HAVEPRECOMP(group, nistp224);
1731	}
1732
1733	#endif

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/libs/openssl-1.1.1f/crypto/ec/ecp_nistp224.c@ 83531

Download in other formats: