VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.1j/crypto/ec/ecp_nistp224.c@88461

Last change on this file since 88461 was 87984, checked in by vboxsync, 4 years ago

openssl-1.1.1j: Applied and adjusted our OpenSSL changes to 1.1.1j. bugref:9963

File size: 59.3 KB
1/*
2 * Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10/* Copyright 2011 Google Inc.
11 *
12 * Licensed under the Apache License, Version 2.0 (the "License");
13 *
14 * you may not use this file except in compliance with the License.
15 * You may obtain a copy of the License at
16 *
17 * http://www.apache.org/licenses/LICENSE-2.0
18 *
19 * Unless required by applicable law or agreed to in writing, software
20 * distributed under the License is distributed on an "AS IS" BASIS,
21 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 * See the License for the specific language governing permissions and
23 * limitations under the License.
24 */
25
26/*
27 * A 64-bit implementation of the NIST P-224 elliptic curve point multiplication
28 *
29 * Inspired by Daniel J. Bernstein's public domain nistp224 implementation
30 * and Adam Langley's public domain 64-bit C implementation of curve25519
31 */
32
33#include <openssl/opensslconf.h>
34#ifdef OPENSSL_NO_EC_NISTP_64_GCC_128
35NON_EMPTY_TRANSLATION_UNIT
36#else
37
38# include <stdint.h>
39# include <string.h>
40# include <openssl/err.h>
41# include "ec_local.h"
42
43# if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
44 /* even with gcc, the typedef won't work for 32-bit platforms */
45typedef __uint128_t uint128_t; /* nonstandard; implemented by gcc on 64-bit
46 * platforms */
47# else
48# error "Your compiler doesn't appear to support 128-bit integer types"
49# endif
50
51typedef uint8_t u8;
52typedef uint64_t u64;
53
54/******************************************************************************/
55/*-
56 * INTERNAL REPRESENTATION OF FIELD ELEMENTS
57 *
58 * Field elements are represented as a_0 + 2^56*a_1 + 2^112*a_2 + 2^168*a_3
59 * using 64-bit coefficients called 'limbs',
60 * and sometimes (for multiplication results) as
61 * b_0 + 2^56*b_1 + 2^112*b_2 + 2^168*b_3 + 2^224*b_4 + 2^280*b_5 + 2^336*b_6
62 * using 128-bit coefficients called 'widelimbs'.
63 * A 4-limb representation is an 'felem';
64 * a 7-widelimb representation is a 'widefelem'.
65 * Even within felems, bits of adjacent limbs overlap, and we don't always
66 * reduce the representations: we ensure that inputs to each felem
67 * multiplication satisfy a_i < 2^60, so outputs satisfy b_i < 4*2^60*2^60,
68 * and fit into a 128-bit word without overflow. The coefficients are then
69 * again partially reduced to obtain an felem satisfying a_i < 2^57.
70 * We only reduce to the unique minimal representation at the end of the
71 * computation.
72 */
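/*-
 * Worked example (for illustration): the prime p = 2^224 - 2^96 + 1 itself,
 * written in the 4-limb form above, is
 *     a_0 = 1
 *     a_1 = 0x00ffff0000000000 (= 2^56 - 2^40)
 *     a_2 = 0x00ffffffffffffff (= 2^56 - 1)
 *     a_3 = 0x00ffffffffffffff (= 2^56 - 1)
 * since a_3*2^168 + a_2*2^112 = 2^224 - 2^112, a_1*2^56 = 2^112 - 2^96, and
 * a_0 = 1; these are exactly the constants compared against in
 * felem_is_zero() further down.
 */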
73
74typedef uint64_t limb;
75typedef uint64_t limb_aX __attribute((__aligned__(1)));
76typedef uint128_t widelimb;
77
78typedef limb felem[4];
79typedef widelimb widefelem[7];
80
81/*
82 * Field element represented as a byte array. 28*8 = 224 bits is also the
83 * group order size for the elliptic curve, and we also use this type for
84 * scalars for point multiplication.
85 */
86typedef u8 felem_bytearray[28];
87
88static const felem_bytearray nistp224_curve_params[5] = {
89 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
90 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
91 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
92 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a */
93 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
94 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE},
95 {0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, 0xF5, 0x41, /* b */
96 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, 0xD7, 0xBF, 0xD8, 0xBA,
97 0x27, 0x0B, 0x39, 0x43, 0x23, 0x55, 0xFF, 0xB4},
98 {0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, 0x32, 0x13, /* x */
99 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, 0x56, 0xC2, 0x11, 0x22,
100 0x34, 0x32, 0x80, 0xD6, 0x11, 0x5C, 0x1D, 0x21},
101 {0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, /* y */
102 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x07, 0x47, 0x64,
103 0x44, 0xd5, 0x81, 0x99, 0x85, 0x00, 0x7e, 0x34}
104};
105
106/*-
107 * Precomputed multiples of the standard generator
108 * Points are given in coordinates (X, Y, Z) where Z normally is 1
109 * (0 for the point at infinity).
110 * For each field element, slice a_0 is word 0, etc.
111 *
112 * The table has 2 * 16 elements, starting with the following:
113 * index | bits | point
114 * ------+---------+------------------------------
115 * 0 | 0 0 0 0 | 0G
116 * 1 | 0 0 0 1 | 1G
117 * 2 | 0 0 1 0 | 2^56G
118 * 3 | 0 0 1 1 | (2^56 + 1)G
119 * 4 | 0 1 0 0 | 2^112G
120 * 5 | 0 1 0 1 | (2^112 + 1)G
121 * 6 | 0 1 1 0 | (2^112 + 2^56)G
122 * 7 | 0 1 1 1 | (2^112 + 2^56 + 1)G
123 * 8 | 1 0 0 0 | 2^168G
124 * 9 | 1 0 0 1 | (2^168 + 1)G
125 * 10 | 1 0 1 0 | (2^168 + 2^56)G
126 * 11 | 1 0 1 1 | (2^168 + 2^56 + 1)G
127 * 12 | 1 1 0 0 | (2^168 + 2^112)G
128 * 13 | 1 1 0 1 | (2^168 + 2^112 + 1)G
129 * 14 | 1 1 1 0 | (2^168 + 2^112 + 2^56)G
130 * 15 | 1 1 1 1 | (2^168 + 2^112 + 2^56 + 1)G
131 * followed by a copy of this with each element multiplied by 2^28.
132 *
133 * The reason for this is so that we can clock bits into four different
134 * locations when doing simple scalar multiplies against the base point,
135 * and then another four locations using the second 16 elements.
136 */
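/*-
 * Illustration of how the table is indexed (matching batch_mul() below):
 * when processing bit position i of the generator scalar, 0 <= i < 28, the
 * scalar bits at positions i+168, i+112, i+56 and i form the index into the
 * first 16 entries, and the bits at positions i+196, i+140, i+84 and i+28
 * form the index into the second 16 entries, so each of the 28 iterations
 * consumes 8 bits of the 224-bit scalar.
 */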
137static const felem gmul[2][16][3] = {
138{{{0, 0, 0, 0},
139 {0, 0, 0, 0},
140 {0, 0, 0, 0}},
141 {{0x3280d6115c1d21, 0xc1d356c2112234, 0x7f321390b94a03, 0xb70e0cbd6bb4bf},
142 {0xd5819985007e34, 0x75a05a07476444, 0xfb4c22dfe6cd43, 0xbd376388b5f723},
143 {1, 0, 0, 0}},
144 {{0xfd9675666ebbe9, 0xbca7664d40ce5e, 0x2242df8d8a2a43, 0x1f49bbb0f99bc5},
145 {0x29e0b892dc9c43, 0xece8608436e662, 0xdc858f185310d0, 0x9812dd4eb8d321},
146 {1, 0, 0, 0}},
147 {{0x6d3e678d5d8eb8, 0x559eed1cb362f1, 0x16e9a3bbce8a3f, 0xeedcccd8c2a748},
148 {0xf19f90ed50266d, 0xabf2b4bf65f9df, 0x313865468fafec, 0x5cb379ba910a17},
149 {1, 0, 0, 0}},
150 {{0x0641966cab26e3, 0x91fb2991fab0a0, 0xefec27a4e13a0b, 0x0499aa8a5f8ebe},
151 {0x7510407766af5d, 0x84d929610d5450, 0x81d77aae82f706, 0x6916f6d4338c5b},
152 {1, 0, 0, 0}},
153 {{0xea95ac3b1f15c6, 0x086000905e82d4, 0xdd323ae4d1c8b1, 0x932b56be7685a3},
154 {0x9ef93dea25dbbf, 0x41665960f390f0, 0xfdec76dbe2a8a7, 0x523e80f019062a},
155 {1, 0, 0, 0}},
156 {{0x822fdd26732c73, 0xa01c83531b5d0f, 0x363f37347c1ba4, 0xc391b45c84725c},
157 {0xbbd5e1b2d6ad24, 0xddfbcde19dfaec, 0xc393da7e222a7f, 0x1efb7890ede244},
158 {1, 0, 0, 0}},
159 {{0x4c9e90ca217da1, 0xd11beca79159bb, 0xff8d33c2c98b7c, 0x2610b39409f849},
160 {0x44d1352ac64da0, 0xcdbb7b2c46b4fb, 0x966c079b753c89, 0xfe67e4e820b112},
161 {1, 0, 0, 0}},
162 {{0xe28cae2df5312d, 0xc71b61d16f5c6e, 0x79b7619a3e7c4c, 0x05c73240899b47},
163 {0x9f7f6382c73e3a, 0x18615165c56bda, 0x641fab2116fd56, 0x72855882b08394},
164 {1, 0, 0, 0}},
165 {{0x0469182f161c09, 0x74a98ca8d00fb5, 0xb89da93489a3e0, 0x41c98768fb0c1d},
166 {0xe5ea05fb32da81, 0x3dce9ffbca6855, 0x1cfe2d3fbf59e6, 0x0e5e03408738a7},
167 {1, 0, 0, 0}},
168 {{0xdab22b2333e87f, 0x4430137a5dd2f6, 0xe03ab9f738beb8, 0xcb0c5d0dc34f24},
169 {0x764a7df0c8fda5, 0x185ba5c3fa2044, 0x9281d688bcbe50, 0xc40331df893881},
170 {1, 0, 0, 0}},
171 {{0xb89530796f0f60, 0xade92bd26909a3, 0x1a0c83fb4884da, 0x1765bf22a5a984},
172 {0x772a9ee75db09e, 0x23bc6c67cec16f, 0x4c1edba8b14e2f, 0xe2a215d9611369},
173 {1, 0, 0, 0}},
174 {{0x571e509fb5efb3, 0xade88696410552, 0xc8ae85fada74fe, 0x6c7e4be83bbde3},
175 {0xff9f51160f4652, 0xb47ce2495a6539, 0xa2946c53b582f4, 0x286d2db3ee9a60},
176 {1, 0, 0, 0}},
177 {{0x40bbd5081a44af, 0x0995183b13926c, 0xbcefba6f47f6d0, 0x215619e9cc0057},
178 {0x8bc94d3b0df45e, 0xf11c54a3694f6f, 0x8631b93cdfe8b5, 0xe7e3f4b0982db9},
179 {1, 0, 0, 0}},
180 {{0xb17048ab3e1c7b, 0xac38f36ff8a1d8, 0x1c29819435d2c6, 0xc813132f4c07e9},
181 {0x2891425503b11f, 0x08781030579fea, 0xf5426ba5cc9674, 0x1e28ebf18562bc},
182 {1, 0, 0, 0}},
183 {{0x9f31997cc864eb, 0x06cd91d28b5e4c, 0xff17036691a973, 0xf1aef351497c58},
184 {0xdd1f2d600564ff, 0xdead073b1402db, 0x74a684435bd693, 0xeea7471f962558},
185 {1, 0, 0, 0}}},
186{{{0, 0, 0, 0},
187 {0, 0, 0, 0},
188 {0, 0, 0, 0}},
189 {{0x9665266dddf554, 0x9613d78b60ef2d, 0xce27a34cdba417, 0xd35ab74d6afc31},
190 {0x85ccdd22deb15e, 0x2137e5783a6aab, 0xa141cffd8c93c6, 0x355a1830e90f2d},
191 {1, 0, 0, 0}},
192 {{0x1a494eadaade65, 0xd6da4da77fe53c, 0xe7992996abec86, 0x65c3553c6090e3},
193 {0xfa610b1fb09346, 0xf1c6540b8a4aaf, 0xc51a13ccd3cbab, 0x02995b1b18c28a},
194 {1, 0, 0, 0}},
195 {{0x7874568e7295ef, 0x86b419fbe38d04, 0xdc0690a7550d9a, 0xd3966a44beac33},
196 {0x2b7280ec29132f, 0xbeaa3b6a032df3, 0xdc7dd88ae41200, 0xd25e2513e3a100},
197 {1, 0, 0, 0}},
198 {{0x924857eb2efafd, 0xac2bce41223190, 0x8edaa1445553fc, 0x825800fd3562d5},
199 {0x8d79148ea96621, 0x23a01c3dd9ed8d, 0xaf8b219f9416b5, 0xd8db0cc277daea},
200 {1, 0, 0, 0}},
201 {{0x76a9c3b1a700f0, 0xe9acd29bc7e691, 0x69212d1a6b0327, 0x6322e97fe154be},
202 {0x469fc5465d62aa, 0x8d41ed18883b05, 0x1f8eae66c52b88, 0xe4fcbe9325be51},
203 {1, 0, 0, 0}},
204 {{0x825fdf583cac16, 0x020b857c7b023a, 0x683c17744b0165, 0x14ffd0a2daf2f1},
205 {0x323b36184218f9, 0x4944ec4e3b47d4, 0xc15b3080841acf, 0x0bced4b01a28bb},
206 {1, 0, 0, 0}},
207 {{0x92ac22230df5c4, 0x52f33b4063eda8, 0xcb3f19870c0c93, 0x40064f2ba65233},
208 {0xfe16f0924f8992, 0x012da25af5b517, 0x1a57bb24f723a6, 0x06f8bc76760def},
209 {1, 0, 0, 0}},
210 {{0x4a7084f7817cb9, 0xbcab0738ee9a78, 0x3ec11e11d9c326, 0xdc0fe90e0f1aae},
211 {0xcf639ea5f98390, 0x5c350aa22ffb74, 0x9afae98a4047b7, 0x956ec2d617fc45},
212 {1, 0, 0, 0}},
213 {{0x4306d648c1be6a, 0x9247cd8bc9a462, 0xf5595e377d2f2e, 0xbd1c3caff1a52e},
214 {0x045e14472409d0, 0x29f3e17078f773, 0x745a602b2d4f7d, 0x191837685cdfbb},
215 {1, 0, 0, 0}},
216 {{0x5b6ee254a8cb79, 0x4953433f5e7026, 0xe21faeb1d1def4, 0xc4c225785c09de},
217 {0x307ce7bba1e518, 0x31b125b1036db8, 0x47e91868839e8f, 0xc765866e33b9f3},
218 {1, 0, 0, 0}},
219 {{0x3bfece24f96906, 0x4794da641e5093, 0xde5df64f95db26, 0x297ecd89714b05},
220 {0x701bd3ebb2c3aa, 0x7073b4f53cb1d5, 0x13c5665658af16, 0x9895089d66fe58},
221 {1, 0, 0, 0}},
222 {{0x0fef05f78c4790, 0x2d773633b05d2e, 0x94229c3a951c94, 0xbbbd70df4911bb},
223 {0xb2c6963d2c1168, 0x105f47a72b0d73, 0x9fdf6111614080, 0x7b7e94b39e67b0},
224 {1, 0, 0, 0}},
225 {{0xad1a7d6efbe2b3, 0xf012482c0da69d, 0x6b3bdf12438345, 0x40d7558d7aa4d9},
226 {0x8a09fffb5c6d3d, 0x9a356e5d9ffd38, 0x5973f15f4f9b1c, 0xdcd5f59f63c3ea},
227 {1, 0, 0, 0}},
228 {{0xacf39f4c5ca7ab, 0x4c8071cc5fd737, 0xc64e3602cd1184, 0x0acd4644c9abba},
229 {0x6c011a36d8bf6e, 0xfecd87ba24e32a, 0x19f6f56574fad8, 0x050b204ced9405},
230 {1, 0, 0, 0}},
231 {{0xed4f1cae7d9a96, 0x5ceef7ad94c40a, 0x778e4a3bf3ef9b, 0x7405783dc3b55e},
232 {0x32477c61b6e8c6, 0xb46a97570f018b, 0x91176d0a7e95d1, 0x3df90fbc4c7d0e},
233 {1, 0, 0, 0}}}
234};
235
236/* Precomputation for the group generator. */
237struct nistp224_pre_comp_st {
238 felem g_pre_comp[2][16][3];
239 CRYPTO_REF_COUNT references;
240 CRYPTO_RWLOCK *lock;
241};
242
243const EC_METHOD *EC_GFp_nistp224_method(void)
244{
245 static const EC_METHOD ret = {
246 EC_FLAGS_DEFAULT_OCT,
247 NID_X9_62_prime_field,
248 ec_GFp_nistp224_group_init,
249 ec_GFp_simple_group_finish,
250 ec_GFp_simple_group_clear_finish,
251 ec_GFp_nist_group_copy,
252 ec_GFp_nistp224_group_set_curve,
253 ec_GFp_simple_group_get_curve,
254 ec_GFp_simple_group_get_degree,
255 ec_group_simple_order_bits,
256 ec_GFp_simple_group_check_discriminant,
257 ec_GFp_simple_point_init,
258 ec_GFp_simple_point_finish,
259 ec_GFp_simple_point_clear_finish,
260 ec_GFp_simple_point_copy,
261 ec_GFp_simple_point_set_to_infinity,
262 ec_GFp_simple_set_Jprojective_coordinates_GFp,
263 ec_GFp_simple_get_Jprojective_coordinates_GFp,
264 ec_GFp_simple_point_set_affine_coordinates,
265 ec_GFp_nistp224_point_get_affine_coordinates,
266 0 /* point_set_compressed_coordinates */ ,
267 0 /* point2oct */ ,
268 0 /* oct2point */ ,
269 ec_GFp_simple_add,
270 ec_GFp_simple_dbl,
271 ec_GFp_simple_invert,
272 ec_GFp_simple_is_at_infinity,
273 ec_GFp_simple_is_on_curve,
274 ec_GFp_simple_cmp,
275 ec_GFp_simple_make_affine,
276 ec_GFp_simple_points_make_affine,
277 ec_GFp_nistp224_points_mul,
278 ec_GFp_nistp224_precompute_mult,
279 ec_GFp_nistp224_have_precompute_mult,
280 ec_GFp_nist_field_mul,
281 ec_GFp_nist_field_sqr,
282 0 /* field_div */ ,
283 ec_GFp_simple_field_inv,
284 0 /* field_encode */ ,
285 0 /* field_decode */ ,
286 0, /* field_set_to_one */
287 ec_key_simple_priv2oct,
288 ec_key_simple_oct2priv,
289 0, /* set private */
290 ec_key_simple_generate_key,
291 ec_key_simple_check_key,
292 ec_key_simple_generate_public_key,
293 0, /* keycopy */
294 0, /* keyfinish */
295 ecdh_simple_compute_key,
296 0, /* field_inverse_mod_ord */
297 0, /* blind_coordinates */
298 0, /* ladder_pre */
299 0, /* ladder_step */
300 0 /* ladder_post */
301 };
302
303 return &ret;
304}
305
306/*
307 * Helper functions to convert field elements to/from internal representation
308 */
309static void bin28_to_felem(felem out, const u8 in[28])
310{
311 out[0] = *((const limb *)(in)) & 0x00ffffffffffffff;
312 out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff;
313 out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff;
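    /*
     * The last load starts at offset 20 rather than 21 so that only bytes
     * 20..27 of the 28-byte input are read: byte 20 already belongs to
     * out[2] and is discarded by the '>> 8' below, which avoids an 8-byte
     * load running past the end of the array.
     */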
314 out[3] = (*((const limb_aX *)(in + 20))) >> 8;
315}
316
317static void felem_to_bin28(u8 out[28], const felem in)
318{
319 unsigned i;
320 for (i = 0; i < 7; ++i) {
321 out[i] = in[0] >> (8 * i);
322 out[i + 7] = in[1] >> (8 * i);
323 out[i + 14] = in[2] >> (8 * i);
324 out[i + 21] = in[3] >> (8 * i);
325 }
326}
327
328/* From OpenSSL BIGNUM to internal representation */
329static int BN_to_felem(felem out, const BIGNUM *bn)
330{
331 felem_bytearray b_out;
332 int num_bytes;
333
334 if (BN_is_negative(bn)) {
335 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
336 return 0;
337 }
338 num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
339 if (num_bytes < 0) {
340 ECerr(EC_F_BN_TO_FELEM, EC_R_BIGNUM_OUT_OF_RANGE);
341 return 0;
342 }
343 bin28_to_felem(out, b_out);
344 return 1;
345}
346
347/* From internal representation to OpenSSL BIGNUM */
348static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
349{
350 felem_bytearray b_out;
351 felem_to_bin28(b_out, in);
352 return BN_lebin2bn(b_out, sizeof(b_out), out);
353}
354
355/******************************************************************************/
356/*-
357 * FIELD OPERATIONS
358 *
359 * Field operations, using the internal representation of field elements.
360 * NB! These operations are specific to our point multiplication and cannot be
361 * expected to be correct in general - e.g., multiplication with a large scalar
362 * will cause an overflow.
363 *
364 */
365
366static void felem_one(felem out)
367{
368 out[0] = 1;
369 out[1] = 0;
370 out[2] = 0;
371 out[3] = 0;
372}
373
374static void felem_assign(felem out, const felem in)
375{
376 out[0] = in[0];
377 out[1] = in[1];
378 out[2] = in[2];
379 out[3] = in[3];
380}
381
382/* Sum two field elements: out += in */
383static void felem_sum(felem out, const felem in)
384{
385 out[0] += in[0];
386 out[1] += in[1];
387 out[2] += in[2];
388 out[3] += in[3];
389}
390
391/* Subtract field elements: out -= in */
392/* Assumes in[i] < 2^57 */
393static void felem_diff(felem out, const felem in)
394{
395 static const limb two58p2 = (((limb) 1) << 58) + (((limb) 1) << 2);
396 static const limb two58m2 = (((limb) 1) << 58) - (((limb) 1) << 2);
397 static const limb two58m42m2 = (((limb) 1) << 58) -
398 (((limb) 1) << 42) - (((limb) 1) << 2);
399
400 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
401 out[0] += two58p2;
402 out[1] += two58m42m2;
403 out[2] += two58m2;
404 out[3] += two58m2;
405
406 out[0] -= in[0];
407 out[1] -= in[1];
408 out[2] -= in[2];
409 out[3] -= in[3];
410}
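/*-
 * Worked check of the constants above (for illustration): the value added
 * before subtracting is
 *     two58p2 + two58m42m2*2^56 + two58m2*2^112 + two58m2*2^168
 *       = 2^2 - 2^98 + 2^226 = 4*(2^224 - 2^96 + 1) = 4*p,
 * i.e. a multiple of p, so the result is unchanged modulo p while every
 * limb of 'out' becomes larger than the corresponding limb of 'in'. The
 * analogous constants in widefelem_diff() and felem_diff_128_64() below
 * add 2^232*p and 2^8*p respectively.
 */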
411
412/* Subtract in unreduced 128-bit mode: out -= in */
413/* Assumes in[i] < 2^119 */
414static void widefelem_diff(widefelem out, const widefelem in)
415{
416 static const widelimb two120 = ((widelimb) 1) << 120;
417 static const widelimb two120m64 = (((widelimb) 1) << 120) -
418 (((widelimb) 1) << 64);
419 static const widelimb two120m104m64 = (((widelimb) 1) << 120) -
420 (((widelimb) 1) << 104) - (((widelimb) 1) << 64);
421
422 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
423 out[0] += two120;
424 out[1] += two120m64;
425 out[2] += two120m64;
426 out[3] += two120;
427 out[4] += two120m104m64;
428 out[5] += two120m64;
429 out[6] += two120m64;
430
431 out[0] -= in[0];
432 out[1] -= in[1];
433 out[2] -= in[2];
434 out[3] -= in[3];
435 out[4] -= in[4];
436 out[5] -= in[5];
437 out[6] -= in[6];
438}
439
440/* Subtract in mixed mode: out128 -= in64 */
441/* in[i] < 2^63 */
442static void felem_diff_128_64(widefelem out, const felem in)
443{
444 static const widelimb two64p8 = (((widelimb) 1) << 64) +
445 (((widelimb) 1) << 8);
446 static const widelimb two64m8 = (((widelimb) 1) << 64) -
447 (((widelimb) 1) << 8);
448 static const widelimb two64m48m8 = (((widelimb) 1) << 64) -
449 (((widelimb) 1) << 48) - (((widelimb) 1) << 8);
450
451 /* Add 0 mod 2^224-2^96+1 to ensure out > in */
452 out[0] += two64p8;
453 out[1] += two64m48m8;
454 out[2] += two64m8;
455 out[3] += two64m8;
456
457 out[0] -= in[0];
458 out[1] -= in[1];
459 out[2] -= in[2];
460 out[3] -= in[3];
461}
462
463/*
464 * Multiply a field element by a scalar: out = out * scalar. The scalars we
465 * actually use are small, so results fit without overflow.
466 */
467static void felem_scalar(felem out, const limb scalar)
468{
469 out[0] *= scalar;
470 out[1] *= scalar;
471 out[2] *= scalar;
472 out[3] *= scalar;
473}
474
475/*
476 * Multiply an unreduced field element by a scalar: out = out * scalar. The
477 * scalars we actually use are small, so results fit without overflow.
478 */
479static void widefelem_scalar(widefelem out, const widelimb scalar)
480{
481 out[0] *= scalar;
482 out[1] *= scalar;
483 out[2] *= scalar;
484 out[3] *= scalar;
485 out[4] *= scalar;
486 out[5] *= scalar;
487 out[6] *= scalar;
488}
489
490/* Square a field element: out = in^2 */
491static void felem_square(widefelem out, const felem in)
492{
493 limb tmp0, tmp1, tmp2;
494 tmp0 = 2 * in[0];
495 tmp1 = 2 * in[1];
496 tmp2 = 2 * in[2];
497 out[0] = ((widelimb) in[0]) * in[0];
498 out[1] = ((widelimb) in[0]) * tmp1;
499 out[2] = ((widelimb) in[0]) * tmp2 + ((widelimb) in[1]) * in[1];
500 out[3] = ((widelimb) in[3]) * tmp0 + ((widelimb) in[1]) * tmp2;
501 out[4] = ((widelimb) in[3]) * tmp1 + ((widelimb) in[2]) * in[2];
502 out[5] = ((widelimb) in[3]) * tmp2;
503 out[6] = ((widelimb) in[3]) * in[3];
504}
505
506/* Multiply two field elements: out = in1 * in2 */
507static void felem_mul(widefelem out, const felem in1, const felem in2)
508{
509 out[0] = ((widelimb) in1[0]) * in2[0];
510 out[1] = ((widelimb) in1[0]) * in2[1] + ((widelimb) in1[1]) * in2[0];
511 out[2] = ((widelimb) in1[0]) * in2[2] + ((widelimb) in1[1]) * in2[1] +
512 ((widelimb) in1[2]) * in2[0];
513 out[3] = ((widelimb) in1[0]) * in2[3] + ((widelimb) in1[1]) * in2[2] +
514 ((widelimb) in1[2]) * in2[1] + ((widelimb) in1[3]) * in2[0];
515 out[4] = ((widelimb) in1[1]) * in2[3] + ((widelimb) in1[2]) * in2[2] +
516 ((widelimb) in1[3]) * in2[1];
517 out[5] = ((widelimb) in1[2]) * in2[3] + ((widelimb) in1[3]) * in2[2];
518 out[6] = ((widelimb) in1[3]) * in2[3];
519}
520
521/*-
522 * Reduce seven 128-bit coefficients to four 64-bit coefficients.
523 * Requires in[i] < 2^126,
524 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16 */
525static void felem_reduce(felem out, const widefelem in)
526{
527 static const widelimb two127p15 = (((widelimb) 1) << 127) +
528 (((widelimb) 1) << 15);
529 static const widelimb two127m71 = (((widelimb) 1) << 127) -
530 (((widelimb) 1) << 71);
531 static const widelimb two127m71m55 = (((widelimb) 1) << 127) -
532 (((widelimb) 1) << 71) - (((widelimb) 1) << 55);
533 widelimb output[5];
534
535 /* Add 0 mod 2^224-2^96+1 to ensure all differences are positive */
536 output[0] = in[0] + two127p15;
537 output[1] = in[1] + two127m71m55;
538 output[2] = in[2] + two127m71;
539 output[3] = in[3];
540 output[4] = in[4];
541
542 /* Eliminate in[4], in[5], in[6] */
543 output[4] += in[6] >> 16;
544 output[3] += (in[6] & 0xffff) << 40;
545 output[2] -= in[6];
546
547 output[3] += in[5] >> 16;
548 output[2] += (in[5] & 0xffff) << 40;
549 output[1] -= in[5];
550
551 output[2] += output[4] >> 16;
552 output[1] += (output[4] & 0xffff) << 40;
553 output[0] -= output[4];
554
555 /* Carry 2 -> 3 -> 4 */
556 output[3] += output[2] >> 56;
557 output[2] &= 0x00ffffffffffffff;
558
559 output[4] = output[3] >> 56;
560 output[3] &= 0x00ffffffffffffff;
561
562 /* Now output[2] < 2^56, output[3] < 2^56, output[4] < 2^72 */
563
564 /* Eliminate output[4] */
565 output[2] += output[4] >> 16;
566 /* output[2] < 2^56 + 2^56 = 2^57 */
567 output[1] += (output[4] & 0xffff) << 40;
568 output[0] -= output[4];
569
570 /* Carry 0 -> 1 -> 2 -> 3 */
571 output[1] += output[0] >> 56;
572 out[0] = output[0] & 0x00ffffffffffffff;
573
574 output[2] += output[1] >> 56;
575 /* output[2] < 2^57 + 2^72 */
576 out[1] = output[1] & 0x00ffffffffffffff;
577 output[3] += output[2] >> 56;
578 /* output[3] <= 2^56 + 2^16 */
579 out[2] = output[2] & 0x00ffffffffffffff;
580
581 /*-
582 * out[0] < 2^56, out[1] < 2^56, out[2] < 2^56,
583 * out[3] <= 2^56 + 2^16 (due to final carry),
584 * so out < 2*p
585 */
586 out[3] = output[3];
587}
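/*-
 * Sketch of the identity used in the eliminations above (for illustration):
 * since p = 2^224 - 2^96 + 1, we have 2^224 == 2^96 - 1 (mod p). A
 * coefficient c at position 2^336 (in[6]) therefore satisfies
 *     c*2^336 = c*2^112 * 2^224 == c*2^208 - c*2^112 (mod p),
 * which is what the code computes: c >> 16 is added at 2^224 (output[4]),
 * (c & 0xffff) << 40 is added at 2^168 (output[3]) -- together c*2^208 --
 * and c is subtracted at 2^112 (output[2]). The same folding eliminates
 * in[5] and the carry left in output[4].
 */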
588
589static void felem_square_reduce(felem out, const felem in)
590{
591 widefelem tmp;
592 felem_square(tmp, in);
593 felem_reduce(out, tmp);
594}
595
596static void felem_mul_reduce(felem out, const felem in1, const felem in2)
597{
598 widefelem tmp;
599 felem_mul(tmp, in1, in2);
600 felem_reduce(out, tmp);
601}
602
603/*
604 * Reduce to unique minimal representation. Requires 0 <= in < 2*p (always
605 * call felem_reduce first)
606 */
607static void felem_contract(felem out, const felem in)
608{
609 static const int64_t two56 = ((limb) 1) << 56;
610 /* 0 <= in < 2*p, p = 2^224 - 2^96 + 1 */
611 /* if in > p , reduce in = in - 2^224 + 2^96 - 1 */
612 int64_t tmp[4], a;
613 tmp[0] = in[0];
614 tmp[1] = in[1];
615 tmp[2] = in[2];
616 tmp[3] = in[3];
617 /* Case 1: a = 1 iff in >= 2^224 */
618 a = (in[3] >> 56);
619 tmp[0] -= a;
620 tmp[1] += a << 40;
621 tmp[3] &= 0x00ffffffffffffff;
622 /*
623 * Case 2: a = 0 iff p <= in < 2^224, i.e., the high 128 bits are all 1
624 * and the lower part is non-zero
625 */
626 a = ((in[3] & in[2] & (in[1] | 0x000000ffffffffff)) + 1) |
627 (((int64_t) (in[0] + (in[1] & 0x000000ffffffffff)) - 1) >> 63);
628 a &= 0x00ffffffffffffff;
629 /* turn a into an all-one mask (if a = 0) or an all-zero mask */
630 a = (a - 1) >> 63;
631 /* subtract 2^224 - 2^96 + 1 if a is all-one */
632 tmp[3] &= a ^ 0xffffffffffffffff;
633 tmp[2] &= a ^ 0xffffffffffffffff;
634 tmp[1] &= (a ^ 0xffffffffffffffff) | 0x000000ffffffffff;
635 tmp[0] -= 1 & a;
636
637 /*
638 * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
639 * non-zero, so we only need one step
640 */
641 a = tmp[0] >> 63;
642 tmp[0] += two56 & a;
643 tmp[1] -= 1 & a;
644
645 /* carry 1 -> 2 -> 3 */
646 tmp[2] += tmp[1] >> 56;
647 tmp[1] &= 0x00ffffffffffffff;
648
649 tmp[3] += tmp[2] >> 56;
650 tmp[2] &= 0x00ffffffffffffff;
651
652 /* Now 0 <= out < p */
653 out[0] = tmp[0];
654 out[1] = tmp[1];
655 out[2] = tmp[2];
656 out[3] = tmp[3];
657}
658
659/*
660 * Get negative value: out = -in
661 * Requires in[i] < 2^63,
662 * ensures out[0] < 2^56, out[1] < 2^56, out[2] < 2^56, out[3] <= 2^56 + 2^16
663 */
664static void felem_neg(felem out, const felem in)
665{
666 widefelem tmp = {0};
667 felem_diff_128_64(tmp, in);
668 felem_reduce(out, tmp);
669}
670
671/*
672 * Zero-check: returns 1 if input is 0, and 0 otherwise. We know that field
673 * elements are reduced to in < 2^225, so we only need to check three cases:
674 * 0, 2^224 - 2^96 + 1, and 2^225 - 2^97 + 2
675 */
676static limb felem_is_zero(const felem in)
677{
678 limb zero, two224m96p1, two225m97p2;
679
680 zero = in[0] | in[1] | in[2] | in[3];
681 zero = (((int64_t) (zero) - 1) >> 63) & 1;
682 two224m96p1 = (in[0] ^ 1) | (in[1] ^ 0x00ffff0000000000)
683 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x00ffffffffffffff);
684 two224m96p1 = (((int64_t) (two224m96p1) - 1) >> 63) & 1;
685 two225m97p2 = (in[0] ^ 2) | (in[1] ^ 0x00fffe0000000000)
686 | (in[2] ^ 0x00ffffffffffffff) | (in[3] ^ 0x01ffffffffffffff);
687 two225m97p2 = (((int64_t) (two225m97p2) - 1) >> 63) & 1;
688 return (zero | two224m96p1 | two225m97p2);
689}
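/*-
 * For reference: 2^224 - 2^96 + 1 is p and 2^225 - 2^97 + 2 is 2*p; the
 * hex constants above are simply these two values written out limb by
 * limb (e.g. 0x01ffffffffffffff = 2^57 - 1 is the top limb of 2*p).
 */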
690
691static int felem_is_zero_int(const void *in)
692{
693 return (int)(felem_is_zero(in) & ((limb) 1));
694}
695
696/* Invert a field element */
697/* Computation chain copied from djb's code */
698static void felem_inv(felem out, const felem in)
699{
700 felem ftmp, ftmp2, ftmp3, ftmp4;
701 widefelem tmp;
702 unsigned i;
703
704 felem_square(tmp, in);
705 felem_reduce(ftmp, tmp); /* 2 */
706 felem_mul(tmp, in, ftmp);
707 felem_reduce(ftmp, tmp); /* 2^2 - 1 */
708 felem_square(tmp, ftmp);
709 felem_reduce(ftmp, tmp); /* 2^3 - 2 */
710 felem_mul(tmp, in, ftmp);
711 felem_reduce(ftmp, tmp); /* 2^3 - 1 */
712 felem_square(tmp, ftmp);
713 felem_reduce(ftmp2, tmp); /* 2^4 - 2 */
714 felem_square(tmp, ftmp2);
715 felem_reduce(ftmp2, tmp); /* 2^5 - 4 */
716 felem_square(tmp, ftmp2);
717 felem_reduce(ftmp2, tmp); /* 2^6 - 8 */
718 felem_mul(tmp, ftmp2, ftmp);
719 felem_reduce(ftmp, tmp); /* 2^6 - 1 */
720 felem_square(tmp, ftmp);
721 felem_reduce(ftmp2, tmp); /* 2^7 - 2 */
722 for (i = 0; i < 5; ++i) { /* 2^12 - 2^6 */
723 felem_square(tmp, ftmp2);
724 felem_reduce(ftmp2, tmp);
725 }
726 felem_mul(tmp, ftmp2, ftmp);
727 felem_reduce(ftmp2, tmp); /* 2^12 - 1 */
728 felem_square(tmp, ftmp2);
729 felem_reduce(ftmp3, tmp); /* 2^13 - 2 */
730 for (i = 0; i < 11; ++i) { /* 2^24 - 2^12 */
731 felem_square(tmp, ftmp3);
732 felem_reduce(ftmp3, tmp);
733 }
734 felem_mul(tmp, ftmp3, ftmp2);
735 felem_reduce(ftmp2, tmp); /* 2^24 - 1 */
736 felem_square(tmp, ftmp2);
737 felem_reduce(ftmp3, tmp); /* 2^25 - 2 */
738 for (i = 0; i < 23; ++i) { /* 2^48 - 2^24 */
739 felem_square(tmp, ftmp3);
740 felem_reduce(ftmp3, tmp);
741 }
742 felem_mul(tmp, ftmp3, ftmp2);
743 felem_reduce(ftmp3, tmp); /* 2^48 - 1 */
744 felem_square(tmp, ftmp3);
745 felem_reduce(ftmp4, tmp); /* 2^49 - 2 */
746 for (i = 0; i < 47; ++i) { /* 2^96 - 2^48 */
747 felem_square(tmp, ftmp4);
748 felem_reduce(ftmp4, tmp);
749 }
750 felem_mul(tmp, ftmp3, ftmp4);
751 felem_reduce(ftmp3, tmp); /* 2^96 - 1 */
752 felem_square(tmp, ftmp3);
753 felem_reduce(ftmp4, tmp); /* 2^97 - 2 */
754 for (i = 0; i < 23; ++i) { /* 2^120 - 2^24 */
755 felem_square(tmp, ftmp4);
756 felem_reduce(ftmp4, tmp);
757 }
758 felem_mul(tmp, ftmp2, ftmp4);
759 felem_reduce(ftmp2, tmp); /* 2^120 - 1 */
760 for (i = 0; i < 6; ++i) { /* 2^126 - 2^6 */
761 felem_square(tmp, ftmp2);
762 felem_reduce(ftmp2, tmp);
763 }
764 felem_mul(tmp, ftmp2, ftmp);
765 felem_reduce(ftmp, tmp); /* 2^126 - 1 */
766 felem_square(tmp, ftmp);
767 felem_reduce(ftmp, tmp); /* 2^127 - 2 */
768 felem_mul(tmp, ftmp, in);
769 felem_reduce(ftmp, tmp); /* 2^127 - 1 */
770 for (i = 0; i < 97; ++i) { /* 2^224 - 2^97 */
771 felem_square(tmp, ftmp);
772 felem_reduce(ftmp, tmp);
773 }
774 felem_mul(tmp, ftmp, ftmp3);
775 felem_reduce(out, tmp); /* 2^224 - 2^96 - 1 */
776}
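/*-
 * The chain above raises 'in' to the power 2^224 - 2^96 - 1 = p - 2, so by
 * Fermat's little theorem the result is in^(-1) (mod p) for in != 0; this
 * gives the inverse without any data-dependent branches.
 */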
777
778/*
779 * Copy in constant time: if icopy == 1, copy in to out; if icopy == 0, copy
780 * out to itself.
781 */
782static void copy_conditional(felem out, const felem in, limb icopy)
783{
784 unsigned i;
785 /*
786 * icopy is a (64-bit) 0 or 1, so copy is either all-zero or all-one
787 */
788 const limb copy = -icopy;
789 for (i = 0; i < 4; ++i) {
790 const limb tmp = copy & (in[i] ^ out[i]);
791 out[i] ^= tmp;
792 }
793}
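/*-
 * Minimal sketch of the masking idiom above (for illustration):
 *
 *     limb mask = -icopy;                 /* icopy in {0,1}: mask is 0 or ~0 *|
 *     out[i] ^= mask & (in[i] ^ out[i]);  /* in[i] if mask is ~0, else out[i] *|
 *
 * When mask is all-ones the XOR replaces out[i] with in[i]; when mask is
 * zero the XOR is with 0 and out[i] is left unchanged. No branch depends
 * on icopy.
 */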
794
795/******************************************************************************/
796/*-
797 * ELLIPTIC CURVE POINT OPERATIONS
798 *
799 * Points are represented in Jacobian projective coordinates:
800 * (X, Y, Z) corresponds to the affine point (X/Z^2, Y/Z^3),
801 * or to the point at infinity if Z == 0.
802 *
803 */
804
805/*-
806 * Double an elliptic curve point:
807 * (X', Y', Z') = 2 * (X, Y, Z), where
808 * X' = (3 * (X - Z^2) * (X + Z^2))^2 - 8 * X * Y^2
809 * Y' = 3 * (X - Z^2) * (X + Z^2) * (4 * X * Y^2 - X') - 8 * Y^4
810 * Z' = (Y + Z)^2 - Y^2 - Z^2 = 2 * Y * Z
811 * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed,
812 * while x_out == y_in is not (maybe this works, but it's not tested).
813 */
814static void
815point_double(felem x_out, felem y_out, felem z_out,
816 const felem x_in, const felem y_in, const felem z_in)
817{
818 widefelem tmp, tmp2;
819 felem delta, gamma, beta, alpha, ftmp, ftmp2;
820
821 felem_assign(ftmp, x_in);
822 felem_assign(ftmp2, x_in);
823
824 /* delta = z^2 */
825 felem_square(tmp, z_in);
826 felem_reduce(delta, tmp);
827
828 /* gamma = y^2 */
829 felem_square(tmp, y_in);
830 felem_reduce(gamma, tmp);
831
832 /* beta = x*gamma */
833 felem_mul(tmp, x_in, gamma);
834 felem_reduce(beta, tmp);
835
836 /* alpha = 3*(x-delta)*(x+delta) */
837 felem_diff(ftmp, delta);
838 /* ftmp[i] < 2^57 + 2^58 + 2 < 2^59 */
839 felem_sum(ftmp2, delta);
840 /* ftmp2[i] < 2^57 + 2^57 = 2^58 */
841 felem_scalar(ftmp2, 3);
842 /* ftmp2[i] < 3 * 2^58 < 2^60 */
843 felem_mul(tmp, ftmp, ftmp2);
844 /* tmp[i] < 2^60 * 2^59 * 4 = 2^121 */
845 felem_reduce(alpha, tmp);
846
847 /* x' = alpha^2 - 8*beta */
848 felem_square(tmp, alpha);
849 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
850 felem_assign(ftmp, beta);
851 felem_scalar(ftmp, 8);
852 /* ftmp[i] < 8 * 2^57 = 2^60 */
853 felem_diff_128_64(tmp, ftmp);
854 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
855 felem_reduce(x_out, tmp);
856
857 /* z' = (y + z)^2 - gamma - delta */
858 felem_sum(delta, gamma);
859 /* delta[i] < 2^57 + 2^57 = 2^58 */
860 felem_assign(ftmp, y_in);
861 felem_sum(ftmp, z_in);
862 /* ftmp[i] < 2^57 + 2^57 = 2^58 */
863 felem_square(tmp, ftmp);
864 /* tmp[i] < 4 * 2^58 * 2^58 = 2^118 */
865 felem_diff_128_64(tmp, delta);
866 /* tmp[i] < 2^118 + 2^64 + 8 < 2^119 */
867 felem_reduce(z_out, tmp);
868
869 /* y' = alpha*(4*beta - x') - 8*gamma^2 */
870 felem_scalar(beta, 4);
871 /* beta[i] < 4 * 2^57 = 2^59 */
872 felem_diff(beta, x_out);
873 /* beta[i] < 2^59 + 2^58 + 2 < 2^60 */
874 felem_mul(tmp, alpha, beta);
875 /* tmp[i] < 4 * 2^57 * 2^60 = 2^119 */
876 felem_square(tmp2, gamma);
877 /* tmp2[i] < 4 * 2^57 * 2^57 = 2^116 */
878 widefelem_scalar(tmp2, 8);
879 /* tmp2[i] < 8 * 2^116 = 2^119 */
880 widefelem_diff(tmp, tmp2);
881 /* tmp[i] < 2^119 + 2^120 < 2^121 */
882 felem_reduce(y_out, tmp);
883}
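/*-
 * Note on the formula above (for illustration): with delta = z^2,
 *     alpha = 3*(x - delta)*(x + delta) = 3*x^2 - 3*z^4 = 3*x^2 + a*z^4
 * for a = -3, so this doubling is only valid because the P-224 curve
 * parameter a equals p - 3; ec_GFp_nistp224_group_init() below records
 * this by setting group->a_is_minus3.
 */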
884
885/*-
886 * Add two elliptic curve points:
887 * (X_1, Y_1, Z_1) + (X_2, Y_2, Z_2) = (X_3, Y_3, Z_3), where
888 * X_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1)^2 - (Z_1^2 * X_2 - Z_2^2 * X_1)^3 -
889 * 2 * Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2
890 * Y_3 = (Z_1^3 * Y_2 - Z_2^3 * Y_1) * (Z_2^2 * X_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^2 - X_3) -
891 * Z_2^3 * Y_1 * (Z_1^2 * X_2 - Z_2^2 * X_1)^3
892 * Z_3 = (Z_1^2 * X_2 - Z_2^2 * X_1) * (Z_1 * Z_2)
893 *
894 * This runs faster if 'mixed' is set, which requires Z_2 = 1 or Z_2 = 0.
895 */
896
897/*
898 * This function is not entirely constant-time: it includes a branch for
899 * checking whether the two input points are equal (while not equal to the
900 * point at infinity). This case never happens during single point
901 * multiplication, so there is no timing leak for ECDH or ECDSA signing.
902 */
903static void point_add(felem x3, felem y3, felem z3,
904 const felem x1, const felem y1, const felem z1,
905 const int mixed, const felem x2, const felem y2,
906 const felem z2)
907{
908 felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
909 widefelem tmp, tmp2;
910 limb z1_is_zero, z2_is_zero, x_equal, y_equal;
911 limb points_equal;
912
913 if (!mixed) {
914 /* ftmp2 = z2^2 */
915 felem_square(tmp, z2);
916 felem_reduce(ftmp2, tmp);
917
918 /* ftmp4 = z2^3 */
919 felem_mul(tmp, ftmp2, z2);
920 felem_reduce(ftmp4, tmp);
921
922 /* ftmp4 = z2^3*y1 */
923 felem_mul(tmp2, ftmp4, y1);
924 felem_reduce(ftmp4, tmp2);
925
926 /* ftmp2 = z2^2*x1 */
927 felem_mul(tmp2, ftmp2, x1);
928 felem_reduce(ftmp2, tmp2);
929 } else {
930 /*
931 * We'll assume z2 = 1 (special case z2 = 0 is handled later)
932 */
933
934 /* ftmp4 = z2^3*y1 */
935 felem_assign(ftmp4, y1);
936
937 /* ftmp2 = z2^2*x1 */
938 felem_assign(ftmp2, x1);
939 }
940
941 /* ftmp = z1^2 */
942 felem_square(tmp, z1);
943 felem_reduce(ftmp, tmp);
944
945 /* ftmp3 = z1^3 */
946 felem_mul(tmp, ftmp, z1);
947 felem_reduce(ftmp3, tmp);
948
949 /* tmp = z1^3*y2 */
950 felem_mul(tmp, ftmp3, y2);
951 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
952
953 /* ftmp3 = z1^3*y2 - z2^3*y1 */
954 felem_diff_128_64(tmp, ftmp4);
955 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
956 felem_reduce(ftmp3, tmp);
957
958 /* tmp = z1^2*x2 */
959 felem_mul(tmp, ftmp, x2);
960 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
961
962 /* ftmp = z1^2*x2 - z2^2*x1 */
963 felem_diff_128_64(tmp, ftmp2);
964 /* tmp[i] < 2^116 + 2^64 + 8 < 2^117 */
965 felem_reduce(ftmp, tmp);
966
967 /*
968 * The formulae are incorrect if the points are equal, in affine coordinates
969 * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
970 * happens.
971 *
972 * We use bitwise operations to avoid potential side-channels introduced by
973 * the short-circuiting behaviour of boolean operators.
974 */
975 x_equal = felem_is_zero(ftmp);
976 y_equal = felem_is_zero(ftmp3);
977 /*
978 * The special case of either point being the point at infinity (z1 and/or
979 * z2 are zero), is handled separately later on in this function, so we
980 * avoid jumping to point_double here in those special cases.
981 */
982 z1_is_zero = felem_is_zero(z1);
983 z2_is_zero = felem_is_zero(z2);
984
985 /*
986 * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
987 * specific implementation `felem_is_zero()` returns truth as `0x1`
988 * (rather than `0xff..ff`).
989 *
990 * This implies that `~true` in this implementation becomes
991 * `0xff..fe` (rather than `0x0`): for this reason, to be used in
992 * the if expression, we mask out only the last bit in the next
993 * line.
994 */
995 points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
996
997 if (points_equal) {
998 /*
999 * This is obviously not constant-time but, as mentioned before, this
1000 * case never happens during single point multiplication, so there is no
1001 * timing leak for ECDH or ECDSA signing.
1002 */
1003 point_double(x3, y3, z3, x1, y1, z1);
1004 return;
1005 }
1006
1007 /* ftmp5 = z1*z2 */
1008 if (!mixed) {
1009 felem_mul(tmp, z1, z2);
1010 felem_reduce(ftmp5, tmp);
1011 } else {
1012 /* special case z2 = 0 is handled later */
1013 felem_assign(ftmp5, z1);
1014 }
1015
1016 /* z_out = (z1^2*x2 - z2^2*x1)*(z1*z2) */
1017 felem_mul(tmp, ftmp, ftmp5);
1018 felem_reduce(z_out, tmp);
1019
1020 /* ftmp = (z1^2*x2 - z2^2*x1)^2 */
1021 felem_assign(ftmp5, ftmp);
1022 felem_square(tmp, ftmp);
1023 felem_reduce(ftmp, tmp);
1024
1025 /* ftmp5 = (z1^2*x2 - z2^2*x1)^3 */
1026 felem_mul(tmp, ftmp, ftmp5);
1027 felem_reduce(ftmp5, tmp);
1028
1029 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1030 felem_mul(tmp, ftmp2, ftmp);
1031 felem_reduce(ftmp2, tmp);
1032
1033 /* tmp = z2^3*y1*(z1^2*x2 - z2^2*x1)^3 */
1034 felem_mul(tmp, ftmp4, ftmp5);
1035 /* tmp[i] < 4 * 2^57 * 2^57 = 2^116 */
1036
1037 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 */
1038 felem_square(tmp2, ftmp3);
1039 /* tmp2[i] < 4 * 2^57 * 2^57 < 2^116 */
1040
1041 /* tmp2 = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 */
1042 felem_diff_128_64(tmp2, ftmp5);
1043 /* tmp2[i] < 2^116 + 2^64 + 8 < 2^117 */
1044
1045 /* ftmp5 = 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2 */
1046 felem_assign(ftmp5, ftmp2);
1047 felem_scalar(ftmp5, 2);
1048 /* ftmp5[i] < 2 * 2^57 = 2^58 */
1049
1050 /*-
1051 * x_out = (z1^3*y2 - z2^3*y1)^2 - (z1^2*x2 - z2^2*x1)^3 -
1052 * 2*z2^2*x1*(z1^2*x2 - z2^2*x1)^2
1053 */
1054 felem_diff_128_64(tmp2, ftmp5);
1055 /* tmp2[i] < 2^117 + 2^64 + 8 < 2^118 */
1056 felem_reduce(x_out, tmp2);
1057
1058 /* ftmp2 = z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out */
1059 felem_diff(ftmp2, x_out);
1060 /* ftmp2[i] < 2^57 + 2^58 + 2 < 2^59 */
1061
1062 /*
1063 * tmp2 = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out)
1064 */
1065 felem_mul(tmp2, ftmp3, ftmp2);
1066 /* tmp2[i] < 4 * 2^57 * 2^59 = 2^118 */
1067
1068 /*-
1069 * y_out = (z1^3*y2 - z2^3*y1)*(z2^2*x1*(z1^2*x2 - z2^2*x1)^2 - x_out) -
1070 * z2^3*y1*(z1^2*x2 - z2^2*x1)^3
1071 */
1072 widefelem_diff(tmp2, tmp);
1073 /* tmp2[i] < 2^118 + 2^120 < 2^121 */
1074 felem_reduce(y_out, tmp2);
1075
1076 /*
1077 * the result (x_out, y_out, z_out) is incorrect if one of the inputs is
1078 * the point at infinity, so we need to check for this separately
1079 */
1080
1081 /*
1082 * if point 1 is at infinity, copy point 2 to output, and vice versa
1083 */
1084 copy_conditional(x_out, x2, z1_is_zero);
1085 copy_conditional(x_out, x1, z2_is_zero);
1086 copy_conditional(y_out, y2, z1_is_zero);
1087 copy_conditional(y_out, y1, z2_is_zero);
1088 copy_conditional(z_out, z2, z1_is_zero);
1089 copy_conditional(z_out, z1, z2_is_zero);
1090 felem_assign(x3, x_out);
1091 felem_assign(y3, y_out);
1092 felem_assign(z3, z_out);
1093}
1094
1095/*
1096 * select_point selects the |idx|th point from a precomputation table and
1097 * copies it to out.
1098 * The pre_comp array argument should have |size| elements.
1099 */
1100static void select_point(const u64 idx, unsigned int size,
1101 const felem pre_comp[][3], felem out[3])
1102{
1103 unsigned i, j;
1104 limb *outlimbs = &out[0][0];
1105
1106 memset(out, 0, sizeof(*out) * 3);
1107 for (i = 0; i < size; i++) {
1108 const limb *inlimbs = &pre_comp[i][0][0];
1109 u64 mask = i ^ idx;
1110 mask |= mask >> 4;
1111 mask |= mask >> 2;
1112 mask |= mask >> 1;
1113 mask &= 1;
1114 mask--;
1115 for (j = 0; j < 4 * 3; j++)
1116 outlimbs[j] |= inlimbs[j] & mask;
1117 }
1118}
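/*-
 * Worked example of the mask computation above (for illustration): i and
 * idx are both at most 16, so i ^ idx fits in 5 bits, and the three
 * shift-and-OR steps fold every bit of that difference into bit 0. After
 * 'mask &= 1' and 'mask--':
 *     i == idx  ->  mask = (u64)0 - 1 = all-ones, entry is ORed into out
 *     i != idx  ->  mask = 1 - 1 = 0, entry contributes nothing
 * Every entry of the table is read regardless of idx, so the memory access
 * pattern does not depend on the secret index.
 */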
1119
1120/* get_bit returns the |i|th bit in |in| */
1121static char get_bit(const felem_bytearray in, unsigned i)
1122{
1123 if (i >= 224)
1124 return 0;
1125 return (in[i >> 3] >> (i & 7)) & 1;
1126}
1127
1128/*
1129 * Interleaved point multiplication using precomputed point multiples: The
1130 * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
1131 * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
1132 * generator, using certain (large) precomputed multiples in g_pre_comp.
1133 * Output point (X, Y, Z) is stored in x_out, y_out, z_out
1134 */
1135static void batch_mul(felem x_out, felem y_out, felem z_out,
1136 const felem_bytearray scalars[],
1137 const unsigned num_points, const u8 *g_scalar,
1138 const int mixed, const felem pre_comp[][17][3],
1139 const felem g_pre_comp[2][16][3])
1140{
1141 int i, skip;
1142 unsigned num;
1143 unsigned gen_mul = (g_scalar != NULL);
1144 felem nq[3], tmp[4];
1145 u64 bits;
1146 u8 sign, digit;
1147
1148 /* set nq to the point at infinity */
1149 memset(nq, 0, sizeof(nq));
1150
1151 /*
1152 * Loop over all scalars msb-to-lsb, interleaving additions of multiples
1153 * of the generator (two in each of the last 28 rounds) and additions of
1154 * other points multiples (every 5th round).
1155 */
1156 skip = 1; /* save two point operations in the first
1157 * round */
1158 for (i = (num_points ? 220 : 27); i >= 0; --i) {
1159 /* double */
1160 if (!skip)
1161 point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
1162
1163 /* add multiples of the generator */
1164 if (gen_mul && (i <= 27)) {
1165 /* first, look 28 bits upwards */
1166 bits = get_bit(g_scalar, i + 196) << 3;
1167 bits |= get_bit(g_scalar, i + 140) << 2;
1168 bits |= get_bit(g_scalar, i + 84) << 1;
1169 bits |= get_bit(g_scalar, i + 28);
1170 /* select the point to add, in constant time */
1171 select_point(bits, 16, g_pre_comp[1], tmp);
1172
1173 if (!skip) {
1174 /* value 1 below is argument for "mixed" */
1175 point_add(nq[0], nq[1], nq[2],
1176 nq[0], nq[1], nq[2], 1, tmp[0], tmp[1], tmp[2]);
1177 } else {
1178 memcpy(nq, tmp, 3 * sizeof(felem));
1179 skip = 0;
1180 }
1181
1182 /* second, look at the current position */
1183 bits = get_bit(g_scalar, i + 168) << 3;
1184 bits |= get_bit(g_scalar, i + 112) << 2;
1185 bits |= get_bit(g_scalar, i + 56) << 1;
1186 bits |= get_bit(g_scalar, i);
1187 /* select the point to add, in constant time */
1188 select_point(bits, 16, g_pre_comp[0], tmp);
1189 point_add(nq[0], nq[1], nq[2],
1190 nq[0], nq[1], nq[2],
1191 1 /* mixed */ , tmp[0], tmp[1], tmp[2]);
1192 }
1193
1194 /* do other additions every 5 doublings */
1195 if (num_points && (i % 5 == 0)) {
1196 /* loop over all scalars */
1197 for (num = 0; num < num_points; ++num) {
1198 bits = get_bit(scalars[num], i + 4) << 5;
1199 bits |= get_bit(scalars[num], i + 3) << 4;
1200 bits |= get_bit(scalars[num], i + 2) << 3;
1201 bits |= get_bit(scalars[num], i + 1) << 2;
1202 bits |= get_bit(scalars[num], i) << 1;
1203 bits |= get_bit(scalars[num], i - 1);
1204 ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
1205
1206 /* select the point to add or subtract */
1207 select_point(digit, 17, pre_comp[num], tmp);
1208 felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
1209 * point */
1210 copy_conditional(tmp[1], tmp[3], sign);
1211
1212 if (!skip) {
1213 point_add(nq[0], nq[1], nq[2],
1214 nq[0], nq[1], nq[2],
1215 mixed, tmp[0], tmp[1], tmp[2]);
1216 } else {
1217 memcpy(nq, tmp, 3 * sizeof(felem));
1218 skip = 0;
1219 }
1220 }
1221 }
1222 }
1223 felem_assign(x_out, nq[0]);
1224 felem_assign(y_out, nq[1]);
1225 felem_assign(z_out, nq[2]);
1226}
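/*-
 * Sketch of the schedule above (for illustration): each iteration performs
 * one doubling; every 5th iteration it reads, per point, the scalar bits at
 * positions i+4 .. i-1, has ec_GFp_nistp_recode_scalar_bits() turn them
 * into a sign and a digit in 0..16, selects that multiple from the 17-entry
 * pre_comp table and conditionally negates it via (X, -Y, Z); and in the
 * last 28 iterations (i <= 27) it also adds the two generator multiples
 * indexed as described above the gmul table.
 */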
1227
1228/******************************************************************************/
1229/*
1230 * FUNCTIONS TO MANAGE PRECOMPUTATION
1231 */
1232
1233static NISTP224_PRE_COMP *nistp224_pre_comp_new(void)
1234{
1235 NISTP224_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
1236
1237 if (!ret) {
1238 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1239 return ret;
1240 }
1241
1242 ret->references = 1;
1243
1244 ret->lock = CRYPTO_THREAD_lock_new();
1245 if (ret->lock == NULL) {
1246 ECerr(EC_F_NISTP224_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE);
1247 OPENSSL_free(ret);
1248 return NULL;
1249 }
1250 return ret;
1251}
1252
1253NISTP224_PRE_COMP *EC_nistp224_pre_comp_dup(NISTP224_PRE_COMP *p)
1254{
1255 int i;
1256 if (p != NULL)
1257 CRYPTO_UP_REF(&p->references, &i, p->lock);
1258 return p;
1259}
1260
1261void EC_nistp224_pre_comp_free(NISTP224_PRE_COMP *p)
1262{
1263 int i;
1264
1265 if (p == NULL)
1266 return;
1267
1268 CRYPTO_DOWN_REF(&p->references, &i, p->lock);
1269 REF_PRINT_COUNT("EC_nistp224", x);
1270 if (i > 0)
1271 return;
1272 REF_ASSERT_ISNT(i < 0);
1273
1274 CRYPTO_THREAD_lock_free(p->lock);
1275 OPENSSL_free(p);
1276}
1277
1278/******************************************************************************/
1279/*
1280 * OPENSSL EC_METHOD FUNCTIONS
1281 */
1282
1283int ec_GFp_nistp224_group_init(EC_GROUP *group)
1284{
1285 int ret;
1286 ret = ec_GFp_simple_group_init(group);
1287 group->a_is_minus3 = 1;
1288 return ret;
1289}
1290
1291int ec_GFp_nistp224_group_set_curve(EC_GROUP *group, const BIGNUM *p,
1292 const BIGNUM *a, const BIGNUM *b,
1293 BN_CTX *ctx)
1294{
1295 int ret = 0;
1296 BN_CTX *new_ctx = NULL;
1297 BIGNUM *curve_p, *curve_a, *curve_b;
1298
1299 if (ctx == NULL)
1300 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1301 return 0;
1302 BN_CTX_start(ctx);
1303 curve_p = BN_CTX_get(ctx);
1304 curve_a = BN_CTX_get(ctx);
1305 curve_b = BN_CTX_get(ctx);
1306 if (curve_b == NULL)
1307 goto err;
1308 BN_bin2bn(nistp224_curve_params[0], sizeof(felem_bytearray), curve_p);
1309 BN_bin2bn(nistp224_curve_params[1], sizeof(felem_bytearray), curve_a);
1310 BN_bin2bn(nistp224_curve_params[2], sizeof(felem_bytearray), curve_b);
1311 if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
1312 ECerr(EC_F_EC_GFP_NISTP224_GROUP_SET_CURVE,
1313 EC_R_WRONG_CURVE_PARAMETERS);
1314 goto err;
1315 }
1316 group->field_mod_func = BN_nist_mod_224;
1317 ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
1318 err:
1319 BN_CTX_end(ctx);
1320 BN_CTX_free(new_ctx);
1321 return ret;
1322}
1323
1324/*
1325 * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
1326 * (X/Z^2, Y/Z^3)
1327 */
1328int ec_GFp_nistp224_point_get_affine_coordinates(const EC_GROUP *group,
1329 const EC_POINT *point,
1330 BIGNUM *x, BIGNUM *y,
1331 BN_CTX *ctx)
1332{
1333 felem z1, z2, x_in, y_in, x_out, y_out;
1334 widefelem tmp;
1335
1336 if (EC_POINT_is_at_infinity(group, point)) {
1337 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1338 EC_R_POINT_AT_INFINITY);
1339 return 0;
1340 }
1341 if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
1342 (!BN_to_felem(z1, point->Z)))
1343 return 0;
1344 felem_inv(z2, z1);
1345 felem_square(tmp, z2);
1346 felem_reduce(z1, tmp);
1347 felem_mul(tmp, x_in, z1);
1348 felem_reduce(x_in, tmp);
1349 felem_contract(x_out, x_in);
1350 if (x != NULL) {
1351 if (!felem_to_BN(x, x_out)) {
1352 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1353 ERR_R_BN_LIB);
1354 return 0;
1355 }
1356 }
1357 felem_mul(tmp, z1, z2);
1358 felem_reduce(z1, tmp);
1359 felem_mul(tmp, y_in, z1);
1360 felem_reduce(y_in, tmp);
1361 felem_contract(y_out, y_in);
1362 if (y != NULL) {
1363 if (!felem_to_BN(y, y_out)) {
1364 ECerr(EC_F_EC_GFP_NISTP224_POINT_GET_AFFINE_COORDINATES,
1365 ERR_R_BN_LIB);
1366 return 0;
1367 }
1368 }
1369 return 1;
1370}
1371
1372static void make_points_affine(size_t num, felem points[ /* num */ ][3],
1373 felem tmp_felems[ /* num+1 */ ])
1374{
1375 /*
1376 * Runs in constant time, unless an input is the point at infinity (which
1377 * normally shouldn't happen).
1378 */
1379 ec_GFp_nistp_points_make_affine_internal(num,
1380 points,
1381 sizeof(felem),
1382 tmp_felems,
1383 (void (*)(void *))felem_one,
1384 felem_is_zero_int,
1385 (void (*)(void *, const void *))
1386 felem_assign,
1387 (void (*)(void *, const void *))
1388 felem_square_reduce, (void (*)
1389 (void *,
1390 const void
1391 *,
1392 const void
1393 *))
1394 felem_mul_reduce,
1395 (void (*)(void *, const void *))
1396 felem_inv,
1397 (void (*)(void *, const void *))
1398 felem_contract);
1399}
1400
1401/*
1402 * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
1403 * values. Result is stored in r (r can equal one of the inputs).
1404 */
1405int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
1406 const BIGNUM *scalar, size_t num,
1407 const EC_POINT *points[],
1408 const BIGNUM *scalars[], BN_CTX *ctx)
1409{
1410 int ret = 0;
1411 int j;
1412 unsigned i;
1413 int mixed = 0;
1414 BIGNUM *x, *y, *z, *tmp_scalar;
1415 felem_bytearray g_secret;
1416 felem_bytearray *secrets = NULL;
1417 felem (*pre_comp)[17][3] = NULL;
1418 felem *tmp_felems = NULL;
1419 int num_bytes;
1420 int have_pre_comp = 0;
1421 size_t num_points = num;
1422 felem x_in, y_in, z_in, x_out, y_out, z_out;
1423 NISTP224_PRE_COMP *pre = NULL;
1424 const felem(*g_pre_comp)[16][3] = NULL;
1425 EC_POINT *generator = NULL;
1426 const EC_POINT *p = NULL;
1427 const BIGNUM *p_scalar = NULL;
1428
1429 BN_CTX_start(ctx);
1430 x = BN_CTX_get(ctx);
1431 y = BN_CTX_get(ctx);
1432 z = BN_CTX_get(ctx);
1433 tmp_scalar = BN_CTX_get(ctx);
1434 if (tmp_scalar == NULL)
1435 goto err;
1436
1437 if (scalar != NULL) {
1438 pre = group->pre_comp.nistp224;
1439 if (pre)
1440 /* we have precomputation, try to use it */
1441 g_pre_comp = (const felem(*)[16][3])pre->g_pre_comp;
1442 else
1443 /* try to use the standard precomputation */
1444 g_pre_comp = &gmul[0];
1445 generator = EC_POINT_new(group);
1446 if (generator == NULL)
1447 goto err;
1448 /* get the generator from precomputation */
1449 if (!felem_to_BN(x, g_pre_comp[0][1][0]) ||
1450 !felem_to_BN(y, g_pre_comp[0][1][1]) ||
1451 !felem_to_BN(z, g_pre_comp[0][1][2])) {
1452 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1453 goto err;
1454 }
1455 if (!EC_POINT_set_Jprojective_coordinates_GFp(group,
1456 generator, x, y, z,
1457 ctx))
1458 goto err;
1459 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
1460 /* precomputation matches generator */
1461 have_pre_comp = 1;
1462 else
1463 /*
1464 * we don't have valid precomputation: treat the generator as a
1465 * random point
1466 */
1467 num_points = num_points + 1;
1468 }
1469
1470 if (num_points > 0) {
1471 if (num_points >= 3) {
1472 /*
1473 * unless we precompute multiples for just one or two points,
1474 * converting those into affine form is time well spent
1475 */
1476 mixed = 1;
1477 }
1478 secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
1479 pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
1480 if (mixed)
1481 tmp_felems =
1482 OPENSSL_malloc(sizeof(felem) * (num_points * 17 + 1));
1483 if ((secrets == NULL) || (pre_comp == NULL)
1484 || (mixed && (tmp_felems == NULL))) {
1485 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_MALLOC_FAILURE);
1486 goto err;
1487 }
1488
1489 /*
1490 * we treat NULL scalars as 0, and NULL points as points at infinity,
1491 * i.e., they contribute nothing to the linear combination
1492 */
1493 for (i = 0; i < num_points; ++i) {
1494 if (i == num) {
1495 /* the generator */
1496 p = EC_GROUP_get0_generator(group);
1497 p_scalar = scalar;
1498 } else {
1499 /* the i^th point */
1500 p = points[i];
1501 p_scalar = scalars[i];
1502 }
1503 if ((p_scalar != NULL) && (p != NULL)) {
1504 /* reduce scalar to 0 <= scalar < 2^224 */
1505 if ((BN_num_bits(p_scalar) > 224)
1506 || (BN_is_negative(p_scalar))) {
1507 /*
1508 * this is an unusual input, and we don't guarantee
1509 * constant-timeness
1510 */
1511 if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
1512 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1513 goto err;
1514 }
1515 num_bytes = BN_bn2lebinpad(tmp_scalar,
1516 secrets[i], sizeof(secrets[i]));
1517 } else {
1518 num_bytes = BN_bn2lebinpad(p_scalar,
1519 secrets[i], sizeof(secrets[i]));
1520 }
1521 if (num_bytes < 0) {
1522 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1523 goto err;
1524 }
1525 /* precompute multiples */
1526 if ((!BN_to_felem(x_out, p->X)) ||
1527 (!BN_to_felem(y_out, p->Y)) ||
1528 (!BN_to_felem(z_out, p->Z)))
1529 goto err;
1530 felem_assign(pre_comp[i][1][0], x_out);
1531 felem_assign(pre_comp[i][1][1], y_out);
1532 felem_assign(pre_comp[i][1][2], z_out);
1533 for (j = 2; j <= 16; ++j) {
1534 if (j & 1) {
1535 point_add(pre_comp[i][j][0], pre_comp[i][j][1],
1536 pre_comp[i][j][2], pre_comp[i][1][0],
1537 pre_comp[i][1][1], pre_comp[i][1][2], 0,
1538 pre_comp[i][j - 1][0],
1539 pre_comp[i][j - 1][1],
1540 pre_comp[i][j - 1][2]);
1541 } else {
1542 point_double(pre_comp[i][j][0], pre_comp[i][j][1],
1543 pre_comp[i][j][2], pre_comp[i][j / 2][0],
1544 pre_comp[i][j / 2][1],
1545 pre_comp[i][j / 2][2]);
1546 }
1547 }
1548 }
1549 }
1550 if (mixed)
1551 make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
1552 }
1553
1554 /* the scalar for the generator */
1555 if ((scalar != NULL) && (have_pre_comp)) {
1556 memset(g_secret, 0, sizeof(g_secret));
1557 /* reduce scalar to 0 <= scalar < 2^224 */
1558 if ((BN_num_bits(scalar) > 224) || (BN_is_negative(scalar))) {
1559 /*
1560 * this is an unusual input, and we don't guarantee
1561 * constant-timeness
1562 */
1563 if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
1564 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1565 goto err;
1566 }
1567 num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
1568 } else {
1569 num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
1570 }
1571 /* do the multiplication with generator precomputation */
1572 batch_mul(x_out, y_out, z_out,
1573 (const felem_bytearray(*))secrets, num_points,
1574 g_secret,
1575 mixed, (const felem(*)[17][3])pre_comp, g_pre_comp);
1576 } else {
1577 /* do the multiplication without generator precomputation */
1578 batch_mul(x_out, y_out, z_out,
1579 (const felem_bytearray(*))secrets, num_points,
1580 NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
1581 }
1582 /* reduce the output to its unique minimal representation */
1583 felem_contract(x_in, x_out);
1584 felem_contract(y_in, y_out);
1585 felem_contract(z_in, z_out);
1586 if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
1587 (!felem_to_BN(z, z_in))) {
1588 ECerr(EC_F_EC_GFP_NISTP224_POINTS_MUL, ERR_R_BN_LIB);
1589 goto err;
1590 }
1591 ret = EC_POINT_set_Jprojective_coordinates_GFp(group, r, x, y, z, ctx);
1592
1593 err:
1594 BN_CTX_end(ctx);
1595 EC_POINT_free(generator);
1596 OPENSSL_free(secrets);
1597 OPENSSL_free(pre_comp);
1598 OPENSSL_free(tmp_felems);
1599 return ret;
1600}
1601
1602int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
1603{
1604 int ret = 0;
1605 NISTP224_PRE_COMP *pre = NULL;
1606 int i, j;
1607 BN_CTX *new_ctx = NULL;
1608 BIGNUM *x, *y;
1609 EC_POINT *generator = NULL;
1610 felem tmp_felems[32];
1611
1612 /* throw away old precomputation */
1613 EC_pre_comp_free(group);
1614 if (ctx == NULL)
1615 if ((ctx = new_ctx = BN_CTX_new()) == NULL)
1616 return 0;
1617 BN_CTX_start(ctx);
1618 x = BN_CTX_get(ctx);
1619 y = BN_CTX_get(ctx);
1620 if (y == NULL)
1621 goto err;
1622 /* get the generator */
1623 if (group->generator == NULL)
1624 goto err;
1625 generator = EC_POINT_new(group);
1626 if (generator == NULL)
1627 goto err;
1628 BN_bin2bn(nistp224_curve_params[3], sizeof(felem_bytearray), x);
1629 BN_bin2bn(nistp224_curve_params[4], sizeof(felem_bytearray), y);
1630 if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
1631 goto err;
1632 if ((pre = nistp224_pre_comp_new()) == NULL)
1633 goto err;
1634 /*
1635 * if the generator is the standard one, use built-in precomputation
1636 */
1637 if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
1638 memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
1639 goto done;
1640 }
1641 if ((!BN_to_felem(pre->g_pre_comp[0][1][0], group->generator->X)) ||
1642 (!BN_to_felem(pre->g_pre_comp[0][1][1], group->generator->Y)) ||
1643 (!BN_to_felem(pre->g_pre_comp[0][1][2], group->generator->Z)))
1644 goto err;
1645 /*
1646 * compute 2^56*G, 2^112*G, 2^168*G for the first table, 2^28*G, 2^84*G,
1647 * 2^140*G, 2^196*G for the second one
1648 */
1649 for (i = 1; i <= 8; i <<= 1) {
1650 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1651 pre->g_pre_comp[1][i][2], pre->g_pre_comp[0][i][0],
1652 pre->g_pre_comp[0][i][1], pre->g_pre_comp[0][i][2]);
1653 for (j = 0; j < 27; ++j) {
1654 point_double(pre->g_pre_comp[1][i][0], pre->g_pre_comp[1][i][1],
1655 pre->g_pre_comp[1][i][2], pre->g_pre_comp[1][i][0],
1656 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1657 }
1658 if (i == 8)
1659 break;
1660 point_double(pre->g_pre_comp[0][2 * i][0],
1661 pre->g_pre_comp[0][2 * i][1],
1662 pre->g_pre_comp[0][2 * i][2], pre->g_pre_comp[1][i][0],
1663 pre->g_pre_comp[1][i][1], pre->g_pre_comp[1][i][2]);
1664 for (j = 0; j < 27; ++j) {
1665 point_double(pre->g_pre_comp[0][2 * i][0],
1666 pre->g_pre_comp[0][2 * i][1],
1667 pre->g_pre_comp[0][2 * i][2],
1668 pre->g_pre_comp[0][2 * i][0],
1669 pre->g_pre_comp[0][2 * i][1],
1670 pre->g_pre_comp[0][2 * i][2]);
1671 }
1672 }
1673 for (i = 0; i < 2; i++) {
1674 /* g_pre_comp[i][0] is the point at infinity */
1675 memset(pre->g_pre_comp[i][0], 0, sizeof(pre->g_pre_comp[i][0]));
1676 /* the remaining multiples */
1677 /* 2^56*G + 2^112*G resp. 2^84*G + 2^140*G */
1678 point_add(pre->g_pre_comp[i][6][0], pre->g_pre_comp[i][6][1],
1679 pre->g_pre_comp[i][6][2], pre->g_pre_comp[i][4][0],
1680 pre->g_pre_comp[i][4][1], pre->g_pre_comp[i][4][2],
1681 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1682 pre->g_pre_comp[i][2][2]);
1683 /* 2^56*G + 2^168*G resp. 2^84*G + 2^196*G */
1684 point_add(pre->g_pre_comp[i][10][0], pre->g_pre_comp[i][10][1],
1685 pre->g_pre_comp[i][10][2], pre->g_pre_comp[i][8][0],
1686 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1687 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1688 pre->g_pre_comp[i][2][2]);
1689 /* 2^112*G + 2^168*G resp. 2^140*G + 2^196*G */
1690 point_add(pre->g_pre_comp[i][12][0], pre->g_pre_comp[i][12][1],
1691 pre->g_pre_comp[i][12][2], pre->g_pre_comp[i][8][0],
1692 pre->g_pre_comp[i][8][1], pre->g_pre_comp[i][8][2],
1693 0, pre->g_pre_comp[i][4][0], pre->g_pre_comp[i][4][1],
1694 pre->g_pre_comp[i][4][2]);
1695 /*
1696 * 2^56*G + 2^112*G + 2^168*G resp. 2^84*G + 2^140*G + 2^196*G
1697 */
1698 point_add(pre->g_pre_comp[i][14][0], pre->g_pre_comp[i][14][1],
1699 pre->g_pre_comp[i][14][2], pre->g_pre_comp[i][12][0],
1700 pre->g_pre_comp[i][12][1], pre->g_pre_comp[i][12][2],
1701 0, pre->g_pre_comp[i][2][0], pre->g_pre_comp[i][2][1],
1702 pre->g_pre_comp[i][2][2]);
1703 for (j = 1; j < 8; ++j) {
1704 /* odd multiples: add G resp. 2^28*G */
1705 point_add(pre->g_pre_comp[i][2 * j + 1][0],
1706 pre->g_pre_comp[i][2 * j + 1][1],
1707 pre->g_pre_comp[i][2 * j + 1][2],
1708 pre->g_pre_comp[i][2 * j][0],
1709 pre->g_pre_comp[i][2 * j][1],
1710 pre->g_pre_comp[i][2 * j][2], 0,
1711 pre->g_pre_comp[i][1][0], pre->g_pre_comp[i][1][1],
1712 pre->g_pre_comp[i][1][2]);
1713 }
1714 }
1715 make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
1716
1717 done:
1718 SETPRECOMP(group, nistp224, pre);
1719 pre = NULL;
1720 ret = 1;
1721 err:
1722 BN_CTX_end(ctx);
1723 EC_POINT_free(generator);
1724 BN_CTX_free(new_ctx);
1725 EC_nistp224_pre_comp_free(pre);
1726 return ret;
1727}
1728
1729int ec_GFp_nistp224_have_precompute_mult(const EC_GROUP *group)
1730{
1731 return HAVEPRECOMP(group, nistp224);
1732}
1733
1734#endif