1 | /*
|
---|
2 | * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
|
---|
3 | *
|
---|
4 | * Licensed under the Apache License 2.0 (the "License"). You may not use
|
---|
5 | * this file except in compliance with the License. You can obtain a copy
|
---|
6 | * in the file LICENSE in the source distribution or at
|
---|
7 | * https://www.openssl.org/source/license.html
|
---|
8 | */
|
---|
9 |
|
---|
10 | #include <assert.h>
|
---|
11 | #include <openssl/crypto.h>
|
---|
12 | #include "internal/cryptlib.h"
|
---|
13 | #include "bn_local.h"
|
---|
14 |
|
---|
15 | #if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
|
---|
16 |
|
---|
17 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
|
---|
18 | BN_ULONG w)
|
---|
19 | {
|
---|
20 | BN_ULONG c1 = 0;
|
---|
21 |
|
---|
22 | assert(num >= 0);
|
---|
23 | if (num <= 0)
|
---|
24 | return c1;
|
---|
25 |
|
---|
26 | # ifndef OPENSSL_SMALL_FOOTPRINT
|
---|
27 | while (num & ~3) {
|
---|
28 | mul_add(rp[0], ap[0], w, c1);
|
---|
29 | mul_add(rp[1], ap[1], w, c1);
|
---|
30 | mul_add(rp[2], ap[2], w, c1);
|
---|
31 | mul_add(rp[3], ap[3], w, c1);
|
---|
32 | ap += 4;
|
---|
33 | rp += 4;
|
---|
34 | num -= 4;
|
---|
35 | }
|
---|
36 | # endif
|
---|
37 | while (num) {
|
---|
38 | mul_add(rp[0], ap[0], w, c1);
|
---|
39 | ap++;
|
---|
40 | rp++;
|
---|
41 | num--;
|
---|
42 | }
|
---|
43 |
|
---|
44 | return c1;
|
---|
45 | }
|
---|
46 |
|
---|
47 | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
|
---|
48 | {
|
---|
49 | BN_ULONG c1 = 0;
|
---|
50 |
|
---|
51 | assert(num >= 0);
|
---|
52 | if (num <= 0)
|
---|
53 | return c1;
|
---|
54 |
|
---|
55 | # ifndef OPENSSL_SMALL_FOOTPRINT
|
---|
56 | while (num & ~3) {
|
---|
57 | mul(rp[0], ap[0], w, c1);
|
---|
58 | mul(rp[1], ap[1], w, c1);
|
---|
59 | mul(rp[2], ap[2], w, c1);
|
---|
60 | mul(rp[3], ap[3], w, c1);
|
---|
61 | ap += 4;
|
---|
62 | rp += 4;
|
---|
63 | num -= 4;
|
---|
64 | }
|
---|
65 | # endif
|
---|
66 | while (num) {
|
---|
67 | mul(rp[0], ap[0], w, c1);
|
---|
68 | ap++;
|
---|
69 | rp++;
|
---|
70 | num--;
|
---|
71 | }
|
---|
72 | return c1;
|
---|
73 | }
|
---|
74 |
|
---|
75 | void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
|
---|
76 | {
|
---|
77 | assert(n >= 0);
|
---|
78 | if (n <= 0)
|
---|
79 | return;
|
---|
80 |
|
---|
81 | # ifndef OPENSSL_SMALL_FOOTPRINT
|
---|
82 | while (n & ~3) {
|
---|
83 | sqr(r[0], r[1], a[0]);
|
---|
84 | sqr(r[2], r[3], a[1]);
|
---|
85 | sqr(r[4], r[5], a[2]);
|
---|
86 | sqr(r[6], r[7], a[3]);
|
---|
87 | a += 4;
|
---|
88 | r += 8;
|
---|
89 | n -= 4;
|
---|
90 | }
|
---|
91 | # endif
|
---|
92 | while (n) {
|
---|
93 | sqr(r[0], r[1], a[0]);
|
---|
94 | a++;
|
---|
95 | r += 2;
|
---|
96 | n--;
|
---|
97 | }
|
---|
98 | }
|
---|
99 |
|
---|
100 | #else /* !(defined(BN_LLONG) ||
|
---|
101 | * defined(BN_UMULT_HIGH)) */
|
---|
102 |
|
---|
103 | BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
|
---|
104 | BN_ULONG w)
|
---|
105 | {
|
---|
106 | BN_ULONG c = 0;
|
---|
107 | BN_ULONG bl, bh;
|
---|
108 |
|
---|
109 | assert(num >= 0);
|
---|
110 | if (num <= 0)
|
---|
111 | return (BN_ULONG)0;
|
---|
112 |
|
---|
113 | bl = LBITS(w);
|
---|
114 | bh = HBITS(w);
|
---|
115 |
|
---|
116 | # ifndef OPENSSL_SMALL_FOOTPRINT
|
---|
117 | while (num & ~3) {
|
---|
118 | mul_add(rp[0], ap[0], bl, bh, c);
|
---|
119 | mul_add(rp[1], ap[1], bl, bh, c);
|
---|
120 | mul_add(rp[2], ap[2], bl, bh, c);
|
---|
121 | mul_add(rp[3], ap[3], bl, bh, c);
|
---|
122 | ap += 4;
|
---|
123 | rp += 4;
|
---|
124 | num -= 4;
|
---|
125 | }
|
---|
126 | # endif
|
---|
127 | while (num) {
|
---|
128 | mul_add(rp[0], ap[0], bl, bh, c);
|
---|
129 | ap++;
|
---|
130 | rp++;
|
---|
131 | num--;
|
---|
132 | }
|
---|
133 | return c;
|
---|
134 | }
|
---|
135 |
|
---|
136 | BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
|
---|
137 | {
|
---|
138 | BN_ULONG carry = 0;
|
---|
139 | BN_ULONG bl, bh;
|
---|
140 |
|
---|
141 | assert(num >= 0);
|
---|
142 | if (num <= 0)
|
---|
143 | return (BN_ULONG)0;
|
---|
144 |
|
---|
145 | bl = LBITS(w);
|
---|
146 | bh = HBITS(w);
|
---|
147 |
|
---|
148 | # ifndef OPENSSL_SMALL_FOOTPRINT
|
---|
149 | while (num & ~3) {
|
---|
150 | mul(rp[0], ap[0], bl, bh, carry);
|
---|
151 | mul(rp[1], ap[1], bl, bh, carry);
|
---|
152 | mul(rp[2], ap[2], bl, bh, carry);
|
---|
153 | mul(rp[3], ap[3], bl, bh, carry);
|
---|
154 | ap += 4;
|
---|
155 | rp += 4;
|
---|
156 | num -= 4;
|
---|
157 | }
|
---|
158 | # endif
|
---|
159 | while (num) {
|
---|
160 | mul(rp[0], ap[0], bl, bh, carry);
|
---|
161 | ap++;
|
---|
162 | rp++;
|
---|
163 | num--;
|
---|
164 | }
|
---|
165 | return carry;
|
---|
166 | }
|
---|
167 |
|
---|
168 | void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
|
---|
169 | {
|
---|
170 | assert(n >= 0);
|
---|
171 | if (n <= 0)
|
---|
172 | return;
|
---|
173 |
|
---|
174 | # ifndef OPENSSL_SMALL_FOOTPRINT
|
---|
175 | while (n & ~3) {
|
---|
176 | sqr64(r[0], r[1], a[0]);
|
---|
177 | sqr64(r[2], r[3], a[1]);
|
---|
178 | sqr64(r[4], r[5], a[2]);
|
---|
179 | sqr64(r[6], r[7], a[3]);
|
---|
180 | a += 4;
|
---|
181 | r += 8;
|
---|
182 | n -= 4;
|
---|
183 | }
|
---|
184 | # endif
|
---|
185 | while (n) {
|
---|
186 | sqr64(r[0], r[1], a[0]);
|
---|
187 | a++;
|
---|
188 | r += 2;
|
---|
189 | n--;
|
---|
190 | }
|
---|
191 | }
|
---|
192 |
|
---|
193 | #endif /* !(defined(BN_LLONG) ||
|
---|
194 | * defined(BN_UMULT_HIGH)) */
|
---|
195 |
|
---|
196 | #if defined(BN_LLONG) && defined(BN_DIV2W)
|
---|
197 |
|
---|
198 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
|
---|
199 | {
|
---|
200 | return ((BN_ULONG)(((((BN_ULLONG) h) << BN_BITS2) | l) / (BN_ULLONG) d));
|
---|
201 | }
|
---|
202 |
|
---|
203 | #else
|
---|
204 |
|
---|
205 | /* Divide h,l by d and return the result. */
|
---|
206 | /* I need to test this some more :-( */
|
---|
207 | BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
|
---|
208 | {
|
---|
209 | BN_ULONG dh, dl, q, ret = 0, th, tl, t;
|
---|
210 | int i, count = 2;
|
---|
211 |
|
---|
212 | if (d == 0)
|
---|
213 | return BN_MASK2;
|
---|
214 |
|
---|
215 | i = BN_num_bits_word(d);
|
---|
216 | assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i));
|
---|
217 |
|
---|
218 | i = BN_BITS2 - i;
|
---|
219 | if (h >= d)
|
---|
220 | h -= d;
|
---|
221 |
|
---|
222 | if (i) {
|
---|
223 | d <<= i;
|
---|
224 | h = (h << i) | (l >> (BN_BITS2 - i));
|
---|
225 | l <<= i;
|
---|
226 | }
|
---|
227 | dh = (d & BN_MASK2h) >> BN_BITS4;
|
---|
228 | dl = (d & BN_MASK2l);
|
---|
229 | for (;;) {
|
---|
230 | if ((h >> BN_BITS4) == dh)
|
---|
231 | q = BN_MASK2l;
|
---|
232 | else
|
---|
233 | q = h / dh;
|
---|
234 |
|
---|
235 | th = q * dh;
|
---|
236 | tl = dl * q;
|
---|
237 | for (;;) {
|
---|
238 | t = h - th;
|
---|
239 | if ((t & BN_MASK2h) ||
|
---|
240 | ((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4))))
|
---|
241 | break;
|
---|
242 | q--;
|
---|
243 | th -= dh;
|
---|
244 | tl -= dl;
|
---|
245 | }
|
---|
246 | t = (tl >> BN_BITS4);
|
---|
247 | tl = (tl << BN_BITS4) & BN_MASK2h;
|
---|
248 | th += t;
|
---|
249 |
|
---|
250 | if (l < tl)
|
---|
251 | th++;
|
---|
252 | l -= tl;
|
---|
253 | if (h < th) {
|
---|
254 | h += d;
|
---|
255 | q--;
|
---|
256 | }
|
---|
257 | h -= th;
|
---|
258 |
|
---|
259 | if (--count == 0)
|
---|
260 | break;
|
---|
261 |
|
---|
262 | ret = q << BN_BITS4;
|
---|
263 | h = ((h << BN_BITS4) | (l >> BN_BITS4)) & BN_MASK2;
|
---|
264 | l = (l & BN_MASK2l) << BN_BITS4;
|
---|
265 | }
|
---|
266 | ret |= q;
|
---|
267 | return ret;
|
---|
268 | }
|
---|
269 | #endif /* !(defined(BN_LLONG) && defined(BN_DIV2W)) */
|
---|
270 |
|
---|
271 | #ifdef BN_LLONG
|
---|
272 | BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
|
---|
273 | int n)
|
---|
274 | {
|
---|
275 | BN_ULLONG ll = 0;
|
---|
276 |
|
---|
277 | assert(n >= 0);
|
---|
278 | if (n <= 0)
|
---|
279 | return (BN_ULONG)0;
|
---|
280 |
|
---|
281 | # ifndef OPENSSL_SMALL_FOOTPRINT
|
---|
282 | while (n & ~3) {
|
---|
283 | ll += (BN_ULLONG) a[0] + b[0];
|
---|
284 | r[0] = (BN_ULONG)ll & BN_MASK2;
|
---|
285 | ll >>= BN_BITS2;
|
---|
286 | ll += (BN_ULLONG) a[1] + b[1];
|
---|
287 | r[1] = (BN_ULONG)ll & BN_MASK2;
|
---|
288 | ll >>= BN_BITS2;
|
---|
289 | ll += (BN_ULLONG) a[2] + b[2];
|
---|
290 | r[2] = (BN_ULONG)ll & BN_MASK2;
|
---|
291 | ll >>= BN_BITS2;
|
---|
292 | ll += (BN_ULLONG) a[3] + b[3];
|
---|
293 | r[3] = (BN_ULONG)ll & BN_MASK2;
|
---|
294 | ll >>= BN_BITS2;
|
---|
295 | a += 4;
|
---|
296 | b += 4;
|
---|
297 | r += 4;
|
---|
298 | n -= 4;
|
---|
299 | }
|
---|
300 | # endif
|
---|
301 | while (n) {
|
---|
302 | ll += (BN_ULLONG) a[0] + b[0];
|
---|
303 | r[0] = (BN_ULONG)ll & BN_MASK2;
|
---|
304 | ll >>= BN_BITS2;
|
---|
305 | a++;
|
---|
306 | b++;
|
---|
307 | r++;
|
---|
308 | n--;
|
---|
309 | }
|
---|
310 | return (BN_ULONG)ll;
|
---|
311 | }
|
---|
312 | #else /* !BN_LLONG */
|
---|
313 | BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
|
---|
314 | int n)
|
---|
315 | {
|
---|
316 | BN_ULONG c, l, t;
|
---|
317 |
|
---|
318 | assert(n >= 0);
|
---|
319 | if (n <= 0)
|
---|
320 | return (BN_ULONG)0;
|
---|
321 |
|
---|
322 | c = 0;
|
---|
323 | # ifndef OPENSSL_SMALL_FOOTPRINT
|
---|
324 | while (n & ~3) {
|
---|
325 | t = a[0];
|
---|
326 | t = (t + c) & BN_MASK2;
|
---|
327 | c = (t < c);
|
---|
328 | l = (t + b[0]) & BN_MASK2;
|
---|
329 | c += (l < t);
|
---|
330 | r[0] = l;
|
---|
331 | t = a[1];
|
---|
332 | t = (t + c) & BN_MASK2;
|
---|
333 | c = (t < c);
|
---|
334 | l = (t + b[1]) & BN_MASK2;
|
---|
335 | c += (l < t);
|
---|
336 | r[1] = l;
|
---|
337 | t = a[2];
|
---|
338 | t = (t + c) & BN_MASK2;
|
---|
339 | c = (t < c);
|
---|
340 | l = (t + b[2]) & BN_MASK2;
|
---|
341 | c += (l < t);
|
---|
342 | r[2] = l;
|
---|
343 | t = a[3];
|
---|
344 | t = (t + c) & BN_MASK2;
|
---|
345 | c = (t < c);
|
---|
346 | l = (t + b[3]) & BN_MASK2;
|
---|
347 | c += (l < t);
|
---|
348 | r[3] = l;
|
---|
349 | a += 4;
|
---|
350 | b += 4;
|
---|
351 | r += 4;
|
---|
352 | n -= 4;
|
---|
353 | }
|
---|
354 | # endif
|
---|
355 | while (n) {
|
---|
356 | t = a[0];
|
---|
357 | t = (t + c) & BN_MASK2;
|
---|
358 | c = (t < c);
|
---|
359 | l = (t + b[0]) & BN_MASK2;
|
---|
360 | c += (l < t);
|
---|
361 | r[0] = l;
|
---|
362 | a++;
|
---|
363 | b++;
|
---|
364 | r++;
|
---|
365 | n--;
|
---|
366 | }
|
---|
367 | return (BN_ULONG)c;
|
---|
368 | }
|
---|
369 | #endif /* !BN_LLONG */
|
---|
370 |
|
---|
371 | BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
|
---|
372 | int n)
|
---|
373 | {
|
---|
374 | BN_ULONG t1, t2;
|
---|
375 | int c = 0;
|
---|
376 |
|
---|
377 | assert(n >= 0);
|
---|
378 | if (n <= 0)
|
---|
379 | return (BN_ULONG)0;
|
---|
380 |
|
---|
381 | #ifndef OPENSSL_SMALL_FOOTPRINT
|
---|
382 | while (n & ~3) {
|
---|
383 | t1 = a[0];
|
---|
384 | t2 = b[0];
|
---|
385 | r[0] = (t1 - t2 - c) & BN_MASK2;
|
---|
386 | if (t1 != t2)
|
---|
387 | c = (t1 < t2);
|
---|
388 | t1 = a[1];
|
---|
389 | t2 = b[1];
|
---|
390 | r[1] = (t1 - t2 - c) & BN_MASK2;
|
---|
391 | if (t1 != t2)
|
---|
392 | c = (t1 < t2);
|
---|
393 | t1 = a[2];
|
---|
394 | t2 = b[2];
|
---|
395 | r[2] = (t1 - t2 - c) & BN_MASK2;
|
---|
396 | if (t1 != t2)
|
---|
397 | c = (t1 < t2);
|
---|
398 | t1 = a[3];
|
---|
399 | t2 = b[3];
|
---|
400 | r[3] = (t1 - t2 - c) & BN_MASK2;
|
---|
401 | if (t1 != t2)
|
---|
402 | c = (t1 < t2);
|
---|
403 | a += 4;
|
---|
404 | b += 4;
|
---|
405 | r += 4;
|
---|
406 | n -= 4;
|
---|
407 | }
|
---|
408 | #endif
|
---|
409 | while (n) {
|
---|
410 | t1 = a[0];
|
---|
411 | t2 = b[0];
|
---|
412 | r[0] = (t1 - t2 - c) & BN_MASK2;
|
---|
413 | if (t1 != t2)
|
---|
414 | c = (t1 < t2);
|
---|
415 | a++;
|
---|
416 | b++;
|
---|
417 | r++;
|
---|
418 | n--;
|
---|
419 | }
|
---|
420 | return c;
|
---|
421 | }
|
---|
422 |
|
---|
423 | #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)
|
---|
424 |
|
---|
425 | #ifndef ___openssl_mangling_h___ /* bird */
|
---|
426 | # undef bn_mul_comba8
|
---|
427 | # undef bn_mul_comba4
|
---|
428 | # undef bn_sqr_comba8
|
---|
429 | # undef bn_sqr_comba4
|
---|
430 | #endif /* !___openssl_mangling_h___*/ /* bird */
|
---|
431 |
|
---|
432 | /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
|
---|
433 | /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
|
---|
434 | /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
|
---|
435 | /*
|
---|
436 | * sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number
|
---|
437 | * c=(c2,c1,c0)
|
---|
438 | */
|
---|
439 |
|
---|
440 | # ifdef BN_LLONG
|
---|
/*
 * Keep in mind that additions to multiplication result can not
 * overflow, because its high half cannot be all-ones.
 *
 * BN_LLONG variants: the product is formed in the double-width type
 * BN_ULLONG and split with Lw()/Hw(). c0, c1 and c2 must be modifiable
 * lvalues; each is evaluated more than once.
 */

/* (c2,c1,c0) += a * b */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG hi;                            \
        BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
        t += c0;                /* no carry */  \
        c0 = (BN_ULONG)Lw(t);                   \
        hi = (BN_ULONG)Hw(t);                   \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        } while(0)

/* (c2,c1,c0) += 2 * a * b, done as two separate additions of a * b */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG hi;                            \
        BN_ULLONG t = (BN_ULLONG)(a)*(b);       \
        BN_ULLONG tt = t+c0;    /* no carry */  \
        c0 = (BN_ULONG)Lw(tt);                  \
        hi = (BN_ULONG)Hw(tt);                  \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        t += c0;                /* no carry */  \
        c0 = (BN_ULONG)Lw(t);                   \
        hi = (BN_ULONG)Hw(t);                   \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        } while(0)

/*
 * (c2,c1,c0) += a[i] * a[i]
 *
 * The array argument is parenthesised before indexing, as in the other
 * sqr_add_c variants in this file, so that an expression argument such as
 * (p + 1) is indexed as a whole.
 */
#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG hi;                            \
        BN_ULLONG t = (BN_ULLONG)(a)[i]*(a)[i]; \
        t += c0;                /* no carry */  \
        c0 = (BN_ULONG)Lw(t);                   \
        hi = (BN_ULONG)Hw(t);                   \
        c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
        } while(0)

/* (c2,c1,c0) += 2 * a[i] * a[j] */
#  define sqr_add_c2(a,i,j,c0,c1,c2) \
        mul_add_c2((a)[i],(a)[j],c0,c1,c2)
478 |
|
---|
479 | # elif defined(BN_UMULT_LOHI)
|
---|
/*
 * Keep in mind that additions to hi can not overflow, because
 * the high word of a multiplication result cannot be all-ones.
 *
 * BN_UMULT_LOHI variants: the two product words come from
 * BN_UMULT_LOHI(lo,hi,x,y). c0, c1 and c2 must be modifiable lvalues;
 * each is evaluated more than once.
 */

/* (c2,c1,c0) += a * b */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG ta = (a), tb = (b);            \
        BN_ULONG lo, hi;                        \
        BN_UMULT_LOHI(lo,hi,ta,tb);             \
        c0 += lo; if (c0 < lo) hi++;            \
        c1 += hi; if (c1 < hi) c2++;            \
        } while(0)

/* (c2,c1,c0) += 2 * a * b, done as two separate additions of a * b */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG ta = (a), tb = (b);            \
        BN_ULONG lo, hi, tt;                    \
        BN_UMULT_LOHI(lo,hi,ta,tb);             \
        c0 += lo; tt = hi; if (c0 < lo) tt++;   \
        c1 += tt; if (c1 < tt) c2++;            \
        c0 += lo; if (c0 < lo) hi++;            \
        c1 += hi; if (c1 < hi) c2++;            \
        } while(0)

/* (c2,c1,c0) += a[i] * a[i] */
#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG ta = (a)[i];                   \
        BN_ULONG lo, hi;                        \
        BN_UMULT_LOHI(lo,hi,ta,ta);             \
        c0 += lo; if (c0 < lo) hi++;            \
        c1 += hi; if (c1 < hi) c2++;            \
        } while(0)

/* (c2,c1,c0) += 2 * a[i] * a[j] */
#  define sqr_add_c2(a,i,j,c0,c1,c2)    \
        mul_add_c2((a)[i],(a)[j],c0,c1,c2)
512 |
|
---|
513 | # elif defined(BN_UMULT_HIGH)
|
---|
/*
 * Keep in mind that additions to hi can not overflow, because
 * the high word of a multiplication result cannot be all-ones.
 *
 * BN_UMULT_HIGH variants: the low product word is a plain word multiply
 * and the high word comes from BN_UMULT_HIGH(x,y). c0, c1 and c2 must be
 * modifiable lvalues; each is evaluated more than once.
 */

/* (c2,c1,c0) += a * b */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG ta = (a), tb = (b);            \
        BN_ULONG lo = ta * tb;                  \
        BN_ULONG hi = BN_UMULT_HIGH(ta,tb);     \
        c0 += lo; if (c0 < lo) hi++;            \
        c1 += hi; if (c1 < hi) c2++;            \
        } while(0)

/* (c2,c1,c0) += 2 * a * b, done as two separate additions of a * b */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG ta = (a), tb = (b), tt;        \
        BN_ULONG lo = ta * tb;                  \
        BN_ULONG hi = BN_UMULT_HIGH(ta,tb);     \
        c0 += lo; tt = hi; if (c0 < lo) tt++;   \
        c1 += tt; if (c1 < tt) c2++;            \
        c0 += lo; if (c0 < lo) hi++;            \
        c1 += hi; if (c1 < hi) c2++;            \
        } while(0)

/* (c2,c1,c0) += a[i] * a[i] */
#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG ta = (a)[i];                   \
        BN_ULONG lo = ta * ta;                  \
        BN_ULONG hi = BN_UMULT_HIGH(ta,ta);     \
        c0 += lo; if (c0 < lo) hi++;            \
        c1 += hi; if (c1 < hi) c2++;            \
        } while(0)

/* (c2,c1,c0) += 2 * a[i] * a[j] */
#  define sqr_add_c2(a,i,j,c0,c1,c2)    \
        mul_add_c2((a)[i],(a)[j],c0,c1,c2)
546 |
|
---|
547 | # else /* !BN_LLONG */
|
---|
/*
 * Keep in mind that additions to hi can not overflow, because
 * the high word of a multiplication result cannot be all-ones.
 *
 * Fallback variants for builds with none of BN_LLONG, BN_UMULT_LOHI or
 * BN_UMULT_HIGH: operands are split with LBITS()/HBITS() and multiplied by
 * the mul64()/sqr64() macros (defined outside this file), which leave the
 * product in lo/hi. c0, c1 and c2 must be modifiable lvalues; each is
 * evaluated more than once.
 */

/* (c2,c1,c0) += a * b */
#  define mul_add_c(a,b,c0,c1,c2)       do {    \
        BN_ULONG lo = LBITS(a), hi = HBITS(a);  \
        BN_ULONG bl = LBITS(b), bh = HBITS(b);  \
        mul64(lo,hi,bl,bh);                     \
        c0 = (c0+lo)&BN_MASK2; hi += (c0 < lo); \
        c1 = (c1+hi)&BN_MASK2; c2 += (c1 < hi); \
        } while(0)

/* (c2,c1,c0) += 2 * a * b, done as two separate additions of a * b */
#  define mul_add_c2(a,b,c0,c1,c2)      do {    \
        BN_ULONG tt;                            \
        BN_ULONG lo = LBITS(a), hi = HBITS(a);  \
        BN_ULONG bl = LBITS(b), bh = HBITS(b);  \
        mul64(lo,hi,bl,bh);                     \
        tt = hi;                                \
        c0 = (c0+lo)&BN_MASK2; tt += (c0 < lo); \
        c1 = (c1+tt)&BN_MASK2; c2 += (c1 < tt); \
        c0 = (c0+lo)&BN_MASK2; hi += (c0 < lo); \
        c1 = (c1+hi)&BN_MASK2; c2 += (c1 < hi); \
        } while(0)

/* (c2,c1,c0) += a[i] * a[i] */
#  define sqr_add_c(a,i,c0,c1,c2)       do {    \
        BN_ULONG lo, hi;                        \
        sqr64(lo,hi,(a)[i]);                    \
        c0 = (c0+lo)&BN_MASK2; hi += (c0 < lo); \
        c1 = (c1+hi)&BN_MASK2; c2 += (c1 < hi); \
        } while(0)

/* (c2,c1,c0) += 2 * a[i] * a[j] */
#  define sqr_add_c2(a,i,j,c0,c1,c2) \
        mul_add_c2((a)[i],(a)[j],c0,c1,c2)
581 | # endif /* !BN_LLONG */
|
---|
582 |
|
---|
583 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
|
---|
584 | {
|
---|
585 | BN_ULONG c1, c2, c3;
|
---|
586 |
|
---|
587 | c1 = 0;
|
---|
588 | c2 = 0;
|
---|
589 | c3 = 0;
|
---|
590 | mul_add_c(a[0], b[0], c1, c2, c3);
|
---|
591 | r[0] = c1;
|
---|
592 | c1 = 0;
|
---|
593 | mul_add_c(a[0], b[1], c2, c3, c1);
|
---|
594 | mul_add_c(a[1], b[0], c2, c3, c1);
|
---|
595 | r[1] = c2;
|
---|
596 | c2 = 0;
|
---|
597 | mul_add_c(a[2], b[0], c3, c1, c2);
|
---|
598 | mul_add_c(a[1], b[1], c3, c1, c2);
|
---|
599 | mul_add_c(a[0], b[2], c3, c1, c2);
|
---|
600 | r[2] = c3;
|
---|
601 | c3 = 0;
|
---|
602 | mul_add_c(a[0], b[3], c1, c2, c3);
|
---|
603 | mul_add_c(a[1], b[2], c1, c2, c3);
|
---|
604 | mul_add_c(a[2], b[1], c1, c2, c3);
|
---|
605 | mul_add_c(a[3], b[0], c1, c2, c3);
|
---|
606 | r[3] = c1;
|
---|
607 | c1 = 0;
|
---|
608 | mul_add_c(a[4], b[0], c2, c3, c1);
|
---|
609 | mul_add_c(a[3], b[1], c2, c3, c1);
|
---|
610 | mul_add_c(a[2], b[2], c2, c3, c1);
|
---|
611 | mul_add_c(a[1], b[3], c2, c3, c1);
|
---|
612 | mul_add_c(a[0], b[4], c2, c3, c1);
|
---|
613 | r[4] = c2;
|
---|
614 | c2 = 0;
|
---|
615 | mul_add_c(a[0], b[5], c3, c1, c2);
|
---|
616 | mul_add_c(a[1], b[4], c3, c1, c2);
|
---|
617 | mul_add_c(a[2], b[3], c3, c1, c2);
|
---|
618 | mul_add_c(a[3], b[2], c3, c1, c2);
|
---|
619 | mul_add_c(a[4], b[1], c3, c1, c2);
|
---|
620 | mul_add_c(a[5], b[0], c3, c1, c2);
|
---|
621 | r[5] = c3;
|
---|
622 | c3 = 0;
|
---|
623 | mul_add_c(a[6], b[0], c1, c2, c3);
|
---|
624 | mul_add_c(a[5], b[1], c1, c2, c3);
|
---|
625 | mul_add_c(a[4], b[2], c1, c2, c3);
|
---|
626 | mul_add_c(a[3], b[3], c1, c2, c3);
|
---|
627 | mul_add_c(a[2], b[4], c1, c2, c3);
|
---|
628 | mul_add_c(a[1], b[5], c1, c2, c3);
|
---|
629 | mul_add_c(a[0], b[6], c1, c2, c3);
|
---|
630 | r[6] = c1;
|
---|
631 | c1 = 0;
|
---|
632 | mul_add_c(a[0], b[7], c2, c3, c1);
|
---|
633 | mul_add_c(a[1], b[6], c2, c3, c1);
|
---|
634 | mul_add_c(a[2], b[5], c2, c3, c1);
|
---|
635 | mul_add_c(a[3], b[4], c2, c3, c1);
|
---|
636 | mul_add_c(a[4], b[3], c2, c3, c1);
|
---|
637 | mul_add_c(a[5], b[2], c2, c3, c1);
|
---|
638 | mul_add_c(a[6], b[1], c2, c3, c1);
|
---|
639 | mul_add_c(a[7], b[0], c2, c3, c1);
|
---|
640 | r[7] = c2;
|
---|
641 | c2 = 0;
|
---|
642 | mul_add_c(a[7], b[1], c3, c1, c2);
|
---|
643 | mul_add_c(a[6], b[2], c3, c1, c2);
|
---|
644 | mul_add_c(a[5], b[3], c3, c1, c2);
|
---|
645 | mul_add_c(a[4], b[4], c3, c1, c2);
|
---|
646 | mul_add_c(a[3], b[5], c3, c1, c2);
|
---|
647 | mul_add_c(a[2], b[6], c3, c1, c2);
|
---|
648 | mul_add_c(a[1], b[7], c3, c1, c2);
|
---|
649 | r[8] = c3;
|
---|
650 | c3 = 0;
|
---|
651 | mul_add_c(a[2], b[7], c1, c2, c3);
|
---|
652 | mul_add_c(a[3], b[6], c1, c2, c3);
|
---|
653 | mul_add_c(a[4], b[5], c1, c2, c3);
|
---|
654 | mul_add_c(a[5], b[4], c1, c2, c3);
|
---|
655 | mul_add_c(a[6], b[3], c1, c2, c3);
|
---|
656 | mul_add_c(a[7], b[2], c1, c2, c3);
|
---|
657 | r[9] = c1;
|
---|
658 | c1 = 0;
|
---|
659 | mul_add_c(a[7], b[3], c2, c3, c1);
|
---|
660 | mul_add_c(a[6], b[4], c2, c3, c1);
|
---|
661 | mul_add_c(a[5], b[5], c2, c3, c1);
|
---|
662 | mul_add_c(a[4], b[6], c2, c3, c1);
|
---|
663 | mul_add_c(a[3], b[7], c2, c3, c1);
|
---|
664 | r[10] = c2;
|
---|
665 | c2 = 0;
|
---|
666 | mul_add_c(a[4], b[7], c3, c1, c2);
|
---|
667 | mul_add_c(a[5], b[6], c3, c1, c2);
|
---|
668 | mul_add_c(a[6], b[5], c3, c1, c2);
|
---|
669 | mul_add_c(a[7], b[4], c3, c1, c2);
|
---|
670 | r[11] = c3;
|
---|
671 | c3 = 0;
|
---|
672 | mul_add_c(a[7], b[5], c1, c2, c3);
|
---|
673 | mul_add_c(a[6], b[6], c1, c2, c3);
|
---|
674 | mul_add_c(a[5], b[7], c1, c2, c3);
|
---|
675 | r[12] = c1;
|
---|
676 | c1 = 0;
|
---|
677 | mul_add_c(a[6], b[7], c2, c3, c1);
|
---|
678 | mul_add_c(a[7], b[6], c2, c3, c1);
|
---|
679 | r[13] = c2;
|
---|
680 | c2 = 0;
|
---|
681 | mul_add_c(a[7], b[7], c3, c1, c2);
|
---|
682 | r[14] = c3;
|
---|
683 | r[15] = c1;
|
---|
684 | }
|
---|
685 |
|
---|
686 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
|
---|
687 | {
|
---|
688 | BN_ULONG c1, c2, c3;
|
---|
689 |
|
---|
690 | c1 = 0;
|
---|
691 | c2 = 0;
|
---|
692 | c3 = 0;
|
---|
693 | mul_add_c(a[0], b[0], c1, c2, c3);
|
---|
694 | r[0] = c1;
|
---|
695 | c1 = 0;
|
---|
696 | mul_add_c(a[0], b[1], c2, c3, c1);
|
---|
697 | mul_add_c(a[1], b[0], c2, c3, c1);
|
---|
698 | r[1] = c2;
|
---|
699 | c2 = 0;
|
---|
700 | mul_add_c(a[2], b[0], c3, c1, c2);
|
---|
701 | mul_add_c(a[1], b[1], c3, c1, c2);
|
---|
702 | mul_add_c(a[0], b[2], c3, c1, c2);
|
---|
703 | r[2] = c3;
|
---|
704 | c3 = 0;
|
---|
705 | mul_add_c(a[0], b[3], c1, c2, c3);
|
---|
706 | mul_add_c(a[1], b[2], c1, c2, c3);
|
---|
707 | mul_add_c(a[2], b[1], c1, c2, c3);
|
---|
708 | mul_add_c(a[3], b[0], c1, c2, c3);
|
---|
709 | r[3] = c1;
|
---|
710 | c1 = 0;
|
---|
711 | mul_add_c(a[3], b[1], c2, c3, c1);
|
---|
712 | mul_add_c(a[2], b[2], c2, c3, c1);
|
---|
713 | mul_add_c(a[1], b[3], c2, c3, c1);
|
---|
714 | r[4] = c2;
|
---|
715 | c2 = 0;
|
---|
716 | mul_add_c(a[2], b[3], c3, c1, c2);
|
---|
717 | mul_add_c(a[3], b[2], c3, c1, c2);
|
---|
718 | r[5] = c3;
|
---|
719 | c3 = 0;
|
---|
720 | mul_add_c(a[3], b[3], c1, c2, c3);
|
---|
721 | r[6] = c1;
|
---|
722 | r[7] = c2;
|
---|
723 | }
|
---|
724 |
|
---|
725 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
|
---|
726 | {
|
---|
727 | BN_ULONG c1, c2, c3;
|
---|
728 |
|
---|
729 | c1 = 0;
|
---|
730 | c2 = 0;
|
---|
731 | c3 = 0;
|
---|
732 | sqr_add_c(a, 0, c1, c2, c3);
|
---|
733 | r[0] = c1;
|
---|
734 | c1 = 0;
|
---|
735 | sqr_add_c2(a, 1, 0, c2, c3, c1);
|
---|
736 | r[1] = c2;
|
---|
737 | c2 = 0;
|
---|
738 | sqr_add_c(a, 1, c3, c1, c2);
|
---|
739 | sqr_add_c2(a, 2, 0, c3, c1, c2);
|
---|
740 | r[2] = c3;
|
---|
741 | c3 = 0;
|
---|
742 | sqr_add_c2(a, 3, 0, c1, c2, c3);
|
---|
743 | sqr_add_c2(a, 2, 1, c1, c2, c3);
|
---|
744 | r[3] = c1;
|
---|
745 | c1 = 0;
|
---|
746 | sqr_add_c(a, 2, c2, c3, c1);
|
---|
747 | sqr_add_c2(a, 3, 1, c2, c3, c1);
|
---|
748 | sqr_add_c2(a, 4, 0, c2, c3, c1);
|
---|
749 | r[4] = c2;
|
---|
750 | c2 = 0;
|
---|
751 | sqr_add_c2(a, 5, 0, c3, c1, c2);
|
---|
752 | sqr_add_c2(a, 4, 1, c3, c1, c2);
|
---|
753 | sqr_add_c2(a, 3, 2, c3, c1, c2);
|
---|
754 | r[5] = c3;
|
---|
755 | c3 = 0;
|
---|
756 | sqr_add_c(a, 3, c1, c2, c3);
|
---|
757 | sqr_add_c2(a, 4, 2, c1, c2, c3);
|
---|
758 | sqr_add_c2(a, 5, 1, c1, c2, c3);
|
---|
759 | sqr_add_c2(a, 6, 0, c1, c2, c3);
|
---|
760 | r[6] = c1;
|
---|
761 | c1 = 0;
|
---|
762 | sqr_add_c2(a, 7, 0, c2, c3, c1);
|
---|
763 | sqr_add_c2(a, 6, 1, c2, c3, c1);
|
---|
764 | sqr_add_c2(a, 5, 2, c2, c3, c1);
|
---|
765 | sqr_add_c2(a, 4, 3, c2, c3, c1);
|
---|
766 | r[7] = c2;
|
---|
767 | c2 = 0;
|
---|
768 | sqr_add_c(a, 4, c3, c1, c2);
|
---|
769 | sqr_add_c2(a, 5, 3, c3, c1, c2);
|
---|
770 | sqr_add_c2(a, 6, 2, c3, c1, c2);
|
---|
771 | sqr_add_c2(a, 7, 1, c3, c1, c2);
|
---|
772 | r[8] = c3;
|
---|
773 | c3 = 0;
|
---|
774 | sqr_add_c2(a, 7, 2, c1, c2, c3);
|
---|
775 | sqr_add_c2(a, 6, 3, c1, c2, c3);
|
---|
776 | sqr_add_c2(a, 5, 4, c1, c2, c3);
|
---|
777 | r[9] = c1;
|
---|
778 | c1 = 0;
|
---|
779 | sqr_add_c(a, 5, c2, c3, c1);
|
---|
780 | sqr_add_c2(a, 6, 4, c2, c3, c1);
|
---|
781 | sqr_add_c2(a, 7, 3, c2, c3, c1);
|
---|
782 | r[10] = c2;
|
---|
783 | c2 = 0;
|
---|
784 | sqr_add_c2(a, 7, 4, c3, c1, c2);
|
---|
785 | sqr_add_c2(a, 6, 5, c3, c1, c2);
|
---|
786 | r[11] = c3;
|
---|
787 | c3 = 0;
|
---|
788 | sqr_add_c(a, 6, c1, c2, c3);
|
---|
789 | sqr_add_c2(a, 7, 5, c1, c2, c3);
|
---|
790 | r[12] = c1;
|
---|
791 | c1 = 0;
|
---|
792 | sqr_add_c2(a, 7, 6, c2, c3, c1);
|
---|
793 | r[13] = c2;
|
---|
794 | c2 = 0;
|
---|
795 | sqr_add_c(a, 7, c3, c1, c2);
|
---|
796 | r[14] = c3;
|
---|
797 | r[15] = c1;
|
---|
798 | }
|
---|
799 |
|
---|
800 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
|
---|
801 | {
|
---|
802 | BN_ULONG c1, c2, c3;
|
---|
803 |
|
---|
804 | c1 = 0;
|
---|
805 | c2 = 0;
|
---|
806 | c3 = 0;
|
---|
807 | sqr_add_c(a, 0, c1, c2, c3);
|
---|
808 | r[0] = c1;
|
---|
809 | c1 = 0;
|
---|
810 | sqr_add_c2(a, 1, 0, c2, c3, c1);
|
---|
811 | r[1] = c2;
|
---|
812 | c2 = 0;
|
---|
813 | sqr_add_c(a, 1, c3, c1, c2);
|
---|
814 | sqr_add_c2(a, 2, 0, c3, c1, c2);
|
---|
815 | r[2] = c3;
|
---|
816 | c3 = 0;
|
---|
817 | sqr_add_c2(a, 3, 0, c1, c2, c3);
|
---|
818 | sqr_add_c2(a, 2, 1, c1, c2, c3);
|
---|
819 | r[3] = c1;
|
---|
820 | c1 = 0;
|
---|
821 | sqr_add_c(a, 2, c2, c3, c1);
|
---|
822 | sqr_add_c2(a, 3, 1, c2, c3, c1);
|
---|
823 | r[4] = c2;
|
---|
824 | c2 = 0;
|
---|
825 | sqr_add_c2(a, 3, 2, c3, c1, c2);
|
---|
826 | r[5] = c3;
|
---|
827 | c3 = 0;
|
---|
828 | sqr_add_c(a, 3, c1, c2, c3);
|
---|
829 | r[6] = c1;
|
---|
830 | r[7] = c2;
|
---|
831 | }
|
---|
832 |
|
---|
833 | # ifdef OPENSSL_NO_ASM
|
---|
834 | # ifdef OPENSSL_BN_ASM_MONT
|
---|
835 | # include <alloca.h>
|
---|
836 | /*
|
---|
837 | * This is essentially reference implementation, which may or may not
|
---|
838 | * result in performance improvement. E.g. on IA-32 this routine was
|
---|
839 | * observed to give 40% faster rsa1024 private key operations and 10%
|
---|
840 | * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
|
---|
841 | * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
|
---|
842 | * reference implementation, one to be used as starting point for
|
---|
843 | * platform-specific assembler. Mentioned numbers apply to compiler
|
---|
844 | * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and
|
---|
845 | * can vary not only from platform to platform, but even for compiler
|
---|
846 | * versions. Assembler vs. assembler improvement coefficients can
|
---|
847 | * [and are known to] differ and are to be documented elsewhere.
|
---|
848 | */
|
---|
849 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
---|
850 | const BN_ULONG *np, const BN_ULONG *n0p, int num)
|
---|
851 | {
|
---|
852 | BN_ULONG c0, c1, ml, *tp, n0;
|
---|
853 | # ifdef mul64
|
---|
854 | BN_ULONG mh;
|
---|
855 | # endif
|
---|
856 | volatile BN_ULONG *vp;
|
---|
857 | int i = 0, j;
|
---|
858 |
|
---|
859 | # if 0 /* template for platform-specific
|
---|
860 | * implementation */
|
---|
861 | if (ap == bp)
|
---|
862 | return bn_sqr_mont(rp, ap, np, n0p, num);
|
---|
863 | # endif
|
---|
864 | vp = tp = alloca((num + 2) * sizeof(BN_ULONG));
|
---|
865 |
|
---|
866 | n0 = *n0p;
|
---|
867 |
|
---|
868 | c0 = 0;
|
---|
869 | ml = bp[0];
|
---|
870 | # ifdef mul64
|
---|
871 | mh = HBITS(ml);
|
---|
872 | ml = LBITS(ml);
|
---|
873 | for (j = 0; j < num; ++j)
|
---|
874 | mul(tp[j], ap[j], ml, mh, c0);
|
---|
875 | # else
|
---|
876 | for (j = 0; j < num; ++j)
|
---|
877 | mul(tp[j], ap[j], ml, c0);
|
---|
878 | # endif
|
---|
879 |
|
---|
880 | tp[num] = c0;
|
---|
881 | tp[num + 1] = 0;
|
---|
882 | goto enter;
|
---|
883 |
|
---|
884 | for (i = 0; i < num; i++) {
|
---|
885 | c0 = 0;
|
---|
886 | ml = bp[i];
|
---|
887 | # ifdef mul64
|
---|
888 | mh = HBITS(ml);
|
---|
889 | ml = LBITS(ml);
|
---|
890 | for (j = 0; j < num; ++j)
|
---|
891 | mul_add(tp[j], ap[j], ml, mh, c0);
|
---|
892 | # else
|
---|
893 | for (j = 0; j < num; ++j)
|
---|
894 | mul_add(tp[j], ap[j], ml, c0);
|
---|
895 | # endif
|
---|
896 | c1 = (tp[num] + c0) & BN_MASK2;
|
---|
897 | tp[num] = c1;
|
---|
898 | tp[num + 1] = (c1 < c0 ? 1 : 0);
|
---|
899 | enter:
|
---|
900 | c1 = tp[0];
|
---|
901 | ml = (c1 * n0) & BN_MASK2;
|
---|
902 | c0 = 0;
|
---|
903 | # ifdef mul64
|
---|
904 | mh = HBITS(ml);
|
---|
905 | ml = LBITS(ml);
|
---|
906 | mul_add(c1, np[0], ml, mh, c0);
|
---|
907 | # else
|
---|
908 | mul_add(c1, ml, np[0], c0);
|
---|
909 | # endif
|
---|
910 | for (j = 1; j < num; j++) {
|
---|
911 | c1 = tp[j];
|
---|
912 | # ifdef mul64
|
---|
913 | mul_add(c1, np[j], ml, mh, c0);
|
---|
914 | # else
|
---|
915 | mul_add(c1, ml, np[j], c0);
|
---|
916 | # endif
|
---|
917 | tp[j - 1] = c1 & BN_MASK2;
|
---|
918 | }
|
---|
919 | c1 = (tp[num] + c0) & BN_MASK2;
|
---|
920 | tp[num - 1] = c1;
|
---|
921 | tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0);
|
---|
922 | }
|
---|
923 |
|
---|
924 | if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
|
---|
925 | c0 = bn_sub_words(rp, tp, np, num);
|
---|
926 | if (tp[num] != 0 || c0 == 0) {
|
---|
927 | for (i = 0; i < num + 2; i++)
|
---|
928 | vp[i] = 0;
|
---|
929 | return 1;
|
---|
930 | }
|
---|
931 | }
|
---|
932 | for (i = 0; i < num; i++)
|
---|
933 | rp[i] = tp[i], vp[i] = 0;
|
---|
934 | vp[num] = 0;
|
---|
935 | vp[num + 1] = 0;
|
---|
936 | return 1;
|
---|
937 | }
|
---|
938 | # else
|
---|
939 | /*
|
---|
940 | * Return value of 0 indicates that multiplication/convolution was not
|
---|
941 | * performed to signal the caller to fall down to alternative/original
|
---|
942 | * code-path.
|
---|
943 | */
|
---|
944 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
---|
945 | const BN_ULONG *np, const BN_ULONG *n0, int num)
|
---|
946 | {
|
---|
947 | return 0;
|
---|
948 | }
|
---|
949 | # endif /* OPENSSL_BN_ASM_MONT */
|
---|
950 | # endif
|
---|
951 |
|
---|
952 | #else /* !BN_MUL_COMBA */
|
---|
953 |
|
---|
954 | /* hmm... is it faster just to do a multiply? */
|
---|
955 | #ifndef ___openssl_mangling_h___ /* bird */
|
---|
956 | # undef bn_sqr_comba4
|
---|
957 | # undef bn_sqr_comba8
|
---|
958 | #endif
|
---|
959 | void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
|
---|
960 | {
|
---|
961 | BN_ULONG t[8];
|
---|
962 | bn_sqr_normal(r, a, 4, t);
|
---|
963 | }
|
---|
964 |
|
---|
965 | void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
|
---|
966 | {
|
---|
967 | BN_ULONG t[16];
|
---|
968 | bn_sqr_normal(r, a, 8, t);
|
---|
969 | }
|
---|
970 |
|
---|
971 | void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
|
---|
972 | {
|
---|
973 | r[4] = bn_mul_words(&(r[0]), a, 4, b[0]);
|
---|
974 | r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]);
|
---|
975 | r[6] = bn_mul_add_words(&(r[2]), a, 4, b[2]);
|
---|
976 | r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]);
|
---|
977 | }
|
---|
978 |
|
---|
979 | void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
|
---|
980 | {
|
---|
981 | r[8] = bn_mul_words(&(r[0]), a, 8, b[0]);
|
---|
982 | r[9] = bn_mul_add_words(&(r[1]), a, 8, b[1]);
|
---|
983 | r[10] = bn_mul_add_words(&(r[2]), a, 8, b[2]);
|
---|
984 | r[11] = bn_mul_add_words(&(r[3]), a, 8, b[3]);
|
---|
985 | r[12] = bn_mul_add_words(&(r[4]), a, 8, b[4]);
|
---|
986 | r[13] = bn_mul_add_words(&(r[5]), a, 8, b[5]);
|
---|
987 | r[14] = bn_mul_add_words(&(r[6]), a, 8, b[6]);
|
---|
988 | r[15] = bn_mul_add_words(&(r[7]), a, 8, b[7]);
|
---|
989 | }
|
---|
990 |
|
---|
991 | # ifdef OPENSSL_NO_ASM
|
---|
992 | # ifdef OPENSSL_BN_ASM_MONT
|
---|
993 | # include <alloca.h>
|
---|
994 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
---|
995 | const BN_ULONG *np, const BN_ULONG *n0p, int num)
|
---|
996 | {
|
---|
997 | BN_ULONG c0, c1, *tp, n0 = *n0p;
|
---|
998 | volatile BN_ULONG *vp;
|
---|
999 | int i = 0, j;
|
---|
1000 |
|
---|
1001 | vp = tp = alloca((num + 2) * sizeof(BN_ULONG));
|
---|
1002 |
|
---|
1003 | for (i = 0; i <= num; i++)
|
---|
1004 | tp[i] = 0;
|
---|
1005 |
|
---|
1006 | for (i = 0; i < num; i++) {
|
---|
1007 | c0 = bn_mul_add_words(tp, ap, num, bp[i]);
|
---|
1008 | c1 = (tp[num] + c0) & BN_MASK2;
|
---|
1009 | tp[num] = c1;
|
---|
1010 | tp[num + 1] = (c1 < c0 ? 1 : 0);
|
---|
1011 |
|
---|
1012 | c0 = bn_mul_add_words(tp, np, num, tp[0] * n0);
|
---|
1013 | c1 = (tp[num] + c0) & BN_MASK2;
|
---|
1014 | tp[num] = c1;
|
---|
1015 | tp[num + 1] += (c1 < c0 ? 1 : 0);
|
---|
1016 | for (j = 0; j <= num; j++)
|
---|
1017 | tp[j] = tp[j + 1];
|
---|
1018 | }
|
---|
1019 |
|
---|
1020 | if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
|
---|
1021 | c0 = bn_sub_words(rp, tp, np, num);
|
---|
1022 | if (tp[num] != 0 || c0 == 0) {
|
---|
1023 | for (i = 0; i < num + 2; i++)
|
---|
1024 | vp[i] = 0;
|
---|
1025 | return 1;
|
---|
1026 | }
|
---|
1027 | }
|
---|
1028 | for (i = 0; i < num; i++)
|
---|
1029 | rp[i] = tp[i], vp[i] = 0;
|
---|
1030 | vp[num] = 0;
|
---|
1031 | vp[num + 1] = 0;
|
---|
1032 | return 1;
|
---|
1033 | }
|
---|
1034 | # else
|
---|
1035 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
---|
1036 | const BN_ULONG *np, const BN_ULONG *n0, int num)
|
---|
1037 | {
|
---|
1038 | return 0;
|
---|
1039 | }
|
---|
1040 | # endif /* OPENSSL_BN_ASM_MONT */
|
---|
1041 | # endif
|
---|
1042 |
|
---|
1043 | #endif /* !BN_MUL_COMBA */
|
---|