#! /usr/bin/env perl
# Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# Work out the directory this script was started from so the perlasm
# support code can be located independently of the current directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";

# The last command-line argument is the output file name.  Everything the
# perlasm layer emits is printed to STDOUT, so STDOUT is redirected there.
# The open is checked: if it fails there is nowhere for the generated
# assembly to go, so stop immediately instead of discarding the output.
$output = pop;
open(STDOUT, '>', $output) or die "can't open $output: $!";

# The first remaining argument is handed to the perlasm layer unchanged.
&asm_init($ARGV[0]);

# The SSE2 code paths are generated only when the build passes
# -DOPENSSL_IA32_SSE2 somewhere on the command line.
$sse2=0;
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

&external_label("OPENSSL_ia32cap_P") if ($sse2);

# Emit the routines; each generator receives the symbol name to define.
&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");
&bn_sub_part_words("bn_sub_part_words");

&asm_finish();

# Buffered write errors only become visible at close time.
close STDOUT or die "error closing STDOUT: $!";

# bn_mul_add_words(name)
#
# Emits the routine called `name`.  Stack arguments of the generated
# routine, as read through wparam(): 0 = r, 1 = a, 2 = num, 3 = w.
# For each of the num words it stores the low 32 bits of
# a[i]*w + r[i] + carry into r[i] and keeps the high 32 bits as the next
# carry; the final carry is returned in eax.
#
# When the script was run with -DOPENSSL_IA32_SSE2 an MMX/SSE2 variant is
# emitted first and selected at run time by testing bit 26 of the first
# dword of OPENSSL_ia32cap_P.
#
# NOTE(review): the SSE2 single-word loop decrements before testing, so
# num == 0 is not handled on that path - presumably callers guarantee
# num > 0; confirm against the C callers.
sub bn_mul_add_words
{
    local $name = shift;

    &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

    # Register roles used by the SSE2 variant.
    ($r, $a, $c) = ("eax", "edx", "ecx");

    if ($sse2) {
        # Bit clear -> continue with the integer-only code further down.
        &picmeup("eax","OPENSSL_ia32cap_P");
        &bt(&DWP(0,"eax"),26);
        &jnc(&label("maw_non_sse2"));

        &mov($r,&wparam(0));
        &mov($a,&wparam(1));
        &mov($c,&wparam(2));
        &movd("mm0",&wparam(3));                # mm0 <- w
        &pxor("mm1","mm1");                     # mm1 <- 0 (incoming carry)
        &jmp(&label("maw_sse2_entry"));

        # Eight words per pass.  Loads, multiplies and the carry chain of
        # neighbouring words are interleaved; mm1 always carries the
        # running 64-bit sum whose low half is stored and whose high half
        # becomes the next carry.
        &set_label("maw_sse2_unrolled",16);
        &movd("mm3",&DWP(0,$r,"",0));           # mm3 <- r[0]
        &paddq("mm1","mm3");                    # mm1 <- carry + r[0]
        &movd("mm2",&DWP(0,$a,"",0));           # mm2 <- a[0]
        &pmuludq("mm2","mm0");                  # mm2 <- w*a[0]
        &movd("mm4",&DWP(4,$a,"",0));           # mm4 <- a[1]
        &pmuludq("mm4","mm0");                  # mm4 <- w*a[1]
        &movd("mm6",&DWP(8,$a,"",0));           # mm6 <- a[2]
        &pmuludq("mm6","mm0");                  # mm6 <- w*a[2]
        &movd("mm7",&DWP(12,$a,"",0));          # mm7 <- a[3]
        &pmuludq("mm7","mm0");                  # mm7 <- w*a[3]
        &paddq("mm1","mm2");                    # mm1 <- carry + r[0] + w*a[0]
        &movd("mm3",&DWP(4,$r,"",0));           # mm3 <- r[1]
        &paddq("mm3","mm4");                    # mm3 <- r[1] + w*a[1]
        &movd("mm5",&DWP(8,$r,"",0));           # mm5 <- r[2]
        &paddq("mm5","mm6");                    # mm5 <- r[2] + w*a[2]
        &movd("mm4",&DWP(12,$r,"",0));          # mm4 <- r[3]
        &paddq("mm7","mm4");                    # mm7 <- r[3] + w*a[3]
        &movd(&DWP(0,$r,"",0),"mm1");           # store r[0]
        &movd("mm2",&DWP(16,$a,"",0));          # mm2 <- a[4]
        &pmuludq("mm2","mm0");                  # mm2 <- w*a[4]
        &psrlq("mm1",32);                       # mm1 <- carry out of word 0
        &movd("mm4",&DWP(20,$a,"",0));          # mm4 <- a[5]
        &pmuludq("mm4","mm0");                  # mm4 <- w*a[5]
        &paddq("mm1","mm3");                    # mm1 <- carry + r[1] + w*a[1]
        &movd("mm6",&DWP(24,$a,"",0));          # mm6 <- a[6]
        &pmuludq("mm6","mm0");                  # mm6 <- w*a[6]
        &movd(&DWP(4,$r,"",0),"mm1");           # store r[1]
        &psrlq("mm1",32);                       # mm1 <- carry out of word 1
        &movd("mm3",&DWP(28,$a,"",0));          # mm3 <- a[7]
        &add($a,32);                            # a += 8 words
        &pmuludq("mm3","mm0");                  # mm3 <- w*a[7]
        &paddq("mm1","mm5");                    # mm1 <- carry + r[2] + w*a[2]
        &movd("mm5",&DWP(16,$r,"",0));          # mm5 <- r[4]
        &paddq("mm2","mm5");                    # mm2 <- r[4] + w*a[4]
        &movd(&DWP(8,$r,"",0),"mm1");           # store r[2]
        &psrlq("mm1",32);                       # mm1 <- carry out of word 2
        &paddq("mm1","mm7");                    # mm1 <- carry + r[3] + w*a[3]
        &movd("mm5",&DWP(20,$r,"",0));          # mm5 <- r[5]
        &paddq("mm4","mm5");                    # mm4 <- r[5] + w*a[5]
        &movd(&DWP(12,$r,"",0),"mm1");          # store r[3]
        &psrlq("mm1",32);                       # mm1 <- carry out of word 3
        &paddq("mm1","mm2");                    # mm1 <- carry + r[4] + w*a[4]
        &movd("mm5",&DWP(24,$r,"",0));          # mm5 <- r[6]
        &paddq("mm6","mm5");                    # mm6 <- r[6] + w*a[6]
        &movd(&DWP(16,$r,"",0),"mm1");          # store r[4]
        &psrlq("mm1",32);                       # mm1 <- carry out of word 4
        &paddq("mm1","mm4");                    # mm1 <- carry + r[5] + w*a[5]
        &movd("mm5",&DWP(28,$r,"",0));          # mm5 <- r[7]
        &paddq("mm3","mm5");                    # mm3 <- r[7] + w*a[7]
        &movd(&DWP(20,$r,"",0),"mm1");          # store r[5]
        &psrlq("mm1",32);                       # mm1 <- carry out of word 5
        &paddq("mm1","mm6");                    # mm1 <- carry + r[6] + w*a[6]
        &movd(&DWP(24,$r,"",0),"mm1");          # store r[6]
        &psrlq("mm1",32);                       # mm1 <- carry out of word 6
        &paddq("mm1","mm3");                    # mm1 <- carry + r[7] + w*a[7]
        &movd(&DWP(28,$r,"",0),"mm1");          # store r[7]
        &lea($r,&DWP(32,$r));                   # r += 8 words
        &psrlq("mm1",32);                       # mm1 <- carry out of the pass

        &sub($c,8);
        &jz(&label("maw_sse2_exit"));
        # Entry point: take another unrolled pass while at least eight
        # words remain, otherwise finish one word at a time.
        &set_label("maw_sse2_entry");
        &test($c,0xfffffff8);
        &jnz(&label("maw_sse2_unrolled"));

        &set_label("maw_sse2_loop",4);
        &movd("mm2",&DWP(0,$a));                # mm2 <- a[i]
        &movd("mm3",&DWP(0,$r));                # mm3 <- r[i]
        &pmuludq("mm2","mm0");                  # mm2 <- a[i]*w
        &lea($a,&DWP(4,$a));                    # a++
        &paddq("mm1","mm3");                    # carry += r[i]
        &paddq("mm1","mm2");                    # carry += a[i]*w
        &movd(&DWP(0,$r),"mm1");                # r[i] <- low half
        &sub($c,1);                             # sets the flags for jnz below
        &psrlq("mm1",32);                       # carry <- high half
        &lea($r,&DWP(4,$r));                    # r++
        &jnz(&label("maw_sse2_loop"));
        &set_label("maw_sse2_exit");
        &movd("eax","mm1");                     # return value <- carry
        &emms();
        &ret();

        &set_label("maw_non_sse2",16);
    }

    # Integer-only variant.  function_begin_B was used above, so the
    # register saves that make up the function_begin prologue are issued
    # by hand here.
    &push("ebp");
    &push("ebx");
    &push("esi");
    &push("edi");

    &comment("");
    # Register roles for the integer variant ($Low/$High are assigned but
    # not referenced again in this routine).
    ($Low, $High) = ("eax", "edx");
    ($a, $w, $r, $c) = ("ebx", "ebp", "edi", "esi");

    &xor($c,$c);                        # carry = 0
    &mov($r,&wparam(0));

    &mov("ecx",&wparam(2));
    &mov($a,&wparam(1));

    &and("ecx",0xfffffff8);             # word count rounded down to 8s
    &mov($w,&wparam(3));

    &push("ecx");                       # reserve a temporary stack slot

    # Tests the result of the "and" above; the mov and push in between
    # are emitted after it and before this branch.
    &jz(&label("maw_finish"));

    &set_label("maw_loop",16);

    # Eight words per iteration; $off is the byte offset of each word.
    foreach my $off (map { 4 * $_ } 0 .. 7) {
        &comment("Round $off");

        &mov("eax",&DWP($off,$a));      # eax = a[k]
        &mul($w);                       # edx:eax = a[k] * w
        &add("eax",$c);                 # low += carry
        &adc("edx",0);                  # high += overflow of that add
        &add("eax",&DWP($off,$r));      # low += r[k]
        &adc("edx",0);                  # high += overflow of that add
        &mov(&DWP($off,$r),"eax");      # r[k] = low
        &mov($c,"edx");                 # carry = high
    }

    &comment("");
    &sub("ecx",8);
    &lea($a,&DWP(32,$a));
    &lea($r,&DWP(32,$r));
    &jnz(&label("maw_loop"));           # flags still come from the sub

    &set_label("maw_finish",0);
    &mov("ecx",&wparam(2));             # reload num
    &and("ecx",7);                      # words left over (0..7)
    &jnz(&label("maw_finish2"));        # helps branch prediction
    &jmp(&label("maw_end"));

    &set_label("maw_finish2",1);
    # Up to seven trailing words.  The counter is decremented after every
    # word except the seventh, which needs no exit test.
    foreach my $k (0 .. 6) {
        my $not_last = ($k != 6);
        my $off      = $k * 4;

        &comment("Tail Round $k");
        &mov("eax",&DWP($off,$a));      # eax = a[k]
        &mul($w);                       # edx:eax = a[k] * w
        &add("eax",$c);                 # low += carry
        &adc("edx",0);
        &add("eax",&DWP($off,$r));      # low += r[k]
        &adc("edx",0);
        &dec("ecx") if $not_last;
        &mov(&DWP($off,$r),"eax");      # r[k] = low
        &mov($c,"edx");                 # carry = high
        &jz(&label("maw_end")) if $not_last;
    }
    &set_label("maw_end",0);
    &mov("eax",$c);                     # return the final carry

    &pop("ecx");                        # drop the temporary stack slot

    &function_end($name);
}

# bn_mul_words(name)
#
# Emits the routine called `name`.  Stack arguments of the generated
# routine, as read through wparam(): 0 = r, 1 = a, 2 = num, 3 = w.
# For each of the num words it stores the low 32 bits of a[i]*w + carry
# into r[i] and keeps the high 32 bits as the next carry; the final carry
# is returned in eax.
#
# With -DOPENSSL_IA32_SSE2 an MMX/SSE2 loop is emitted in front of the
# integer code and chosen at run time via bit 26 of the first dword of
# OPENSSL_ia32cap_P.
#
# NOTE(review): the SSE2 loop decrements before testing, so num == 0 is
# not handled on that path - presumably callers guarantee num > 0.
sub bn_mul_words
{
    local $name = shift;

    &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

    # Register roles used by the SSE2 variant.
    ($r, $a, $c) = ("eax", "edx", "ecx");

    if ($sse2) {
        # Bit clear -> continue with the integer-only code further down.
        &picmeup("eax","OPENSSL_ia32cap_P");
        &bt(&DWP(0,"eax"),26);
        &jnc(&label("mw_non_sse2"));

        &mov($r,&wparam(0));
        &mov($a,&wparam(1));
        &mov($c,&wparam(2));
        &movd("mm0",&wparam(3));        # mm0 <- w
        &pxor("mm1","mm1");             # mm1 <- 0 (carry)

        &set_label("mw_sse2_loop",16);
        &movd("mm2",&DWP(0,$a));        # mm2 <- a[i]
        &pmuludq("mm2","mm0");          # mm2 <- a[i]*w
        &lea($a,&DWP(4,$a));            # a++
        &paddq("mm1","mm2");            # carry += a[i]*w
        &movd(&DWP(0,$r),"mm1");        # r[i] <- low half
        &sub($c,1);                     # sets the flags for jnz below
        &psrlq("mm1",32);               # carry <- high half
        &lea($r,&DWP(4,$r));            # r++
        &jnz(&label("mw_sse2_loop"));

        &movd("eax","mm1");             # return value <- carry
        &emms();
        &ret();
        &set_label("mw_non_sse2",16);
    }

    # Integer-only variant.  function_begin_B was used above, so the
    # register saves that make up the function_begin prologue are issued
    # by hand here.
    &push("ebp");
    &push("ebx");
    &push("esi");
    &push("edi");

    &comment("");
    # Register roles for the integer variant ($Low/$High are assigned but
    # not referenced again in this routine).
    ($Low, $High) = ("eax", "edx");
    ($a, $w, $r, $c, $num) = ("ebx", "ecx", "edi", "esi", "ebp");

    &xor($c,$c);                        # carry = 0
    &mov($r,&wparam(0));
    &mov($a,&wparam(1));
    &mov($num,&wparam(2));
    &mov($w,&wparam(3));

    &and($num,0xfffffff8);              # word count rounded down to 8s
    &jz(&label("mw_finish"));

    &set_label("mw_loop",0);
    # Eight words per iteration; $off is the byte offset of each word.
    foreach my $off (map { 4 * $_ } 0 .. 7) {
        &comment("Round $off");

        &mov("eax",&DWP($off,$a,"",0)); # eax = a[k]
        &mul($w);                       # edx:eax = a[k] * w
        &add("eax",$c);                 # low += carry

        &adc("edx",0);                  # high += overflow of that add
        &mov(&DWP($off,$r,"",0),"eax"); # r[k] = low

        &mov($c,"edx");                 # carry = high
    }

    &comment("");
    &add($a,32);
    &add($r,32);
    &sub($num,8);
    &jz(&label("mw_finish"));
    &jmp(&label("mw_loop"));

    &set_label("mw_finish",0);
    &mov($num,&wparam(2));              # reload num
    &and($num,7);                       # words left over (0..7)
    &jnz(&label("mw_finish2"));
    &jmp(&label("mw_end"));

    &set_label("mw_finish2",1);
    # Up to seven trailing words.  The counter is decremented after every
    # word except the seventh, which needs no exit test.
    foreach my $k (0 .. 6) {
        my $not_last = ($k != 6);
        my $off      = $k * 4;

        &comment("Tail Round $k");
        &mov("eax",&DWP($off,$a,"",0)); # eax = a[k]
        &mul($w);                       # edx:eax = a[k] * w
        &add("eax",$c);                 # low += carry
        &adc("edx",0);                  # high += overflow of that add
        &mov(&DWP($off,$r,"",0),"eax"); # r[k] = low
        &mov($c,"edx");                 # carry = high
        &dec($num) if $not_last;
        &jz(&label("mw_end")) if $not_last;
    }
    &set_label("mw_end",0);
    &mov("eax",$c);                     # return the final carry

    &function_end($name);
}

# bn_sqr_words(name)
#
# Emits the routine called `name`.  Stack arguments of the generated
# routine, as read through wparam(): 0 = r, 1 = a, 2 = num.
# Every input word a[i] is squared and the 64-bit product is written to
# the two output words r[2*i] (low half) and r[2*i+1] (high half).  The
# emitted code does not set eax to any particular value.
#
# With -DOPENSSL_IA32_SSE2 an MMX/SSE2 loop is emitted in front of the
# integer code and chosen at run time via bit 26 of the first dword of
# OPENSSL_ia32cap_P.
#
# NOTE(review): the SSE2 loop decrements before testing, so num == 0 is
# not handled on that path - presumably callers guarantee num > 0.
sub bn_sqr_words
{
    local $name = shift;

    &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");

    # Register roles used by the SSE2 variant.
    ($r, $a, $c) = ("eax", "edx", "ecx");

    if ($sse2) {
        # Bit clear -> continue with the integer-only code further down.
        &picmeup("eax","OPENSSL_ia32cap_P");
        &bt(&DWP(0,"eax"),26);
        &jnc(&label("sqr_non_sse2"));

        &mov($r,&wparam(0));
        &mov($a,&wparam(1));
        &mov($c,&wparam(2));

        &set_label("sqr_sse2_loop",16);
        &movd("mm0",&DWP(0,$a));        # mm0 <- a[i]
        &pmuludq("mm0","mm0");          # mm0 <- a[i]*a[i]
        &lea($a,&DWP(4,$a));            # a advances one word
        &movq(&QWP(0,$r),"mm0");        # store the 64-bit square
        &sub($c,1);                     # sets the flags for jnz below
        &lea($r,&DWP(8,$r));            # r advances two words
        &jnz(&label("sqr_sse2_loop"));

        &emms();
        &ret();
        &set_label("sqr_non_sse2",16);
    }

    # Integer-only variant.  function_begin_B was used above, so the
    # register saves that make up the function_begin prologue are issued
    # by hand here.
    &push("ebp");
    &push("ebx");
    &push("esi");
    &push("edi");

    &comment("");
    # Register roles for the integer variant.
    ($r, $a, $num) = ("esi", "edi", "ebx");

    &mov($r,&wparam(0));
    &mov($a,&wparam(1));
    &mov($num,&wparam(2));

    &and($num,0xfffffff8);              # word count rounded down to 8s
    &jz(&label("sw_finish"));

    &set_label("sw_loop",0);
    # Eight words per iteration; $off is the input byte offset and the
    # output lives at twice that offset.
    foreach my $off (map { 4 * $_ } 0 .. 7) {
        &comment("Round $off");
        &mov("eax",&DWP($off,$a,"",0));         # eax = a[k]
        &mul("eax");                            # edx:eax = a[k] * a[k]
        &mov(&DWP($off*2,$r,"",0),"eax");       # low half
        &mov(&DWP($off*2+4,$r,"",0),"edx");     # high half
    }

    &comment("");
    &add($a,32);
    &add($r,64);
    &sub($num,8);
    &jnz(&label("sw_loop"));

    &set_label("sw_finish",0);
    &mov($num,&wparam(2));              # reload num
    &and($num,7);                       # words left over (0..7)
    &jz(&label("sw_end"));

    # Up to seven trailing words.  The counter is decremented after every
    # word except the seventh, which needs no exit test.
    foreach my $k (0 .. 6) {
        my $not_last = ($k != 6);

        &comment("Tail Round $k");
        &mov("eax",&DWP($k*4,$a,"",0));         # eax = a[k]
        &mul("eax");                            # edx:eax = a[k] * a[k]
        &mov(&DWP($k*8,$r,"",0),"eax");         # low half
        &dec($num) if $not_last;
        &mov(&DWP($k*8+4,$r,"",0),"edx");       # high half
        &jz(&label("sw_end")) if $not_last;
    }
    &set_label("sw_end",0);

    &function_end($name);
}

# bn_div_words(name)
#
# Emits the routine called `name`.  The generated code loads its first
# stack argument into edx, the second into eax and the third into ecx,
# then executes a single "div ecx" and returns.  The x86 div instruction
# divides edx:eax by its operand and leaves the quotient in eax, which is
# therefore the return value.
#
# No registers are saved and no test for a zero divisor or an oversized
# quotient is emitted.
sub bn_div_words
{
    local $name = shift;

    &function_begin_B($name,"");
    &mov("edx",&wparam(0));     # high half of the dividend
    &mov("eax",&wparam(1));     # low half of the dividend
    &mov("ecx",&wparam(2));     # divisor
    &div("ecx");
    &ret();
    &function_end_B($name);
}

# bn_add_words(name)
#
# Emits the routine called `name`.  Stack arguments of the generated
# routine, as read through wparam(): 0 = r, 1 = a, 2 = b, 3 = num.
# It stores a[i] + b[i] + carry into r[i] for each of the num words and
# returns the final carry (0 or 1) in eax.
#
# The carry lives in a register rather than in the CPU carry flag: each
# word performs two additions and collects the carry-out of both.
sub bn_add_words
{
    local $name = shift;

    &function_begin($name,"");

    &comment("");
    # Register roles.
    ($a, $b, $c, $r) = ("esi", "edi", "eax", "ebx");
    ($tmp1, $tmp2, $num) = ("ecx", "edx", "ebp");

    &mov($r,&wparam(0));                # r
    &mov($a,&wparam(1));                # a
    &mov($b,&wparam(2));                # b
    &mov($num,&wparam(3));              # num
    &xor($c,$c);                        # carry = 0
    &and($num,0xfffffff8);              # word count rounded down to 8s

    &jz(&label("aw_finish"));

    &set_label("aw_loop",0);
    # Eight words per iteration.
    foreach my $k (0 .. 7) {
        &comment("Round $k");

        &mov($tmp1,&DWP($k*4,$a,"",0)); # tmp1 = a[k]
        &mov($tmp2,&DWP($k*4,$b,"",0)); # tmp2 = b[k]
        &add($tmp1,$c);                 # tmp1 += carry
        &mov($c,0);                     # mov leaves the flags alone ...
        &adc($c,$c);                    # ... so this captures the carry-out
        &add($tmp1,$tmp2);              # tmp1 += b[k]
        &adc($c,0);                     # add the second carry-out
        &mov(&DWP($k*4,$r,"",0),$tmp1); # r[k] = tmp1
    }

    &comment("");
    &add($a,32);
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish",0);
    &mov($num,&wparam(3));              # reload num
    &and($num,7);                       # words left over (0..7)
    &jz(&label("aw_end"));

    # Up to seven trailing words.  The counter is decremented after every
    # word except the seventh, which needs no exit test.
    foreach my $k (0 .. 6) {
        my $not_last = ($k != 6);

        &comment("Tail Round $k");
        &mov($tmp1,&DWP($k*4,$a,"",0)); # tmp1 = a[k]
        &mov($tmp2,&DWP($k*4,$b,"",0)); # tmp2 = b[k]
        &add($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &add($tmp1,$tmp2);
        &adc($c,0);
        &dec($num) if $not_last;
        &mov(&DWP($k*4,$r,"",0),$tmp1); # r[k] = tmp1
        &jz(&label("aw_end")) if $not_last;
    }
    &set_label("aw_end",0);

    # The carry is already in eax ($c), so no move is needed here.

    &function_end($name);
}

# bn_sub_words(name)
#
# Emits the routine called `name`.  Stack arguments of the generated
# routine, as read through wparam(): 0 = r, 1 = a, 2 = b, 3 = num.
# It stores a[i] - b[i] - borrow into r[i] for each of the num words and
# returns the final borrow (0 or 1) in eax.
#
# The borrow lives in a register rather than in the CPU carry flag: each
# word performs two subtractions and collects the borrow of both.  The
# label names carry the "aw_" prefix also used by bn_add_words.
sub bn_sub_words
{
    local $name = shift;

    &function_begin($name,"");

    &comment("");
    # Register roles.
    ($a, $b, $c, $r) = ("esi", "edi", "eax", "ebx");
    ($tmp1, $tmp2, $num) = ("ecx", "edx", "ebp");

    &mov($r,&wparam(0));                # r
    &mov($a,&wparam(1));                # a
    &mov($b,&wparam(2));                # b
    &mov($num,&wparam(3));              # num
    &xor($c,$c);                        # borrow = 0
    &and($num,0xfffffff8);              # word count rounded down to 8s

    &jz(&label("aw_finish"));

    &set_label("aw_loop",0);
    # Eight words per iteration.
    foreach my $k (0 .. 7) {
        &comment("Round $k");

        &mov($tmp1,&DWP($k*4,$a,"",0)); # tmp1 = a[k]
        &mov($tmp2,&DWP($k*4,$b,"",0)); # tmp2 = b[k]
        &sub($tmp1,$c);                 # tmp1 -= borrow
        &mov($c,0);                     # mov leaves the flags alone ...
        &adc($c,$c);                    # ... so this captures the borrow
        &sub($tmp1,$tmp2);              # tmp1 -= b[k]
        &adc($c,0);                     # add the second borrow
        &mov(&DWP($k*4,$r,"",0),$tmp1); # r[k] = tmp1
    }

    &comment("");
    &add($a,32);
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish",0);
    &mov($num,&wparam(3));              # reload num
    &and($num,7);                       # words left over (0..7)
    &jz(&label("aw_end"));

    # Up to seven trailing words.  The counter is decremented after every
    # word except the seventh, which needs no exit test.
    foreach my $k (0 .. 6) {
        my $not_last = ($k != 6);

        &comment("Tail Round $k");
        &mov($tmp1,&DWP($k*4,$a,"",0)); # tmp1 = a[k]
        &mov($tmp2,&DWP($k*4,$b,"",0)); # tmp2 = b[k]
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &dec($num) if $not_last;
        &mov(&DWP($k*4,$r,"",0),$tmp1); # r[k] = tmp1
        &jz(&label("aw_end")) if $not_last;
    }
    &set_label("aw_end",0);

    # The borrow is already in eax ($c), so no move is needed here.

    &function_end($name);
}

# bn_sub_part_words(name)
#
# Emits the routine called `name`.  Stack arguments of the generated
# routine, as read through wparam(): 0 = r, 1 = a, 2 = b, 3 = num and
# 4 = a signed extra length, called "dl" in the comments below.
#
# Phase 1 is the same word-wise subtraction as bn_sub_words over num
# words.  Phase 2 then continues for |dl| further words:
#   dl < 0 : r[i] = 0 - b[i] - borrow   (only b is read)
#   dl > 0 : r[i] = a[i] - borrow       (only a is read); as soon as a
#            word produces no borrow, control jumps into a plain copy
#            loop for the remaining words.
# The borrow is returned in eax.
sub bn_sub_part_words
{
    local $name = shift;

    &function_begin($name,"");

    &comment("");
    # Register roles.
    ($a, $b, $c, $r) = ("esi", "edi", "eax", "ebx");
    ($tmp1, $tmp2, $num) = ("ecx", "edx", "ebp");

    &mov($r,&wparam(0));                # r
    &mov($a,&wparam(1));                # a
    &mov($b,&wparam(2));                # b
    &mov($num,&wparam(3));              # num
    &xor($c,$c);                        # borrow = 0
    &and($num,0xfffffff8);              # word count rounded down to 8s

    &jz(&label("aw_finish"));

    # ---- Phase 1: r = a - b over num words ----------------------------
    &set_label("aw_loop",0);
    foreach my $k (0 .. 7) {
        &comment("Round $k");

        &mov($tmp1,&DWP($k*4,$a,"",0)); # tmp1 = a[k]
        &mov($tmp2,&DWP($k*4,$b,"",0)); # tmp2 = b[k]
        &sub($tmp1,$c);                 # tmp1 -= borrow
        &mov($c,0);                     # mov leaves the flags alone ...
        &adc($c,$c);                    # ... so this captures the borrow
        &sub($tmp1,$tmp2);              # tmp1 -= b[k]
        &adc($c,0);                     # add the second borrow
        &mov(&DWP($k*4,$r,"",0),$tmp1); # r[k] = tmp1
    }

    &comment("");
    &add($a,32);
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish",0);
    &mov($num,&wparam(3));              # reload num
    &and($num,7);                       # words left over (0..7)
    &jz(&label("aw_end"));

    # Unlike the other routines, the tail advances the three pointers one
    # word at a time instead of using growing offsets, so that they point
    # just past the processed words when phase 2 starts.
    foreach my $k (0 .. 6) {
        my $not_last = ($k != 6);

        &comment("Tail Round $k");
        &mov($tmp1,&DWP(0,$a,"",0));    # tmp1 = *a
        &mov($tmp2,&DWP(0,$b,"",0));    # tmp2 = *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &mov(&DWP(0,$r,"",0),$tmp1);    # *r = tmp1
        &add($a, 4);
        &add($b, 4);
        &add($r, 4);
        &dec($num) if $not_last;
        &jz(&label("aw_end")) if $not_last;
    }
    &set_label("aw_end",0);

    # ---- Phase 2: dispatch on the sign of dl --------------------------
    # dl is compared with zero in memory first and then once more on the
    # register copy; either test leaves the routine when dl == 0.
    &cmp(&wparam(4),0);
    &je(&label("pw_end"));

    &mov($num,&wparam(4));              # num = dl
    &cmp($num,0);
    &je(&label("pw_end"));
    &jge(&label("pw_pos"));             # same flags as the je above

    # ---- dl < 0: r = 0 - b - borrow over -dl words --------------------
    &comment("pw_neg");
    &mov($tmp2,0);
    &sub($tmp2,$num);
    &mov($num,$tmp2);                   # num = -dl
    &and($num,0xfffffff8);              # rounded down to 8s
    &jz(&label("pw_neg_finish"));

    &set_label("pw_neg_loop",0);
    foreach my $k (0 .. 7) {
        &comment("dl<0 Round $k");

        &mov($tmp1,0);                  # minuend is zero
        &mov($tmp2,&DWP($k*4,$b,"",0)); # tmp2 = b[k]
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &mov(&DWP($k*4,$r,"",0),$tmp1); # r[k] = tmp1
    }

    &comment("");
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("pw_neg_loop"));

    &set_label("pw_neg_finish",0);
    &mov($tmp2,&wparam(4));             # reload dl
    &mov($num,0);
    &sub($num,$tmp2);                   # num = -dl
    &and($num,7);                       # words left over (0..7)
    &jz(&label("pw_end"));

    foreach my $k (0 .. 6) {
        my $not_last = ($k != 6);

        &comment("dl<0 Tail Round $k");
        &mov($tmp1,0);
        &mov($tmp2,&DWP($k*4,$b,"",0)); # tmp2 = b[k]
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &dec($num) if $not_last;
        &mov(&DWP($k*4,$r,"",0),$tmp1); # r[k] = tmp1
        &jz(&label("pw_end")) if $not_last;
    }

    &jmp(&label("pw_end"));

    # ---- dl > 0: r = a - borrow over dl words -------------------------
    &set_label("pw_pos",0);

    &and($num,0xfffffff8);              # dl rounded down to 8s
    &jz(&label("pw_pos_finish"));

    &set_label("pw_pos_loop",0);

    # The borrow register is not rewritten in this loop.  A word that
    # produces no borrow branches to the label placed right after the
    # copy of the same word in the copy loop below, i.e. copying resumes
    # with the following word.
    foreach my $k (0 .. 7) {
        &comment("dl>0 Round $k");

        &mov($tmp1,&DWP($k*4,$a,"",0)); # tmp1 = a[k]
        &sub($tmp1,$c);                 # tmp1 -= borrow
        &mov(&DWP($k*4,$r,"",0),$tmp1); # r[k] = tmp1
        &jnc(&label("pw_nc$k"));
    }

    &comment("");
    &add($a,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("pw_pos_loop"));

    &set_label("pw_pos_finish",0);
    &mov($num,&wparam(4));              # reload dl
    &and($num,7);                       # words left over (0..7)
    &jz(&label("pw_end"));

    foreach my $k (0 .. 6) {
        my $not_last = ($k != 6);

        &comment("dl>0 Tail Round $k");
        &mov($tmp1,&DWP($k*4,$a,"",0)); # tmp1 = a[k]
        &sub($tmp1,$c);                 # tmp1 -= borrow
        &mov(&DWP($k*4,$r,"",0),$tmp1); # r[k] = tmp1
        &jnc(&label("pw_tail_nc$k"));
        &dec($num) if $not_last;
        &jz(&label("pw_end")) if $not_last;
    }
    # Reached only when the seventh tail word borrowed as well.
    &mov($c,1);
    &jmp(&label("pw_end"));

    # ---- dl > 0, borrow resolved: copy the remaining words of a -------
    &set_label("pw_nc_loop",0);
    foreach my $k (0 .. 7) {
        &mov($tmp1,&DWP($k*4,$a,"",0)); # tmp1 = a[k]
        &mov(&DWP($k*4,$r,"",0),$tmp1); # r[k] = tmp1
        &set_label("pw_nc$k",0);        # entry point from pw_pos_loop
    }

    &comment("");
    &add($a,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("pw_nc_loop"));

    &mov($num,&wparam(4));              # reload dl
    &and($num,7);                       # words left over (0..7)
    &jz(&label("pw_nc_end"));

    foreach my $k (0 .. 6) {
        my $not_last = ($k != 6);

        &mov($tmp1,&DWP($k*4,$a,"",0)); # tmp1 = a[k]
        &mov(&DWP($k*4,$r,"",0),$tmp1); # r[k] = tmp1
        &set_label("pw_tail_nc$k",0);   # entry point from the dl>0 tail
        &dec($num) if $not_last;
        &jz(&label("pw_nc_end")) if $not_last;
    }

    &set_label("pw_nc_end",0);
    &mov($c,0);                         # the copy path ends without borrow

    &set_label("pw_end",0);

    # The borrow is already in eax ($c), so no move is needed here.

    &function_end($name);
}