VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/chacha-x86_64.S@ 95221

Last change on this file since 95221 was 95221, checked in by vboxsync, 3 years ago

libs/openssl-3.0.3: Recreated openssl assembler files, bugref:10128

File size: 26.8 KB
Line 
1default rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
6
7
8EXTERN OPENSSL_ia32cap_P
9
10ALIGN 64
; Constant pool read by the ChaCha20 code paths below. It lives in the code
; section and starts on a 64-byte boundary; every table that is loaded with an
; aligned move (movdqa) below sits at a multiple of 16 bytes from that boundary.
;
; $L$zero: four zero dwords (not referenced by the code visible in this file).
11$L$zero:
12 DD 0,0,0,0
; $L$one: {1,0,0,0}. Added with paddd to the fourth state row in the 1x/2x
; paths, which bumps only its first dword (the block counter word) by one.
13$L$one:
14 DD 1,0,0,0
; $L$inc: per-lane offsets {0,1,2,3} added to the broadcast counter word when
; the 4x path sets up four consecutive blocks.
15$L$inc:
16 DD 0,1,2,3
; $L$four: {4,4,4,4}. Added to the four lane counters on each pass of the 4x
; outer loop.
17$L$four:
18 DD 4,4,4,4
; $L$incy / $L$eight: 8-dword tables, not referenced by the code visible in
; this file (NOTE(review): presumably meant for a wider-vector path that is
; not part of this build -- confirm against the generator).
19$L$incy:
20 DD 0,2,4,6,1,3,5,7
21$L$eight:
22 DD 8,8,8,8,8,8,8,8
; $L$rot16 / $L$rot24: pshufb byte-shuffle masks. Within each dword the mask
; {2,3,0,1} rotates left by 16 bits and {3,0,1,2} rotates left by 8 bits.
23$L$rot16:
24DB 0x2,0x3,0x0,0x1,0x6,0x7,0x4,0x5,0xa,0xb,0x8,0x9,0xe,0xf,0xc,0xd
25$L$rot24:
26DB 0x3,0x0,0x1,0x2,0x7,0x4,0x5,0x6,0xb,0x8,0x9,0xa,0xf,0xc,0xd,0xe
; $L$twoy, $L$zeroz, $L$fourz, $L$incz, $L$sixteen: not referenced by the code
; visible in this file.
27$L$twoy:
28 DD 2,0,0,0,2,0,0,0
29ALIGN 64
30$L$zeroz:
31 DD 0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0
32$L$fourz:
33 DD 4,0,0,0,4,0,0,0,4,0,0,0,4,0,0,0
34$L$incz:
35 DD 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
36$L$sixteen:
37 DD 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
; $L$sigma: the 16 ASCII bytes "expand 32-byte k"; loaded as state words 0..3
; by the SIMD paths (the scalar path uses the same four dwords as immediates).
38$L$sigma:
39DB 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107
40DB 0
; NUL-terminated ASCII attribution string:
; "ChaCha20 for x86_64, CRYPTOGAMS by <appro@openssl.org>"
41DB 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
42DB 95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32
43DB 98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115
44DB 108,46,111,114,103,62,0
45global ChaCha20_ctr32
46
;-----------------------------------------------------------------------
; ChaCha20_ctr32 -- exported entry point and scalar (integer-register) path.
;
; ABI:  entered with the Win64 convention: four arguments in rcx, rdx, r8, r9
;       and a fifth at [40+rsp]. The prologue parks the caller's rdi/rsi in
;       the home slots at [8+rsp]/[16+rsp] and shuffles the arguments into
;       rdi, rsi, rdx, rcx, r8.
; After the shuffle, as used by the code below:
;       rdi = output pointer (written 64 bytes per block)
;       rsi = input pointer  (XORed with the generated block)
;       rdx = byte count     (0 => return immediately)
;       rcx = pointer to 32 bytes loaded as state words 4..11
;             (presumably the key -- confirm against the C prototype)
;       r8  = pointer to 16 bytes loaded as state words 12..15
;             (word 12 is incremented once per block)
; Dispatch: if bit 9 (mask 512) of the dword at OPENSSL_ia32cap_P+4 is set,
;       control transfers to $L$ChaCha20_ssse3 with r10 still holding the
;       loaded capability qword.
; Stack frame of the scalar path (after six pushes and sub rsp,64+24):
;       [ 0..63+rsp]  scratch; receives a whole keystream block in $L$tail
;       [16..63+rsp]  saved input state words 4..15 while rounds run
;       [64+rsp]      remaining byte count
;       [72+rsp]      input pointer
;       [80+rsp]      output pointer
; Saves/restores rbx, rbp, r12-r15; rdi/rsi are restored from the home slots.
;-----------------------------------------------------------------------
47ALIGN 64
48ChaCha20_ctr32:
49 mov QWORD[8+rsp],rdi ;WIN64 prologue
50 mov QWORD[16+rsp],rsi
51 mov rax,rsp
52$L$SEH_begin_ChaCha20_ctr32:
53 mov rdi,rcx
54 mov rsi,rdx
55 mov rdx,r8
56 mov rcx,r9
57 mov r8,QWORD[40+rsp]
58
59
60
; Zero-length request: skip straight to the epilogue.
61 cmp rdx,0
62 je NEAR $L$no_data
; r10 keeps the capability bits; $L$ChaCha20_4x reads r10 again later.
63 mov r10,QWORD[((OPENSSL_ia32cap_P+4))]
64 test r10d,512
65 jnz NEAR $L$ChaCha20_ssse3
66
67 push rbx
68
69 push rbp
70
71 push r12
72
73 push r13
74
75 push r14
76
77 push r15
78
79 sub rsp,64+24
80
; se_handler treats any fault between $L$ctr32_body and $L$no_data as
; "frame fully set up".
81$L$ctr32_body:
82
83
; xmm1 = words 4..7, xmm2 = words 8..11, xmm3 = words 12..15, xmm4 = {1,0,0,0}.
84 movdqu xmm1,XMMWORD[rcx]
85 movdqu xmm2,XMMWORD[16+rcx]
86 movdqu xmm3,XMMWORD[r8]
87 movdqa xmm4,XMMWORD[$L$one]
88
89
90 movdqa XMMWORD[16+rsp],xmm1
91 movdqa XMMWORD[32+rsp],xmm2
92 movdqa XMMWORD[48+rsp],xmm3
; rbp = bytes remaining (rdx is about to be reused as a state word).
93 mov rbp,rdx
94 jmp NEAR $L$oop_outer
95
96ALIGN 32
; One pass of $L$oop_outer produces one 64-byte block.
; Register allocation during the rounds:
;   eax, ebx, ecx, edx   = words 0..3 (the "expand 32-byte k" constants)
;   r8d..r11d            = words 4..7
;   esi, edi             = two of words 8..11; the other two sit at [32..47+rsp]
;   r12d..r15d           = words 12..15
;   ebp                  = double-round counter (10 passes of $L$oop)
97$L$oop_outer:
98 mov eax,0x61707865
99 mov ebx,0x3320646e
100 mov ecx,0x79622d32
101 mov edx,0x6b206574
102 mov r8d,DWORD[16+rsp]
103 mov r9d,DWORD[20+rsp]
104 mov r10d,DWORD[24+rsp]
105 mov r11d,DWORD[28+rsp]
; Word 12 (the counter) comes from xmm3, which is stepped with paddd after
; every block; words 13..15 are reloaded from the stack copy.
106 movd r12d,xmm3
107 mov r13d,DWORD[52+rsp]
108 mov r14d,DWORD[56+rsp]
109 mov r15d,DWORD[60+rsp]
110
; Spill length and the in/out pointers: rbp, rsi and rdi are needed as
; working registers inside the round loop.
111 mov QWORD[((64+0))+rsp],rbp
112 mov ebp,10
113 mov QWORD[((64+8))+rsp],rsi
; Hand-encoded bytes 66 48 0F 7E D6 = movq rsi,xmm2: rsi = words 8 and 9.
114DB 102,72,15,126,214
115 mov QWORD[((64+16))+rsp],rdi
; edi = word 9 (upper half of rsi); esi is then used as word 8.
116 mov rdi,rsi
117 shr rdi,32
118 jmp NEAR $L$oop
119
120ALIGN 32
; Each pass = one column round followed by one diagonal round. Every quarter
; round uses the add/xor/rotate sequence with rotations 16, 12, 8, 7; two
; quarter rounds are interleaved at a time.
121$L$oop:
; Column quarter rounds on (0,4,8,12) and (1,5,9,13).
122 add eax,r8d
123 xor r12d,eax
124 rol r12d,16
125 add ebx,r9d
126 xor r13d,ebx
127 rol r13d,16
128 add esi,r12d
129 xor r8d,esi
130 rol r8d,12
131 add edi,r13d
132 xor r9d,edi
133 rol r9d,12
134 add eax,r8d
135 xor r12d,eax
136 rol r12d,8
137 add ebx,r9d
138 xor r13d,ebx
139 rol r13d,8
140 add esi,r12d
141 xor r8d,esi
142 rol r8d,7
143 add edi,r13d
144 xor r9d,edi
145 rol r9d,7
; Swap the live pair of row-2 words: park 8,9 and fetch 10,11.
146 mov DWORD[32+rsp],esi
147 mov DWORD[36+rsp],edi
148 mov esi,DWORD[40+rsp]
149 mov edi,DWORD[44+rsp]
; Column quarter rounds on (2,6,10,14) and (3,7,11,15).
150 add ecx,r10d
151 xor r14d,ecx
152 rol r14d,16
153 add edx,r11d
154 xor r15d,edx
155 rol r15d,16
156 add esi,r14d
157 xor r10d,esi
158 rol r10d,12
159 add edi,r15d
160 xor r11d,edi
161 rol r11d,12
162 add ecx,r10d
163 xor r14d,ecx
164 rol r14d,8
165 add edx,r11d
166 xor r15d,edx
167 rol r15d,8
168 add esi,r14d
169 xor r10d,esi
170 rol r10d,7
171 add edi,r15d
172 xor r11d,edi
173 rol r11d,7
; Diagonal quarter rounds on (0,5,10,15) and (1,6,11,12); esi/edi still hold
; words 10,11.
174 add eax,r9d
175 xor r15d,eax
176 rol r15d,16
177 add ebx,r10d
178 xor r12d,ebx
179 rol r12d,16
180 add esi,r15d
181 xor r9d,esi
182 rol r9d,12
183 add edi,r12d
184 xor r10d,edi
185 rol r10d,12
186 add eax,r9d
187 xor r15d,eax
188 rol r15d,8
189 add ebx,r10d
190 xor r12d,ebx
191 rol r12d,8
192 add esi,r15d
193 xor r9d,esi
194 rol r9d,7
195 add edi,r12d
196 xor r10d,edi
197 rol r10d,7
; Swap back: park 10,11 and fetch 8,9.
198 mov DWORD[40+rsp],esi
199 mov DWORD[44+rsp],edi
200 mov esi,DWORD[32+rsp]
201 mov edi,DWORD[36+rsp]
; Diagonal quarter rounds on (2,7,8,13) and (3,4,9,14).
202 add ecx,r11d
203 xor r13d,ecx
204 rol r13d,16
205 add edx,r8d
206 xor r14d,edx
207 rol r14d,16
208 add esi,r13d
209 xor r11d,esi
210 rol r11d,12
211 add edi,r14d
212 xor r8d,edi
213 rol r8d,12
214 add ecx,r11d
215 xor r13d,ecx
216 rol r13d,8
217 add edx,r8d
218 xor r14d,edx
219 rol r14d,8
220 add esi,r13d
221 xor r11d,esi
222 rol r11d,7
223 add edi,r14d
224 xor r8d,edi
225 rol r8d,7
226 dec ebp
227 jnz NEAR $L$oop
; Rounds done. [32..47+rsp] now holds the permuted words 8..11; restore the
; spilled length/pointers and add the input state back in (feed-forward).
228 mov DWORD[36+rsp],edi
229 mov DWORD[32+rsp],esi
230 mov rbp,QWORD[64+rsp]
; xmm1 = original words 8..11 (xmm2), to be added to the permuted copy below.
231 movdqa xmm1,xmm2
232 mov rsi,QWORD[((64+8))+rsp]
; Step the block counter for the next pass (xmm4 = {1,0,0,0}).
233 paddd xmm3,xmm4
234 mov rdi,QWORD[((64+16))+rsp]
235
236 add eax,0x61707865
237 add ebx,0x3320646e
238 add ecx,0x79622d32
239 add edx,0x6b206574
240 add r8d,DWORD[16+rsp]
241 add r9d,DWORD[20+rsp]
242 add r10d,DWORD[24+rsp]
243 add r11d,DWORD[28+rsp]
; [48+rsp] still holds the counter value this block was generated with.
244 add r12d,DWORD[48+rsp]
245 add r13d,DWORD[52+rsp]
246 add r14d,DWORD[56+rsp]
247 add r15d,DWORD[60+rsp]
248 paddd xmm1,XMMWORD[32+rsp]
249
; Fewer than 64 bytes left: finish byte by byte in $L$tail.
250 cmp rbp,64
251 jb NEAR $L$tail
252
; Full block: XOR the 16 keystream words with the input ...
253 xor eax,DWORD[rsi]
254 xor ebx,DWORD[4+rsi]
255 xor ecx,DWORD[8+rsi]
256 xor edx,DWORD[12+rsi]
257 xor r8d,DWORD[16+rsi]
258 xor r9d,DWORD[20+rsi]
259 xor r10d,DWORD[24+rsi]
260 xor r11d,DWORD[28+rsi]
261 movdqu xmm0,XMMWORD[32+rsi]
262 xor r12d,DWORD[48+rsi]
263 xor r13d,DWORD[52+rsi]
264 xor r14d,DWORD[56+rsi]
265 xor r15d,DWORD[60+rsi]
266 lea rsi,[64+rsi]
267 pxor xmm0,xmm1
268
; ... re-arm the stack copy for the next block: original words 8..11 and the
; already-incremented counter word ...
269 movdqa XMMWORD[32+rsp],xmm2
270 movd DWORD[48+rsp],xmm3
271
; ... and store the result.
272 mov DWORD[rdi],eax
273 mov DWORD[4+rdi],ebx
274 mov DWORD[8+rdi],ecx
275 mov DWORD[12+rdi],edx
276 mov DWORD[16+rdi],r8d
277 mov DWORD[20+rdi],r9d
278 mov DWORD[24+rdi],r10d
279 mov DWORD[28+rdi],r11d
280 movdqu XMMWORD[32+rdi],xmm0
281 mov DWORD[48+rdi],r12d
282 mov DWORD[52+rdi],r13d
283 mov DWORD[56+rdi],r14d
284 mov DWORD[60+rdi],r15d
285 lea rdi,[64+rdi]
286
287 sub rbp,64
288 jnz NEAR $L$oop_outer
289
290 jmp NEAR $L$done
291
292ALIGN 16
; Partial final block (1..63 bytes): lay the whole keystream block out at
; [0..63+rsp], then XOR rbp bytes one at a time. rbx is the byte index.
293$L$tail:
294 mov DWORD[rsp],eax
295 mov DWORD[4+rsp],ebx
296 xor rbx,rbx
297 mov DWORD[8+rsp],ecx
298 mov DWORD[12+rsp],edx
299 mov DWORD[16+rsp],r8d
300 mov DWORD[20+rsp],r9d
301 mov DWORD[24+rsp],r10d
302 mov DWORD[28+rsp],r11d
303 movdqa XMMWORD[32+rsp],xmm1
304 mov DWORD[48+rsp],r12d
305 mov DWORD[52+rsp],r13d
306 mov DWORD[56+rsp],r14d
307 mov DWORD[60+rsp],r15d
308
309$L$oop_tail:
310 movzx eax,BYTE[rbx*1+rsi]
311 movzx edx,BYTE[rbx*1+rsp]
312 lea rbx,[1+rbx]
313 xor eax,edx
314 mov BYTE[((-1))+rbx*1+rdi],al
315 dec rbp
316 jnz NEAR $L$oop_tail
317
; Epilogue: rsi = rsp as it was before the six pushes (64+24 bytes of locals
; plus 48 bytes of pushed registers); reload the saved registers from below it.
318$L$done:
319 lea rsi,[((64+24+48))+rsp]
320
321 mov r15,QWORD[((-48))+rsi]
322
323 mov r14,QWORD[((-40))+rsi]
324
325 mov r13,QWORD[((-32))+rsi]
326
327 mov r12,QWORD[((-24))+rsi]
328
329 mov rbp,QWORD[((-16))+rsi]
330
331 mov rbx,QWORD[((-8))+rsi]
332
333 lea rsp,[rsi]
334
; Common exit, also taken directly for a zero byte count.
335$L$no_data:
336 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
337 mov rsi,QWORD[16+rsp]
338 DB 0F3h,0C3h ;repret
339
340$L$SEH_end_ChaCha20_ctr32:
341
342ALIGN 32
;-----------------------------------------------------------------------
; ChaCha20_ssse3 -- one-block-at-a-time SIMD path.
;
; Entry: normally reached by the jump from ChaCha20_ctr32 to
;       $L$ChaCha20_ssse3, i.e. with the arguments already moved to
;       rdi = output, rsi = input, rdx = byte count (non-zero),
;       rcx -> 32 bytes for state row 1..2, r8 -> 16 bytes for state row 3,
;       and r10 = capability bits loaded by ChaCha20_ctr32.
;       The Win64 prologue at the ChaCha20_ssse3 label performs the same
;       argument shuffle for a direct call.
; Dispatch: exactly 128 bytes -> $L$ChaCha20_128; more than 128 bytes ->
;       $L$ChaCha20_4x (which may jump back to $L$do_sse3_after_all).
; Registers in the round loop: xmm0..xmm3 = state rows 0..3, xmm4/xmm5 =
;       temporaries, xmm6 = $L$rot16 mask, xmm7 = $L$rot24 mask,
;       r8 = double-round counter, r9 = rsp at entry (frame anchor).
; Frame: 64 bytes of state at [0..63+rsp], 32 bytes for the callee-saved
;       xmm6/xmm7 at [-40+r9]/[-24+r9], 8 bytes of padding.
;-----------------------------------------------------------------------
343ChaCha20_ssse3:
344 mov QWORD[8+rsp],rdi ;WIN64 prologue
345 mov QWORD[16+rsp],rsi
346 mov rax,rsp
347$L$SEH_begin_ChaCha20_ssse3:
348 mov rdi,rcx
349 mov rsi,rdx
350 mov rdx,r8
351 mov rcx,r9
352 mov r8,QWORD[40+rsp]
353
354
355
356$L$ChaCha20_ssse3:
; r9 anchors the frame: the XMM save slots and the epilogue are r9-relative.
357 mov r9,rsp
358
359 cmp rdx,128
360 je NEAR $L$ChaCha20_128
361 ja NEAR $L$ChaCha20_4x
362
363$L$do_sse3_after_all:
; 64 bytes of locals + 32 bytes XMM save area + 8 bytes pad = 64+40.
; This path saves only xmm6/xmm7 and its unwind data records 0x20 bytes of
; XMM state, so the 64+168 previously reserved here (the size of the 4x
; path's ten-register save area) over-allocated the stack by 128 bytes.
; With rsp = r9-104 the state block ends exactly where the save area at
; [-40+r9] begins, and rsp stays 16-byte aligned for the movdqa accesses
; (assuming the usual rsp = 8 mod 16 at function entry).
364 sub rsp,64+40
365 movaps XMMWORD[(-40)+r9],xmm6
366 movaps XMMWORD[(-24)+r9],xmm7
; simd_handler treats faults between $L$ssse3_body and $L$ssse3_epilogue as
; "xmm6/xmm7 saved".
367$L$ssse3_body:
368 movdqa xmm0,XMMWORD[$L$sigma]
369 movdqu xmm1,XMMWORD[rcx]
370 movdqu xmm2,XMMWORD[16+rcx]
371 movdqu xmm3,XMMWORD[r8]
372 movdqa xmm6,XMMWORD[$L$rot16]
373 movdqa xmm7,XMMWORD[$L$rot24]
374
; Keep a copy of the input state for the feed-forward addition.
375 movdqa XMMWORD[rsp],xmm0
376 movdqa XMMWORD[16+rsp],xmm1
377 movdqa XMMWORD[32+rsp],xmm2
378 movdqa XMMWORD[48+rsp],xmm3
379 mov r8,10
380 jmp NEAR $L$oop_ssse3
381
382ALIGN 32
; Next block: reload the saved state and bump the counter dword of row 3 by
; one, updating the saved copy as well.
383$L$oop_outer_ssse3:
384 movdqa xmm3,XMMWORD[$L$one]
385 movdqa xmm0,XMMWORD[rsp]
386 movdqa xmm1,XMMWORD[16+rsp]
387 movdqa xmm2,XMMWORD[32+rsp]
388 paddd xmm3,XMMWORD[48+rsp]
389 mov r8,10
390 movdqa XMMWORD[48+rsp],xmm3
391 jmp NEAR $L$oop_ssse3
392
393ALIGN 32
; One pass = column round + diagonal round on all four columns at once.
; Rotations by 16 and 8 use pshufb with the masks in xmm6/xmm7; rotations by
; 12 and 7 use a shift pair combined with por.
394$L$oop_ssse3:
395 paddd xmm0,xmm1
396 pxor xmm3,xmm0
; 66 0F 38 00 DE = pshufb xmm3,xmm6 (rotate left 16)
397DB 102,15,56,0,222
398 paddd xmm2,xmm3
399 pxor xmm1,xmm2
400 movdqa xmm4,xmm1
401 psrld xmm1,20
402 pslld xmm4,12
403 por xmm1,xmm4
404 paddd xmm0,xmm1
405 pxor xmm3,xmm0
; 66 0F 38 00 DF = pshufb xmm3,xmm7 (rotate left 8)
406DB 102,15,56,0,223
407 paddd xmm2,xmm3
408 pxor xmm1,xmm2
409 movdqa xmm4,xmm1
410 psrld xmm1,25
411 pslld xmm4,7
412 por xmm1,xmm4
; Rotate rows 1..3 by one, two and three lanes so the diagonals line up as
; columns.
413 pshufd xmm2,xmm2,78
414 pshufd xmm1,xmm1,57
415 pshufd xmm3,xmm3,147
416 nop
417 paddd xmm0,xmm1
418 pxor xmm3,xmm0
; pshufb xmm3,xmm6
419DB 102,15,56,0,222
420 paddd xmm2,xmm3
421 pxor xmm1,xmm2
422 movdqa xmm4,xmm1
423 psrld xmm1,20
424 pslld xmm4,12
425 por xmm1,xmm4
426 paddd xmm0,xmm1
427 pxor xmm3,xmm0
; pshufb xmm3,xmm7
428DB 102,15,56,0,223
429 paddd xmm2,xmm3
430 pxor xmm1,xmm2
431 movdqa xmm4,xmm1
432 psrld xmm1,25
433 pslld xmm4,7
434 por xmm1,xmm4
; Undo the lane rotation (inverse shuffles for rows 1 and 3).
435 pshufd xmm2,xmm2,78
436 pshufd xmm1,xmm1,147
437 pshufd xmm3,xmm3,57
438 dec r8
439 jnz NEAR $L$oop_ssse3
; Feed-forward: add the saved input state.
440 paddd xmm0,XMMWORD[rsp]
441 paddd xmm1,XMMWORD[16+rsp]
442 paddd xmm2,XMMWORD[32+rsp]
443 paddd xmm3,XMMWORD[48+rsp]
444
445 cmp rdx,64
446 jb NEAR $L$tail_ssse3
447
; Full block: XOR 64 input bytes with the keystream and store.
448 movdqu xmm4,XMMWORD[rsi]
449 movdqu xmm5,XMMWORD[16+rsi]
450 pxor xmm0,xmm4
451 movdqu xmm4,XMMWORD[32+rsi]
452 pxor xmm1,xmm5
453 movdqu xmm5,XMMWORD[48+rsi]
454 lea rsi,[64+rsi]
455 pxor xmm2,xmm4
456 pxor xmm3,xmm5
457
458 movdqu XMMWORD[rdi],xmm0
459 movdqu XMMWORD[16+rdi],xmm1
460 movdqu XMMWORD[32+rdi],xmm2
461 movdqu XMMWORD[48+rdi],xmm3
462 lea rdi,[64+rdi]
463
464 sub rdx,64
465 jnz NEAR $L$oop_outer_ssse3
466
467 jmp NEAR $L$done_ssse3
468
469ALIGN 16
; Partial final block: spill the keystream block over the saved state (no
; further blocks follow) and XOR rdx bytes one at a time; r8 = byte index.
470$L$tail_ssse3:
471 movdqa XMMWORD[rsp],xmm0
472 movdqa XMMWORD[16+rsp],xmm1
473 movdqa XMMWORD[32+rsp],xmm2
474 movdqa XMMWORD[48+rsp],xmm3
475 xor r8,r8
476
477$L$oop_tail_ssse3:
478 movzx eax,BYTE[r8*1+rsi]
479 movzx ecx,BYTE[r8*1+rsp]
480 lea r8,[1+r8]
481 xor eax,ecx
482 mov BYTE[((-1))+r8*1+rdi],al
483 dec rdx
484 jnz NEAR $L$oop_tail_ssse3
485
; Restore the callee-saved XMM registers and drop the frame via r9.
486$L$done_ssse3:
487 movaps xmm6,XMMWORD[((-40))+r9]
488 movaps xmm7,XMMWORD[((-24))+r9]
489 lea rsp,[r9]
490
491$L$ssse3_epilogue:
492 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
493 mov rsi,QWORD[16+rsp]
494 DB 0F3h,0C3h ;repret
495
496$L$SEH_end_ChaCha20_ssse3:
497
498ALIGN 32
;-----------------------------------------------------------------------
; ChaCha20_128 -- SIMD path for exactly 128 bytes: two blocks are computed
; side by side with their instructions interleaved, and there is no length
; check or loop over the data.
;
; Entry: reached from $L$ChaCha20_ssse3 when rdx == 128, with
;       rdi = output, rsi = input, rcx -> 32 bytes (rows 1..2),
;       r8 -> 16 bytes (row 3). The Win64 prologue at the ChaCha20_128 label
;       performs the same argument shuffle as the other entry points.
; Registers in the round loop:
;       block A: xmm8 (row 0), xmm9 (row 1), xmm2 (row 2), xmm3 (row 3)
;       block B: xmm10 (row 0), xmm11 (row 1), xmm0 (row 2), xmm1 (row 3,
;                counter dword one higher than block A)
;       xmm4/xmm5 = temporaries, xmm6/xmm7 = $L$rot16/$L$rot24 masks,
;       r8 = double-round counter, r9 = rsp at entry.
; Frame: 64 bytes of saved input state at [0..63+rsp], 96 bytes for
;       xmm6..xmm11 at [-104+r9 .. -9+r9], 8 bytes of padding.
;-----------------------------------------------------------------------
499ChaCha20_128:
500 mov QWORD[8+rsp],rdi ;WIN64 prologue
501 mov QWORD[16+rsp],rsi
502 mov rax,rsp
503$L$SEH_begin_ChaCha20_128:
504 mov rdi,rcx
505 mov rsi,rdx
506 mov rdx,r8
507 mov rcx,r9
508 mov r8,QWORD[40+rsp]
509
510
511
512$L$ChaCha20_128:
513 mov r9,rsp
514
515 sub rsp,64+104
516 movaps XMMWORD[(-104)+r9],xmm6
517 movaps XMMWORD[(-88)+r9],xmm7
518 movaps XMMWORD[(-72)+r9],xmm8
519 movaps XMMWORD[(-56)+r9],xmm9
520 movaps XMMWORD[(-40)+r9],xmm10
521 movaps XMMWORD[(-24)+r9],xmm11
; simd_handler treats faults between $L$128_body and $L$128_epilogue as
; "xmm6..xmm11 saved".
522$L$128_body:
523 movdqa xmm8,XMMWORD[$L$sigma]
524 movdqu xmm9,XMMWORD[rcx]
525 movdqu xmm2,XMMWORD[16+rcx]
526 movdqu xmm3,XMMWORD[r8]
527 movdqa xmm1,XMMWORD[$L$one]
528 movdqa xmm6,XMMWORD[$L$rot16]
529 movdqa xmm7,XMMWORD[$L$rot24]
530
; Duplicate rows 0..2 for block B and save the input state; block B's row 3
; is block A's row 3 with the counter dword incremented (xmm1 = $L$one + xmm3).
531 movdqa xmm10,xmm8
532 movdqa XMMWORD[rsp],xmm8
533 movdqa xmm11,xmm9
534 movdqa XMMWORD[16+rsp],xmm9
535 movdqa xmm0,xmm2
536 movdqa XMMWORD[32+rsp],xmm2
537 paddd xmm1,xmm3
538 movdqa XMMWORD[48+rsp],xmm3
539 mov r8,10
540 jmp NEAR $L$oop_128
541
542ALIGN 32
; One pass = column round + diagonal round for both blocks. The DB lines are
; hand-encoded pshufb instructions (66 0F 38 00 /r):
;   222 -> pshufb xmm3,xmm6    206 -> pshufb xmm1,xmm6   (rotate left 16)
;   223 -> pshufb xmm3,xmm7    207 -> pshufb xmm1,xmm7   (rotate left 8)
543$L$oop_128:
544 paddd xmm8,xmm9
545 pxor xmm3,xmm8
546 paddd xmm10,xmm11
547 pxor xmm1,xmm10
548DB 102,15,56,0,222
549DB 102,15,56,0,206
550 paddd xmm2,xmm3
551 paddd xmm0,xmm1
552 pxor xmm9,xmm2
553 pxor xmm11,xmm0
; Rotate row 1 left by 12 (shift pair + por) in both blocks.
554 movdqa xmm4,xmm9
555 psrld xmm9,20
556 movdqa xmm5,xmm11
557 pslld xmm4,12
558 psrld xmm11,20
559 por xmm9,xmm4
560 pslld xmm5,12
561 por xmm11,xmm5
562 paddd xmm8,xmm9
563 pxor xmm3,xmm8
564 paddd xmm10,xmm11
565 pxor xmm1,xmm10
566DB 102,15,56,0,223
567DB 102,15,56,0,207
568 paddd xmm2,xmm3
569 paddd xmm0,xmm1
570 pxor xmm9,xmm2
571 pxor xmm11,xmm0
; Rotate row 1 left by 7 in both blocks.
572 movdqa xmm4,xmm9
573 psrld xmm9,25
574 movdqa xmm5,xmm11
575 pslld xmm4,7
576 psrld xmm11,25
577 por xmm9,xmm4
578 pslld xmm5,7
579 por xmm11,xmm5
; Rotate rows 1..3 by lanes so the diagonals become columns.
580 pshufd xmm2,xmm2,78
581 pshufd xmm9,xmm9,57
582 pshufd xmm3,xmm3,147
583 pshufd xmm0,xmm0,78
584 pshufd xmm11,xmm11,57
585 pshufd xmm1,xmm1,147
; Diagonal round (same sequence as above on the rotated rows).
586 paddd xmm8,xmm9
587 pxor xmm3,xmm8
588 paddd xmm10,xmm11
589 pxor xmm1,xmm10
590DB 102,15,56,0,222
591DB 102,15,56,0,206
592 paddd xmm2,xmm3
593 paddd xmm0,xmm1
594 pxor xmm9,xmm2
595 pxor xmm11,xmm0
596 movdqa xmm4,xmm9
597 psrld xmm9,20
598 movdqa xmm5,xmm11
599 pslld xmm4,12
600 psrld xmm11,20
601 por xmm9,xmm4
602 pslld xmm5,12
603 por xmm11,xmm5
604 paddd xmm8,xmm9
605 pxor xmm3,xmm8
606 paddd xmm10,xmm11
607 pxor xmm1,xmm10
608DB 102,15,56,0,223
609DB 102,15,56,0,207
610 paddd xmm2,xmm3
611 paddd xmm0,xmm1
612 pxor xmm9,xmm2
613 pxor xmm11,xmm0
614 movdqa xmm4,xmm9
615 psrld xmm9,25
616 movdqa xmm5,xmm11
617 pslld xmm4,7
618 psrld xmm11,25
619 por xmm9,xmm4
620 pslld xmm5,7
621 por xmm11,xmm5
; Undo the lane rotation.
622 pshufd xmm2,xmm2,78
623 pshufd xmm9,xmm9,147
624 pshufd xmm3,xmm3,57
625 pshufd xmm0,xmm0,78
626 pshufd xmm11,xmm11,147
627 pshufd xmm1,xmm1,57
628 dec r8
629 jnz NEAR $L$oop_128
; Feed-forward. Block B's row 3 gets $L$one plus the saved row 3, matching the
; counter value it started from.
630 paddd xmm8,XMMWORD[rsp]
631 paddd xmm9,XMMWORD[16+rsp]
632 paddd xmm2,XMMWORD[32+rsp]
633 paddd xmm3,XMMWORD[48+rsp]
634 paddd xmm1,XMMWORD[$L$one]
635 paddd xmm10,XMMWORD[rsp]
636 paddd xmm11,XMMWORD[16+rsp]
637 paddd xmm0,XMMWORD[32+rsp]
638 paddd xmm1,XMMWORD[48+rsp]
639
; XOR all 128 input bytes with the two keystream blocks ...
640 movdqu xmm4,XMMWORD[rsi]
641 movdqu xmm5,XMMWORD[16+rsi]
642 pxor xmm8,xmm4
643 movdqu xmm4,XMMWORD[32+rsi]
644 pxor xmm9,xmm5
645 movdqu xmm5,XMMWORD[48+rsi]
646 pxor xmm2,xmm4
647 movdqu xmm4,XMMWORD[64+rsi]
648 pxor xmm3,xmm5
649 movdqu xmm5,XMMWORD[80+rsi]
650 pxor xmm10,xmm4
651 movdqu xmm4,XMMWORD[96+rsi]
652 pxor xmm11,xmm5
653 movdqu xmm5,XMMWORD[112+rsi]
654 pxor xmm0,xmm4
655 pxor xmm1,xmm5
656
; ... and store them.
657 movdqu XMMWORD[rdi],xmm8
658 movdqu XMMWORD[16+rdi],xmm9
659 movdqu XMMWORD[32+rdi],xmm2
660 movdqu XMMWORD[48+rdi],xmm3
661 movdqu XMMWORD[64+rdi],xmm10
662 movdqu XMMWORD[80+rdi],xmm11
663 movdqu XMMWORD[96+rdi],xmm0
664 movdqu XMMWORD[112+rdi],xmm1
; Restore the callee-saved XMM registers and drop the frame via r9.
665 movaps xmm6,XMMWORD[((-104))+r9]
666 movaps xmm7,XMMWORD[((-88))+r9]
667 movaps xmm8,XMMWORD[((-72))+r9]
668 movaps xmm9,XMMWORD[((-56))+r9]
669 movaps xmm10,XMMWORD[((-40))+r9]
670 movaps xmm11,XMMWORD[((-24))+r9]
671 lea rsp,[r9]
672
673$L$128_epilogue:
674 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
675 mov rsi,QWORD[16+rsp]
676 DB 0F3h,0C3h ;repret
677
678$L$SEH_end_ChaCha20_128:
679
680ALIGN 32
;-----------------------------------------------------------------------
; ChaCha20_4x -- SIMD path that computes four blocks (256 bytes) per pass.
; Each XMM register holds ONE state word for four consecutive blocks (lane i
; belongs to block i); the results are transposed back to block order before
; being XORed with the input.
;
; Entry: reached from $L$ChaCha20_ssse3 when rdx > 128, with
;       rdi = output, rsi = input, rdx = byte count,
;       rcx -> 32 bytes (state words 4..11), r8 -> 16 bytes (words 12..15),
;       r10 = capability bits loaded in ChaCha20_ctr32.
;       NOTE(review): the ChaCha20_4x label itself is not declared global and
;       r10 is not loaded on that path -- presumably it is never called
;       directly; confirm.
; Fallback: for rdx <= 192, if the capability bits masked with 0x4400000
;       (bits 22 and 26) equal 0x400000 (bit 22 only), the 1x code at
;       $L$do_sse3_after_all is used instead.
; Frame (r9 = rsp at entry, rcx = rsp+256 after setup):
;       [  0.. 63+rsp]  spill slots for row-2 words during the rounds, then
;                       transposed output rows
;       [ 64..127+rsp]  words 0..3, each broadcast to four lanes
;       [128..319+rsp]  words 4..15 broadcast (addressed as (N-256)+rcx);
;                       the slot at (256-256)+rcx holds the four lane counters
;       [-168+r9 .. -9+r9]  saved xmm6..xmm15
; Registers in the round loop:
;       xmm8..xmm11 = words 0..3, xmm12..xmm15 = words 4..7,
;       xmm4/xmm5 = the two live words of 8..11, xmm0..xmm3 = words 12..15,
;       xmm6/xmm7 = temporaries and rotate masks (reloaded via r10/r11),
;       r10 -> $L$rot16, r11 -> $L$rot24, eax = double-round counter.
;-----------------------------------------------------------------------
681ChaCha20_4x:
682 mov QWORD[8+rsp],rdi ;WIN64 prologue
683 mov QWORD[16+rsp],rsi
684 mov rax,rsp
685$L$SEH_begin_ChaCha20_4x:
686 mov rdi,rcx
687 mov rsi,rdx
688 mov rdx,r8
689 mov rcx,r9
690 mov r8,QWORD[40+rsp]
691
692
693
694$L$ChaCha20_4x:
695 mov r9,rsp
696
697 mov r11,r10
698 cmp rdx,192
699 ja NEAR $L$proceed4x
700
; 71303168 = 0x4400000, 4194304 = 0x400000 (see the header comment).
701 and r11,71303168
702 cmp r11,4194304
703 je NEAR $L$do_sse3_after_all
704
705$L$proceed4x:
706 sub rsp,0x140+168
707 movaps XMMWORD[(-168)+r9],xmm6
708 movaps XMMWORD[(-152)+r9],xmm7
709 movaps XMMWORD[(-136)+r9],xmm8
710 movaps XMMWORD[(-120)+r9],xmm9
711 movaps XMMWORD[(-104)+r9],xmm10
712 movaps XMMWORD[(-88)+r9],xmm11
713 movaps XMMWORD[(-72)+r9],xmm12
714 movaps XMMWORD[(-56)+r9],xmm13
715 movaps XMMWORD[(-40)+r9],xmm14
716 movaps XMMWORD[(-24)+r9],xmm15
; simd_handler treats faults between $L$4x_body and $L$4x_epilogue as
; "xmm6..xmm15 saved".
717$L$4x_body:
718 movdqa xmm11,XMMWORD[$L$sigma]
719 movdqu xmm15,XMMWORD[rcx]
720 movdqu xmm7,XMMWORD[16+rcx]
721 movdqu xmm3,XMMWORD[r8]
; rcx is reused as a base pointer into the middle of the frame.
722 lea rcx,[256+rsp]
723 lea r10,[$L$rot16]
724 lea r11,[$L$rot24]
725
; Broadcast words 0..3 (pshufd 0x00/0x55/0xaa/0xff pick dword 0/1/2/3) and
; save them.
726 pshufd xmm8,xmm11,0x00
727 pshufd xmm9,xmm11,0x55
728 movdqa XMMWORD[64+rsp],xmm8
729 pshufd xmm10,xmm11,0xaa
730 movdqa XMMWORD[80+rsp],xmm9
731 pshufd xmm11,xmm11,0xff
732 movdqa XMMWORD[96+rsp],xmm10
733 movdqa XMMWORD[112+rsp],xmm11
734
; Broadcast and save words 4..7.
735 pshufd xmm12,xmm15,0x00
736 pshufd xmm13,xmm15,0x55
737 movdqa XMMWORD[(128-256)+rcx],xmm12
738 pshufd xmm14,xmm15,0xaa
739 movdqa XMMWORD[(144-256)+rcx],xmm13
740 pshufd xmm15,xmm15,0xff
741 movdqa XMMWORD[(160-256)+rcx],xmm14
742 movdqa XMMWORD[(176-256)+rcx],xmm15
743
; Broadcast and save words 8..11.
744 pshufd xmm4,xmm7,0x00
745 pshufd xmm5,xmm7,0x55
746 movdqa XMMWORD[(192-256)+rcx],xmm4
747 pshufd xmm6,xmm7,0xaa
748 movdqa XMMWORD[(208-256)+rcx],xmm5
749 pshufd xmm7,xmm7,0xff
750 movdqa XMMWORD[(224-256)+rcx],xmm6
751 movdqa XMMWORD[(240-256)+rcx],xmm7
752
; Broadcast words 12..15; the counter word (12) gets the per-lane offsets
; {0,1,2,3}. Its slot at (256-256)+rcx is written at $L$oop_enter4x.
753 pshufd xmm0,xmm3,0x00
754 pshufd xmm1,xmm3,0x55
755 paddd xmm0,XMMWORD[$L$inc]
756 pshufd xmm2,xmm3,0xaa
757 movdqa XMMWORD[(272-256)+rcx],xmm1
758 pshufd xmm3,xmm3,0xff
759 movdqa XMMWORD[(288-256)+rcx],xmm2
760 movdqa XMMWORD[(304-256)+rcx],xmm3
761
762 jmp NEAR $L$oop_enter4x
763
764ALIGN 32
; Next group of four blocks: reload the saved input state and advance the
; four lane counters by 4.
765$L$oop_outer4x:
766 movdqa xmm8,XMMWORD[64+rsp]
767 movdqa xmm9,XMMWORD[80+rsp]
768 movdqa xmm10,XMMWORD[96+rsp]
769 movdqa xmm11,XMMWORD[112+rsp]
770 movdqa xmm12,XMMWORD[((128-256))+rcx]
771 movdqa xmm13,XMMWORD[((144-256))+rcx]
772 movdqa xmm14,XMMWORD[((160-256))+rcx]
773 movdqa xmm15,XMMWORD[((176-256))+rcx]
774 movdqa xmm4,XMMWORD[((192-256))+rcx]
775 movdqa xmm5,XMMWORD[((208-256))+rcx]
776 movdqa xmm6,XMMWORD[((224-256))+rcx]
777 movdqa xmm7,XMMWORD[((240-256))+rcx]
778 movdqa xmm0,XMMWORD[((256-256))+rcx]
779 movdqa xmm1,XMMWORD[((272-256))+rcx]
780 movdqa xmm2,XMMWORD[((288-256))+rcx]
781 movdqa xmm3,XMMWORD[((304-256))+rcx]
782 paddd xmm0,XMMWORD[$L$four]
783
; Park words 10,11 (xmm6/xmm7 are needed as temporaries), load the rot16
; mask, and record the lane counters used for this group.
784$L$oop_enter4x:
785 movdqa XMMWORD[32+rsp],xmm6
786 movdqa XMMWORD[48+rsp],xmm7
787 movdqa xmm7,XMMWORD[r10]
788 mov eax,10
789 movdqa XMMWORD[(256-256)+rcx],xmm0
790 jmp NEAR $L$oop4x
791
792ALIGN 32
; One pass = column round + diagonal round, two quarter rounds at a time.
; The DB lines are hand-encoded pshufb instructions (66 0F 38 00 /r) whose
; last byte selects the operands: the mask register is xmm7 (rot16) or xmm6
; (rot24), the destination is one of xmm0..xmm3. Rotations by 12 and 7 use
; shift pairs, which is why xmm6/xmm7 are clobbered and the masks reloaded
; from [r11]/[r10] inside the loop.
793$L$oop4x:
; Column quarter rounds on (0,4,8,12) and (1,5,9,13).
794 paddd xmm8,xmm12
795 paddd xmm9,xmm13
796 pxor xmm0,xmm8
797 pxor xmm1,xmm9
798DB 102,15,56,0,199
799DB 102,15,56,0,207
800 paddd xmm4,xmm0
801 paddd xmm5,xmm1
802 pxor xmm12,xmm4
803 pxor xmm13,xmm5
804 movdqa xmm6,xmm12
805 pslld xmm12,12
806 psrld xmm6,20
807 movdqa xmm7,xmm13
808 pslld xmm13,12
809 por xmm12,xmm6
810 psrld xmm7,20
811 movdqa xmm6,XMMWORD[r11]
812 por xmm13,xmm7
813 paddd xmm8,xmm12
814 paddd xmm9,xmm13
815 pxor xmm0,xmm8
816 pxor xmm1,xmm9
817DB 102,15,56,0,198
818DB 102,15,56,0,206
819 paddd xmm4,xmm0
820 paddd xmm5,xmm1
821 pxor xmm12,xmm4
822 pxor xmm13,xmm5
823 movdqa xmm7,xmm12
824 pslld xmm12,7
825 psrld xmm7,25
826 movdqa xmm6,xmm13
827 pslld xmm13,7
828 por xmm12,xmm7
829 psrld xmm6,25
830 movdqa xmm7,XMMWORD[r10]
831 por xmm13,xmm6
; Swap the live pair of row-2 words: park 8,9 and fetch 10,11.
832 movdqa XMMWORD[rsp],xmm4
833 movdqa XMMWORD[16+rsp],xmm5
834 movdqa xmm4,XMMWORD[32+rsp]
835 movdqa xmm5,XMMWORD[48+rsp]
; Column quarter rounds on (2,6,10,14) and (3,7,11,15).
836 paddd xmm10,xmm14
837 paddd xmm11,xmm15
838 pxor xmm2,xmm10
839 pxor xmm3,xmm11
840DB 102,15,56,0,215
841DB 102,15,56,0,223
842 paddd xmm4,xmm2
843 paddd xmm5,xmm3
844 pxor xmm14,xmm4
845 pxor xmm15,xmm5
846 movdqa xmm6,xmm14
847 pslld xmm14,12
848 psrld xmm6,20
849 movdqa xmm7,xmm15
850 pslld xmm15,12
851 por xmm14,xmm6
852 psrld xmm7,20
853 movdqa xmm6,XMMWORD[r11]
854 por xmm15,xmm7
855 paddd xmm10,xmm14
856 paddd xmm11,xmm15
857 pxor xmm2,xmm10
858 pxor xmm3,xmm11
859DB 102,15,56,0,214
860DB 102,15,56,0,222
861 paddd xmm4,xmm2
862 paddd xmm5,xmm3
863 pxor xmm14,xmm4
864 pxor xmm15,xmm5
865 movdqa xmm7,xmm14
866 pslld xmm14,7
867 psrld xmm7,25
868 movdqa xmm6,xmm15
869 pslld xmm15,7
870 por xmm14,xmm7
871 psrld xmm6,25
872 movdqa xmm7,XMMWORD[r10]
873 por xmm15,xmm6
; Diagonal quarter rounds on (0,5,10,15) and (1,6,11,12); xmm4/xmm5 still
; hold words 10,11.
874 paddd xmm8,xmm13
875 paddd xmm9,xmm14
876 pxor xmm3,xmm8
877 pxor xmm0,xmm9
878DB 102,15,56,0,223
879DB 102,15,56,0,199
880 paddd xmm4,xmm3
881 paddd xmm5,xmm0
882 pxor xmm13,xmm4
883 pxor xmm14,xmm5
884 movdqa xmm6,xmm13
885 pslld xmm13,12
886 psrld xmm6,20
887 movdqa xmm7,xmm14
888 pslld xmm14,12
889 por xmm13,xmm6
890 psrld xmm7,20
891 movdqa xmm6,XMMWORD[r11]
892 por xmm14,xmm7
893 paddd xmm8,xmm13
894 paddd xmm9,xmm14
895 pxor xmm3,xmm8
896 pxor xmm0,xmm9
897DB 102,15,56,0,222
898DB 102,15,56,0,198
899 paddd xmm4,xmm3
900 paddd xmm5,xmm0
901 pxor xmm13,xmm4
902 pxor xmm14,xmm5
903 movdqa xmm7,xmm13
904 pslld xmm13,7
905 psrld xmm7,25
906 movdqa xmm6,xmm14
907 pslld xmm14,7
908 por xmm13,xmm7
909 psrld xmm6,25
910 movdqa xmm7,XMMWORD[r10]
911 por xmm14,xmm6
; Swap back: park 10,11 and fetch 8,9.
912 movdqa XMMWORD[32+rsp],xmm4
913 movdqa XMMWORD[48+rsp],xmm5
914 movdqa xmm4,XMMWORD[rsp]
915 movdqa xmm5,XMMWORD[16+rsp]
; Diagonal quarter rounds on (2,7,8,13) and (3,4,9,14).
916 paddd xmm10,xmm15
917 paddd xmm11,xmm12
918 pxor xmm1,xmm10
919 pxor xmm2,xmm11
920DB 102,15,56,0,207
921DB 102,15,56,0,215
922 paddd xmm4,xmm1
923 paddd xmm5,xmm2
924 pxor xmm15,xmm4
925 pxor xmm12,xmm5
926 movdqa xmm6,xmm15
927 pslld xmm15,12
928 psrld xmm6,20
929 movdqa xmm7,xmm12
930 pslld xmm12,12
931 por xmm15,xmm6
932 psrld xmm7,20
933 movdqa xmm6,XMMWORD[r11]
934 por xmm12,xmm7
935 paddd xmm10,xmm15
936 paddd xmm11,xmm12
937 pxor xmm1,xmm10
938 pxor xmm2,xmm11
939DB 102,15,56,0,206
940DB 102,15,56,0,214
941 paddd xmm4,xmm1
942 paddd xmm5,xmm2
943 pxor xmm15,xmm4
944 pxor xmm12,xmm5
945 movdqa xmm7,xmm15
946 pslld xmm15,7
947 psrld xmm7,25
948 movdqa xmm6,xmm12
949 pslld xmm12,7
950 por xmm15,xmm7
951 psrld xmm6,25
952 movdqa xmm7,XMMWORD[r10]
953 por xmm12,xmm6
954 dec eax
955 jnz NEAR $L$oop4x
956
; Feed-forward and 4x4 dword transpose, one row group at a time. After each
; transpose the four registers of a group hold that 16-byte row for blocks
; 0, 1, 2, 3 respectively.
;
; Words 0..3: result rows for blocks 0..3 end up in xmm8, xmm9, xmm6, xmm11.
957 paddd xmm8,XMMWORD[64+rsp]
958 paddd xmm9,XMMWORD[80+rsp]
959 paddd xmm10,XMMWORD[96+rsp]
960 paddd xmm11,XMMWORD[112+rsp]
961
962 movdqa xmm6,xmm8
963 punpckldq xmm8,xmm9
964 movdqa xmm7,xmm10
965 punpckldq xmm10,xmm11
966 punpckhdq xmm6,xmm9
967 punpckhdq xmm7,xmm11
968 movdqa xmm9,xmm8
969 punpcklqdq xmm8,xmm10
970 movdqa xmm11,xmm6
971 punpcklqdq xmm6,xmm7
972 punpckhqdq xmm9,xmm10
973 punpckhqdq xmm11,xmm7
974 paddd xmm12,XMMWORD[((128-256))+rcx]
975 paddd xmm13,XMMWORD[((144-256))+rcx]
976 paddd xmm14,XMMWORD[((160-256))+rcx]
977 paddd xmm15,XMMWORD[((176-256))+rcx]
978
; Row 0 of blocks 0 and 1 goes to [0+rsp]/[16+rsp]; words 10,11 are pulled
; back from their spill slots into xmm8/xmm9.
979 movdqa XMMWORD[rsp],xmm8
980 movdqa XMMWORD[16+rsp],xmm9
981 movdqa xmm8,XMMWORD[32+rsp]
982 movdqa xmm9,XMMWORD[48+rsp]
983
; Words 4..7: rows for blocks 0..3 end up in xmm12, xmm13, xmm10, xmm15.
984 movdqa xmm10,xmm12
985 punpckldq xmm12,xmm13
986 movdqa xmm7,xmm14
987 punpckldq xmm14,xmm15
988 punpckhdq xmm10,xmm13
989 punpckhdq xmm7,xmm15
990 movdqa xmm13,xmm12
991 punpcklqdq xmm12,xmm14
992 movdqa xmm15,xmm10
993 punpcklqdq xmm10,xmm7
994 punpckhqdq xmm13,xmm14
995 punpckhqdq xmm15,xmm7
996 paddd xmm4,XMMWORD[((192-256))+rcx]
997 paddd xmm5,XMMWORD[((208-256))+rcx]
998 paddd xmm8,XMMWORD[((224-256))+rcx]
999 paddd xmm9,XMMWORD[((240-256))+rcx]
1000
; Row 0 of blocks 2 and 3 goes to [32+rsp]/[48+rsp].
1001 movdqa XMMWORD[32+rsp],xmm6
1002 movdqa XMMWORD[48+rsp],xmm11
1003
; Words 8..11: rows for blocks 0..3 end up in xmm4, xmm5, xmm14, xmm9.
1004 movdqa xmm14,xmm4
1005 punpckldq xmm4,xmm5
1006 movdqa xmm7,xmm8
1007 punpckldq xmm8,xmm9
1008 punpckhdq xmm14,xmm5
1009 punpckhdq xmm7,xmm9
1010 movdqa xmm5,xmm4
1011 punpcklqdq xmm4,xmm8
1012 movdqa xmm9,xmm14
1013 punpcklqdq xmm14,xmm7
1014 punpckhqdq xmm5,xmm8
1015 punpckhqdq xmm9,xmm7
1016 paddd xmm0,XMMWORD[((256-256))+rcx]
1017 paddd xmm1,XMMWORD[((272-256))+rcx]
1018 paddd xmm2,XMMWORD[((288-256))+rcx]
1019 paddd xmm3,XMMWORD[((304-256))+rcx]
1020
; Words 12..15: rows for blocks 0..3 end up in xmm0, xmm1, xmm8, xmm3.
1021 movdqa xmm8,xmm0
1022 punpckldq xmm0,xmm1
1023 movdqa xmm7,xmm2
1024 punpckldq xmm2,xmm3
1025 punpckhdq xmm8,xmm1
1026 punpckhdq xmm7,xmm3
1027 movdqa xmm1,xmm0
1028 punpcklqdq xmm0,xmm2
1029 movdqa xmm3,xmm8
1030 punpcklqdq xmm8,xmm7
1031 punpckhqdq xmm1,xmm2
1032 punpckhqdq xmm3,xmm7
; Fewer than 256 bytes left: finish in $L$tail4x.
1033 cmp rdx,64*4
1034 jb NEAR $L$tail4x
1035
; Full 256 bytes: XOR and store blocks 0..3 in turn. Block k uses
; [16*k+rsp] for row 0 and the registers listed above for rows 1..3.
1036 movdqu xmm6,XMMWORD[rsi]
1037 movdqu xmm11,XMMWORD[16+rsi]
1038 movdqu xmm2,XMMWORD[32+rsi]
1039 movdqu xmm7,XMMWORD[48+rsi]
1040 pxor xmm6,XMMWORD[rsp]
1041 pxor xmm11,xmm12
1042 pxor xmm2,xmm4
1043 pxor xmm7,xmm0
1044
1045 movdqu XMMWORD[rdi],xmm6
1046 movdqu xmm6,XMMWORD[64+rsi]
1047 movdqu XMMWORD[16+rdi],xmm11
1048 movdqu xmm11,XMMWORD[80+rsi]
1049 movdqu XMMWORD[32+rdi],xmm2
1050 movdqu xmm2,XMMWORD[96+rsi]
1051 movdqu XMMWORD[48+rdi],xmm7
1052 movdqu xmm7,XMMWORD[112+rsi]
1053 lea rsi,[128+rsi]
1054 pxor xmm6,XMMWORD[16+rsp]
1055 pxor xmm11,xmm13
1056 pxor xmm2,xmm5
1057 pxor xmm7,xmm1
1058
1059 movdqu XMMWORD[64+rdi],xmm6
1060 movdqu xmm6,XMMWORD[rsi]
1061 movdqu XMMWORD[80+rdi],xmm11
1062 movdqu xmm11,XMMWORD[16+rsi]
1063 movdqu XMMWORD[96+rdi],xmm2
1064 movdqu xmm2,XMMWORD[32+rsi]
1065 movdqu XMMWORD[112+rdi],xmm7
1066 lea rdi,[128+rdi]
1067 movdqu xmm7,XMMWORD[48+rsi]
1068 pxor xmm6,XMMWORD[32+rsp]
1069 pxor xmm11,xmm10
1070 pxor xmm2,xmm14
1071 pxor xmm7,xmm8
1072
1073 movdqu XMMWORD[rdi],xmm6
1074 movdqu xmm6,XMMWORD[64+rsi]
1075 movdqu XMMWORD[16+rdi],xmm11
1076 movdqu xmm11,XMMWORD[80+rsi]
1077 movdqu XMMWORD[32+rdi],xmm2
1078 movdqu xmm2,XMMWORD[96+rsi]
1079 movdqu XMMWORD[48+rdi],xmm7
1080 movdqu xmm7,XMMWORD[112+rsi]
1081 lea rsi,[128+rsi]
1082 pxor xmm6,XMMWORD[48+rsp]
1083 pxor xmm11,xmm15
1084 pxor xmm2,xmm9
1085 pxor xmm7,xmm3
1086 movdqu XMMWORD[64+rdi],xmm6
1087 movdqu XMMWORD[80+rdi],xmm11
1088 movdqu XMMWORD[96+rdi],xmm2
1089 movdqu XMMWORD[112+rdi],xmm7
1090 lea rdi,[128+rdi]
1091
1092 sub rdx,64*4
1093 jnz NEAR $L$oop_outer4x
1094
1095 jmp NEAR $L$done4x
1096
; Tail (1..255 bytes left). Whole 64-byte blocks are handled with vector
; XORs; the first block not fully consumed is copied to [0..63+rsp] and
; finished byte by byte in $L$oop_tail4x (r10 = byte index).
1097$L$tail4x:
1098 cmp rdx,192
1099 jae NEAR $L$192_or_more4x
1100 cmp rdx,128
1101 jae NEAR $L$128_or_more4x
1102 cmp rdx,64
1103 jae NEAR $L$64_or_more4x
1104
1105
; Fewer than 64 bytes: block 0 is the partial block. Its row 0 is already at
; [0+rsp]; add rows 1..3.
1106 xor r10,r10
1107
1108 movdqa XMMWORD[16+rsp],xmm12
1109 movdqa XMMWORD[32+rsp],xmm4
1110 movdqa XMMWORD[48+rsp],xmm0
1111 jmp NEAR $L$oop_tail4x
1112
1113ALIGN 32
; 64..127 bytes: emit block 0, then stage block 1 for the byte loop.
1114$L$64_or_more4x:
1115 movdqu xmm6,XMMWORD[rsi]
1116 movdqu xmm11,XMMWORD[16+rsi]
1117 movdqu xmm2,XMMWORD[32+rsi]
1118 movdqu xmm7,XMMWORD[48+rsi]
1119 pxor xmm6,XMMWORD[rsp]
1120 pxor xmm11,xmm12
1121 pxor xmm2,xmm4
1122 pxor xmm7,xmm0
1123 movdqu XMMWORD[rdi],xmm6
1124 movdqu XMMWORD[16+rdi],xmm11
1125 movdqu XMMWORD[32+rdi],xmm2
1126 movdqu XMMWORD[48+rdi],xmm7
; ZF is still the result of "cmp rdx,64" in $L$tail4x: none of the SSE
; moves/XORs in between modify the flags.
1127 je NEAR $L$done4x
1128
1129 movdqa xmm6,XMMWORD[16+rsp]
1130 lea rsi,[64+rsi]
1131 xor r10,r10
1132 movdqa XMMWORD[rsp],xmm6
1133 movdqa XMMWORD[16+rsp],xmm13
1134 lea rdi,[64+rdi]
1135 movdqa XMMWORD[32+rsp],xmm5
1136 sub rdx,64
1137 movdqa XMMWORD[48+rsp],xmm1
1138 jmp NEAR $L$oop_tail4x
1139
1140ALIGN 32
; 128..191 bytes: emit blocks 0 and 1, then stage block 2.
1141$L$128_or_more4x:
1142 movdqu xmm6,XMMWORD[rsi]
1143 movdqu xmm11,XMMWORD[16+rsi]
1144 movdqu xmm2,XMMWORD[32+rsi]
1145 movdqu xmm7,XMMWORD[48+rsi]
1146 pxor xmm6,XMMWORD[rsp]
1147 pxor xmm11,xmm12
1148 pxor xmm2,xmm4
1149 pxor xmm7,xmm0
1150
1151 movdqu XMMWORD[rdi],xmm6
1152 movdqu xmm6,XMMWORD[64+rsi]
1153 movdqu XMMWORD[16+rdi],xmm11
1154 movdqu xmm11,XMMWORD[80+rsi]
1155 movdqu XMMWORD[32+rdi],xmm2
1156 movdqu xmm2,XMMWORD[96+rsi]
1157 movdqu XMMWORD[48+rdi],xmm7
1158 movdqu xmm7,XMMWORD[112+rsi]
1159 pxor xmm6,XMMWORD[16+rsp]
1160 pxor xmm11,xmm13
1161 pxor xmm2,xmm5
1162 pxor xmm7,xmm1
1163 movdqu XMMWORD[64+rdi],xmm6
1164 movdqu XMMWORD[80+rdi],xmm11
1165 movdqu XMMWORD[96+rdi],xmm2
1166 movdqu XMMWORD[112+rdi],xmm7
; ZF still reflects "cmp rdx,128" from $L$tail4x.
1167 je NEAR $L$done4x
1168
1169 movdqa xmm6,XMMWORD[32+rsp]
1170 lea rsi,[128+rsi]
1171 xor r10,r10
1172 movdqa XMMWORD[rsp],xmm6
1173 movdqa XMMWORD[16+rsp],xmm10
1174 lea rdi,[128+rdi]
1175 movdqa XMMWORD[32+rsp],xmm14
1176 sub rdx,128
1177 movdqa XMMWORD[48+rsp],xmm8
1178 jmp NEAR $L$oop_tail4x
1179
1180ALIGN 32
; 192..255 bytes: emit blocks 0, 1 and 2, then stage block 3.
1181$L$192_or_more4x:
1182 movdqu xmm6,XMMWORD[rsi]
1183 movdqu xmm11,XMMWORD[16+rsi]
1184 movdqu xmm2,XMMWORD[32+rsi]
1185 movdqu xmm7,XMMWORD[48+rsi]
1186 pxor xmm6,XMMWORD[rsp]
1187 pxor xmm11,xmm12
1188 pxor xmm2,xmm4
1189 pxor xmm7,xmm0
1190
1191 movdqu XMMWORD[rdi],xmm6
1192 movdqu xmm6,XMMWORD[64+rsi]
1193 movdqu XMMWORD[16+rdi],xmm11
1194 movdqu xmm11,XMMWORD[80+rsi]
1195 movdqu XMMWORD[32+rdi],xmm2
1196 movdqu xmm2,XMMWORD[96+rsi]
1197 movdqu XMMWORD[48+rdi],xmm7
1198 movdqu xmm7,XMMWORD[112+rsi]
1199 lea rsi,[128+rsi]
1200 pxor xmm6,XMMWORD[16+rsp]
1201 pxor xmm11,xmm13
1202 pxor xmm2,xmm5
1203 pxor xmm7,xmm1
1204
1205 movdqu XMMWORD[64+rdi],xmm6
1206 movdqu xmm6,XMMWORD[rsi]
1207 movdqu XMMWORD[80+rdi],xmm11
1208 movdqu xmm11,XMMWORD[16+rsi]
1209 movdqu XMMWORD[96+rdi],xmm2
1210 movdqu xmm2,XMMWORD[32+rsi]
1211 movdqu XMMWORD[112+rdi],xmm7
1212 lea rdi,[128+rdi]
1213 movdqu xmm7,XMMWORD[48+rsi]
1214 pxor xmm6,XMMWORD[32+rsp]
1215 pxor xmm11,xmm10
1216 pxor xmm2,xmm14
1217 pxor xmm7,xmm8
1218 movdqu XMMWORD[rdi],xmm6
1219 movdqu XMMWORD[16+rdi],xmm11
1220 movdqu XMMWORD[32+rdi],xmm2
1221 movdqu XMMWORD[48+rdi],xmm7
; ZF still reflects "cmp rdx,192" from $L$tail4x (lea does not touch flags).
1222 je NEAR $L$done4x
1223
; rsi/rdi were already advanced by 128 above; 64 more skips block 2.
1224 movdqa xmm6,XMMWORD[48+rsp]
1225 lea rsi,[64+rsi]
1226 xor r10,r10
1227 movdqa XMMWORD[rsp],xmm6
1228 movdqa XMMWORD[16+rsp],xmm15
1229 lea rdi,[64+rdi]
1230 movdqa XMMWORD[32+rsp],xmm9
1231 sub rdx,192
1232 movdqa XMMWORD[48+rsp],xmm3
1233
; Byte-wise XOR of the remaining rdx (1..63) bytes with the staged block.
1234$L$oop_tail4x:
1235 movzx eax,BYTE[r10*1+rsi]
1236 movzx ecx,BYTE[r10*1+rsp]
1237 lea r10,[1+r10]
1238 xor eax,ecx
1239 mov BYTE[((-1))+r10*1+rdi],al
1240 dec rdx
1241 jnz NEAR $L$oop_tail4x
1242
; Restore the callee-saved XMM registers and drop the frame via r9.
1243$L$done4x:
1244 movaps xmm6,XMMWORD[((-168))+r9]
1245 movaps xmm7,XMMWORD[((-152))+r9]
1246 movaps xmm8,XMMWORD[((-136))+r9]
1247 movaps xmm9,XMMWORD[((-120))+r9]
1248 movaps xmm10,XMMWORD[((-104))+r9]
1249 movaps xmm11,XMMWORD[((-88))+r9]
1250 movaps xmm12,XMMWORD[((-72))+r9]
1251 movaps xmm13,XMMWORD[((-56))+r9]
1252 movaps xmm14,XMMWORD[((-40))+r9]
1253 movaps xmm15,XMMWORD[((-24))+r9]
1254 lea rsp,[r9]
1255
1256$L$4x_epilogue:
1257 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1258 mov rsi,QWORD[16+rsp]
1259 DB 0F3h,0C3h ;repret
1260
1261$L$SEH_end_ChaCha20_4x:
1262EXTERN __imp_RtlVirtualUnwind
1263
1264ALIGN 16
;-----------------------------------------------------------------------
; se_handler -- exception/unwind handler registered (via .xdata) for
; ChaCha20_ctr32. It reconstructs the non-volatile registers that the scalar
; path saved, patches them into the context record, and then delegates to
; RtlVirtualUnwind.
;
; In (as used below): r8 -> context record, r9 -> dispatcher context.
;   NOTE(review): the numeric offsets are presumed to be the Win64 CONTEXT
;   fields (120=Rax, 144=Rbx, 152=Rsp, 160=Rbp, 168=Rsi, 176=Rdi,
;   216..240=R12..R15, 248=Rip) and DISPATCHER_CONTEXT fields (8=ImageBase,
;   40=ContextRecord, 56=HandlerData) -- verify against winnt.h.
; Out: eax = 1.
; The tail starting at $L$common_seh_tail is shared with simd_handler.
;-----------------------------------------------------------------------
1265se_handler:
1266 push rsi
1267 push rdi
1268 push rbx
1269 push rbp
1270 push r12
1271 push r13
1272 push r14
1273 push r15
1274 pushfq
1275 sub rsp,64
1276
; rax = value at context+120 (the faulting function copied its entry rsp to
; rax in its prologue), rbx = faulting instruction pointer.
1277 mov rax,QWORD[120+r8]
1278 mov rbx,QWORD[248+r8]
1279
1280 mov rsi,QWORD[8+r9]
1281 mov r11,QWORD[56+r9]
1282
; Fault before $L$ctr32_body: the frame is not (fully) set up; rax from the
; prologue is used as the entry stack pointer.
1283 lea r10,[$L$ctr32_body]
1284 cmp rbx,r10
1285 jb NEAR $L$common_seh_tail
1286
; From here on use the context's stack pointer instead.
1287 mov rax,QWORD[152+r8]
1288
; Fault at or after $L$no_data: the frame has already been torn down.
1289 lea r10,[$L$no_data]
1290 cmp rbx,r10
1291 jae NEAR $L$common_seh_tail
1292
; Inside the body: step over the 64+24 bytes of locals and the 48 bytes of
; pushed registers, then read the six saved registers back and store them
; into the context record.
1293 lea rax,[((64+24+48))+rax]
1294
1295 mov rbx,QWORD[((-8))+rax]
1296 mov rbp,QWORD[((-16))+rax]
1297 mov r12,QWORD[((-24))+rax]
1298 mov r13,QWORD[((-32))+rax]
1299 mov r14,QWORD[((-40))+rax]
1300 mov r15,QWORD[((-48))+rax]
1301 mov QWORD[144+r8],rbx
1302 mov QWORD[160+r8],rbp
1303 mov QWORD[216+r8],r12
1304 mov QWORD[224+r8],r13
1305 mov QWORD[232+r8],r14
1306 mov QWORD[240+r8],r15
1307
; Shared tail. rax = the function's stack pointer at entry. The prologues
; parked the caller's rdi/rsi at [8+rsp]/[16+rsp], so fetch them from there
; and record rsp/rsi/rdi in the context record.
1308$L$common_seh_tail:
1309 mov rdi,QWORD[8+rax]
1310 mov rsi,QWORD[16+rax]
1311 mov QWORD[152+r8],rax
1312 mov QWORD[168+r8],rsi
1313 mov QWORD[176+r8],rdi
1314
; Copy 154 qwords (1232 bytes) from the context record at r8 to the buffer
; at [40+r9]. The DD encodes bytes FC F3 48 A5 = cld / rep movsq.
1315 mov rdi,QWORD[40+r9]
1316 mov rsi,r8
1317 mov ecx,154
1318 DD 0xa548f3fc
1319
; Call RtlVirtualUnwind: rcx = 0, rdx = [8+disp], r8 = [0+disp],
; r9 = [16+disp]; stack arguments at [32..56+rsp] are [40+disp],
; &[56+disp], &[24+disp] and 0 (disp = dispatcher context, now in rsi).
1320 mov rsi,r9
1321 xor rcx,rcx
1322 mov rdx,QWORD[8+rsi]
1323 mov r8,QWORD[rsi]
1324 mov r9,QWORD[16+rsi]
1325 mov r10,QWORD[40+rsi]
1326 lea r11,[56+rsi]
1327 lea r12,[24+rsi]
1328 mov QWORD[32+rsp],r10
1329 mov QWORD[40+rsp],r11
1330 mov QWORD[48+rsp],r12
1331 mov QWORD[56+rsp],rcx
1332 call QWORD[__imp_RtlVirtualUnwind]
1333
; Return 1 (NOTE(review): presumably ExceptionContinueSearch -- confirm) and
; restore everything pushed on entry.
1334 mov eax,1
1335 add rsp,64
1336 popfq
1337 pop r15
1338 pop r14
1339 pop r13
1340 pop r12
1341 pop rbp
1342 pop rbx
1343 pop rdi
1344 pop rsi
1345 DB 0F3h,0C3h ;repret
1346
1347
1348
1349ALIGN 16
;-----------------------------------------------------------------------
; simd_handler -- exception/unwind handler registered (via .xdata) for the
; three SIMD functions. Its handler data, read through r11 = [56+r9], is
; three dwords:
;   [0]  image-relative address of the function's "body" label
;   [4]  image-relative address of the function's "epilogue" label
;   [8]  size in bytes of the XMM save area (0x20 / 0x60 / 0xa0 in .xdata)
; If the fault lies between body and epilogue, the saved XMM registers are
; copied from the frame into the context record starting at offset 512
; (NOTE(review): presumably CONTEXT.Xmm6 -- verify against winnt.h).
; All paths finish in $L$common_seh_tail, which lives inside se_handler.
;
; In (as used below): r8 -> context record, r9 -> dispatcher context.
;-----------------------------------------------------------------------
1350simd_handler:
1351 push rsi
1352 push rdi
1353 push rbx
1354 push rbp
1355 push r12
1356 push r13
1357 push r14
1358 push r15
1359 pushfq
1360 sub rsp,64
1361
; rax = value at context+120 (entry rsp, copied to rax by the prologue),
; rbx = faulting instruction pointer.
1362 mov rax,QWORD[120+r8]
1363 mov rbx,QWORD[248+r8]
1364
; rsi = image base, r11 -> handler data.
1365 mov rsi,QWORD[8+r9]
1366 mov r11,QWORD[56+r9]
1367
; Fault before the body label: XMM registers not saved yet.
1368 mov r10d,DWORD[r11]
1369 lea r10,[r10*1+rsi]
1370 cmp rbx,r10
1371 jb NEAR $L$common_seh_tail
1372
; rax = value at context+192. The SIMD functions keep their entry rsp in r9,
; so this is the frame anchor (NOTE(review): 192 presumably = CONTEXT.R9).
1373 mov rax,QWORD[192+r8]
1374
; Fault at or after the epilogue label: registers already restored.
1375 mov r10d,DWORD[4+r11]
1376 mov ecx,DWORD[8+r11]
1377 lea r10,[r10*1+rsi]
1378 cmp rbx,r10
1379 jae NEAR $L$common_seh_tail
1380
; Source = rax - 8 - size (start of the XMM save area), destination =
; context+512, count = size/8 qwords. ecx is negated and negated back so the
; same register serves as both offset and count.
1381 neg rcx
1382 lea rsi,[((-8))+rcx*1+rax]
1383 lea rdi,[512+r8]
1384 neg ecx
1385 shr ecx,3
; Bytes FC F3 48 A5 = cld / rep movsq.
1386 DD 0xa548f3fc
1387
1388 jmp NEAR $L$common_seh_tail
1389
1390
1391section .pdata rdata align=4
1392ALIGN 4
; Function table: one entry of three image-relative addresses per function --
; start label, end label, and the matching unwind-info record in .xdata.
1393 DD $L$SEH_begin_ChaCha20_ctr32 wrt ..imagebase
1394 DD $L$SEH_end_ChaCha20_ctr32 wrt ..imagebase
1395 DD $L$SEH_info_ChaCha20_ctr32 wrt ..imagebase
1396
1397 DD $L$SEH_begin_ChaCha20_ssse3 wrt ..imagebase
1398 DD $L$SEH_end_ChaCha20_ssse3 wrt ..imagebase
1399 DD $L$SEH_info_ChaCha20_ssse3 wrt ..imagebase
1400
1401 DD $L$SEH_begin_ChaCha20_128 wrt ..imagebase
1402 DD $L$SEH_end_ChaCha20_128 wrt ..imagebase
1403 DD $L$SEH_info_ChaCha20_128 wrt ..imagebase
1404
1405 DD $L$SEH_begin_ChaCha20_4x wrt ..imagebase
1406 DD $L$SEH_end_ChaCha20_4x wrt ..imagebase
1407 DD $L$SEH_info_ChaCha20_4x wrt ..imagebase
1408section .xdata rdata align=8
1409ALIGN 8
; Unwind-info records. Each starts with the four bytes 9,0,0,0
; (NOTE(review): presumably UNWIND_INFO version 1 with the exception-handler
; flag and no unwind codes -- confirm against the Win64 unwind data format),
; followed by the image-relative address of the handler routine.
;
; Scalar path: handled by se_handler, no handler data.
1410$L$SEH_info_ChaCha20_ctr32:
1411DB 9,0,0,0
1412 DD se_handler wrt ..imagebase
1413
; SIMD paths: handled by simd_handler. The handler data that follows is
; {body label, epilogue label, XMM save-area size in bytes, 0}; simd_handler
; reads the first three dwords. 0x20 = 2 XMM registers (xmm6..xmm7).
1414$L$SEH_info_ChaCha20_ssse3:
1415DB 9,0,0,0
1416 DD simd_handler wrt ..imagebase
1417 DD $L$ssse3_body wrt ..imagebase,$L$ssse3_epilogue wrt ..imagebase
1418 DD 0x20,0
1419
; 0x60 = 6 XMM registers (xmm6..xmm11).
1420$L$SEH_info_ChaCha20_128:
1421DB 9,0,0,0
1422 DD simd_handler wrt ..imagebase
1423 DD $L$128_body wrt ..imagebase,$L$128_epilogue wrt ..imagebase
1424 DD 0x60,0
1425
; 0xa0 = 10 XMM registers (xmm6..xmm15).
1426$L$SEH_info_ChaCha20_4x:
1427DB 9,0,0,0
1428 DD simd_handler wrt ..imagebase
1429 DD $L$4x_body wrt ..imagebase,$L$4x_epilogue wrt ..imagebase
1430 DD 0xa0,0
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette