VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.0g/crypto/genasm-nasm/ghash-x86_64.S@ 69881

Last change on this file since 69881 was 69881, checked in by vboxsync, 7 years ago

Update OpenSSL to 1.1.0g.
bugref:8070: src/libs maintenance

  • Property svn:eol-style set to native
File size: 38.3 KB
Line 
; NASM source for x86-64. "default rel" makes register-less memory operands RIP-relative.
1default rel
; The three size keywords are defined as empty, so XMMWORD[...] below assembles as a plain [...] operand.
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
6
; Defined elsewhere; this file reads the dword at OPENSSL_ia32cap_P+4.
7EXTERN OPENSSL_ia32cap_P
8
;-----------------------------------------------------------------------
; gcm_gmult_4bit
; ABI:  Microsoft x64; rcx and rdx are copied to rdi and rsi on entry.
; In:   rdi = 16-byte block, read from byte 15 down to byte 0 and then
;             overwritten with the 16-byte result
;       rsi = table of 16-byte entries, addressed by (nibble << 4)
;       (presumably Xi and Htable of the C GCM code - confirm against
;       the caller)
; Uses: $L$rem_4bit, a table of qwords defined outside this view.
; The incoming rdi/rsi are parked at [rsp+8]/[rsp+16] and reloaded
; before returning. rbx is reloaded from the stack; rbp and r12 are
; pushed but never written by this routine, so they are not reloaded.
;-----------------------------------------------------------------------
9global gcm_gmult_4bit
10
11ALIGN 16
12gcm_gmult_4bit:
13 mov QWORD[8+rsp],rdi ;WIN64 prologue
14 mov QWORD[16+rsp],rsi
15 mov rax,rsp
16$L$SEH_begin_gcm_gmult_4bit:
17 mov rdi,rcx
18 mov rsi,rdx
19
20
21 push rbx
22 push rbp
23 push r12
24$L$gmult_prologue:
25
; Start with the last byte of the block: al = low nibble << 4 and
; bl = high nibble << 4 are byte offsets of 16-byte table entries.
26 movzx r8,BYTE[15+rdi]
27 lea r11,[$L$rem_4bit]
28 xor rax,rax
29 xor rbx,rbx
30 mov al,r8b
31 mov bl,r8b
32 shl al,4
; rcx = index of the next block byte to fetch (14 down to 0).
33 mov rcx,14
; r9:r8 = table entry selected by the low nibble (r9 = qword at +0, r8 = qword at +8).
34 mov r8,QWORD[8+rax*1+rsi]
35 mov r9,QWORD[rax*1+rsi]
36 and bl,0xf0
37 mov rdx,r8
38 jmp NEAR $L$oop1
39
40ALIGN 16
; Every step shifts the 128-bit value r9:r8 right by 4 bits, XORs in one
; table entry, and XORs $L$rem_4bit[the 4 bits shifted out of r8] into r9.
; The loop body holds two steps: the first uses the entry at offset rbx
; and fetches the next byte, the second uses the entry at offset rax.
41$L$oop1:
42 shr r8,4
43 and rdx,0xf
44 mov r10,r9
45 mov al,BYTE[rcx*1+rdi]
46 shr r9,4
47 xor r8,QWORD[8+rbx*1+rsi]
48 shl r10,60
49 xor r9,QWORD[rbx*1+rsi]
50 mov bl,al
51 xor r9,QWORD[rdx*8+r11]
52 mov rdx,r8
53 shl al,4
54 xor r8,r10
; Leave the loop once rcx drops below zero, i.e. after byte 0 was fetched.
55 dec rcx
56 js NEAR $L$break1
57
58 shr r8,4
59 and rdx,0xf
60 mov r10,r9
61 shr r9,4
62 xor r8,QWORD[8+rax*1+rsi]
63 shl r10,60
64 xor r9,QWORD[rax*1+rsi]
65 and bl,0xf0
66 xor r9,QWORD[rdx*8+r11]
67 mov rdx,r8
68 xor r8,r10
69 jmp NEAR $L$oop1
70
71ALIGN 16
; Two closing steps for byte 0: the entry at offset rax, then the entry at offset rbx.
72$L$break1:
73 shr r8,4
74 and rdx,0xf
75 mov r10,r9
76 shr r9,4
77 xor r8,QWORD[8+rax*1+rsi]
78 shl r10,60
79 xor r9,QWORD[rax*1+rsi]
80 and bl,0xf0
81 xor r9,QWORD[rdx*8+r11]
82 mov rdx,r8
83 xor r8,r10
84
85 shr r8,4
86 and rdx,0xf
87 mov r10,r9
88 shr r9,4
89 xor r8,QWORD[8+rbx*1+rsi]
90 shl r10,60
91 xor r9,QWORD[rbx*1+rsi]
92 xor r8,r10
93 xor r9,QWORD[rdx*8+r11]
94
; Byte-swap both halves and write the result back over the input block.
95 bswap r8
96 bswap r9
97 mov QWORD[8+rdi],r8
98 mov QWORD[rdi],r9
99
; rbx was the first of the three pushes, so it sits at [rsp+16]; the lea drops all three slots.
100 mov rbx,QWORD[16+rsp]
101 lea rsp,[24+rsp]
102$L$gmult_epilogue:
103 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
104 mov rsi,QWORD[16+rsp]
105 DB 0F3h,0C3h ;repret
106$L$SEH_end_gcm_gmult_4bit:
;-----------------------------------------------------------------------
; gcm_ghash_4bit
; ABI:  Microsoft x64; rcx, rdx, r8, r9 are copied to rdi, rsi, rdx, rcx.
; In:   rdi = 16-byte accumulator block (read, and rewritten on exit)
;       rsi = table of sixteen 16-byte entries
;       rdx = input pointer (kept in r14)
;       rcx = byte count (r15 = r14 + rcx = end of input)
;       (presumably Xi, Htable, inp, len of the C GCM code - confirm)
; The loop consumes 16 input bytes per pass and compares r14 with r15
; only at the bottom, so the body always runs at least once.
; Uses: $L$rem_8bit, a table of words defined outside this view.
; Stack: six callee-saved registers are pushed, then 280 bytes reserved:
;       [rsp+0..15]     per entry: low byte of the qword at +8, shifted left 4
;       [rsp+16..143]   per entry: (qword at +8 >> 4) | (qword at +0 << 60)
;       [rsp+144..271]  per entry: qword at +0 >> 4
; Inner step: r9:r8 is shifted right by 8 bits, one entry of the stack
; copy (indexed by a high nibble) and one entry of the table at rsi
; (addressed by low nibble << 4) are XORed in, and a $L$rem_8bit word
; shifted left by 48 is XORed into r9.
;-----------------------------------------------------------------------
107global gcm_ghash_4bit
108
109ALIGN 16
110gcm_ghash_4bit:
111 mov QWORD[8+rsp],rdi ;WIN64 prologue
112 mov QWORD[16+rsp],rsi
113 mov rax,rsp
114$L$SEH_begin_gcm_ghash_4bit:
115 mov rdi,rcx
116 mov rsi,rdx
117 mov rdx,r8
118 mov rcx,r9
119
120
121 push rbx
122 push rbp
123 push r12
124 push r13
125 push r14
126 push r15
127 sub rsp,280
128$L$ghash_prologue:
129 mov r14,rdx
130 mov r15,rcx
; rsi is biased by +128 while the table is copied (undone by "add rsi,-128"
; below); the loads then use displacements from -128 to +120.
131 sub rsi,-128
; rbp = rsp + 144, the middle of the two 128-byte qword arrays on the stack.
132 lea rbp,[((16+128))+rsp]
133 xor edx,edx
; Unrolled copy of all sixteen table entries into the stack layout
; described in the header; two entries are in flight at any time.
134 mov r8,QWORD[((0+0-128))+rsi]
135 mov rax,QWORD[((0+8-128))+rsi]
136 mov dl,al
137 shr rax,4
138 mov r10,r8
139 shr r8,4
140 mov r9,QWORD[((16+0-128))+rsi]
141 shl dl,4
142 mov rbx,QWORD[((16+8-128))+rsi]
143 shl r10,60
144 mov BYTE[rsp],dl
145 or rax,r10
146 mov dl,bl
147 shr rbx,4
148 mov r10,r9
149 shr r9,4
150 mov QWORD[rbp],r8
151 mov r8,QWORD[((32+0-128))+rsi]
152 shl dl,4
153 mov QWORD[((0-128))+rbp],rax
154 mov rax,QWORD[((32+8-128))+rsi]
155 shl r10,60
156 mov BYTE[1+rsp],dl
157 or rbx,r10
158 mov dl,al
159 shr rax,4
160 mov r10,r8
161 shr r8,4
162 mov QWORD[8+rbp],r9
163 mov r9,QWORD[((48+0-128))+rsi]
164 shl dl,4
165 mov QWORD[((8-128))+rbp],rbx
166 mov rbx,QWORD[((48+8-128))+rsi]
167 shl r10,60
168 mov BYTE[2+rsp],dl
169 or rax,r10
170 mov dl,bl
171 shr rbx,4
172 mov r10,r9
173 shr r9,4
174 mov QWORD[16+rbp],r8
175 mov r8,QWORD[((64+0-128))+rsi]
176 shl dl,4
177 mov QWORD[((16-128))+rbp],rax
178 mov rax,QWORD[((64+8-128))+rsi]
179 shl r10,60
180 mov BYTE[3+rsp],dl
181 or rbx,r10
182 mov dl,al
183 shr rax,4
184 mov r10,r8
185 shr r8,4
186 mov QWORD[24+rbp],r9
187 mov r9,QWORD[((80+0-128))+rsi]
188 shl dl,4
189 mov QWORD[((24-128))+rbp],rbx
190 mov rbx,QWORD[((80+8-128))+rsi]
191 shl r10,60
192 mov BYTE[4+rsp],dl
193 or rax,r10
194 mov dl,bl
195 shr rbx,4
196 mov r10,r9
197 shr r9,4
198 mov QWORD[32+rbp],r8
199 mov r8,QWORD[((96+0-128))+rsi]
200 shl dl,4
201 mov QWORD[((32-128))+rbp],rax
202 mov rax,QWORD[((96+8-128))+rsi]
203 shl r10,60
204 mov BYTE[5+rsp],dl
205 or rbx,r10
206 mov dl,al
207 shr rax,4
208 mov r10,r8
209 shr r8,4
210 mov QWORD[40+rbp],r9
211 mov r9,QWORD[((112+0-128))+rsi]
212 shl dl,4
213 mov QWORD[((40-128))+rbp],rbx
214 mov rbx,QWORD[((112+8-128))+rsi]
215 shl r10,60
216 mov BYTE[6+rsp],dl
217 or rax,r10
218 mov dl,bl
219 shr rbx,4
220 mov r10,r9
221 shr r9,4
222 mov QWORD[48+rbp],r8
223 mov r8,QWORD[((128+0-128))+rsi]
224 shl dl,4
225 mov QWORD[((48-128))+rbp],rax
226 mov rax,QWORD[((128+8-128))+rsi]
227 shl r10,60
228 mov BYTE[7+rsp],dl
229 or rbx,r10
230 mov dl,al
231 shr rax,4
232 mov r10,r8
233 shr r8,4
234 mov QWORD[56+rbp],r9
235 mov r9,QWORD[((144+0-128))+rsi]
236 shl dl,4
237 mov QWORD[((56-128))+rbp],rbx
238 mov rbx,QWORD[((144+8-128))+rsi]
239 shl r10,60
240 mov BYTE[8+rsp],dl
241 or rax,r10
242 mov dl,bl
243 shr rbx,4
244 mov r10,r9
245 shr r9,4
246 mov QWORD[64+rbp],r8
247 mov r8,QWORD[((160+0-128))+rsi]
248 shl dl,4
249 mov QWORD[((64-128))+rbp],rax
250 mov rax,QWORD[((160+8-128))+rsi]
251 shl r10,60
252 mov BYTE[9+rsp],dl
253 or rbx,r10
254 mov dl,al
255 shr rax,4
256 mov r10,r8
257 shr r8,4
258 mov QWORD[72+rbp],r9
259 mov r9,QWORD[((176+0-128))+rsi]
260 shl dl,4
261 mov QWORD[((72-128))+rbp],rbx
262 mov rbx,QWORD[((176+8-128))+rsi]
263 shl r10,60
264 mov BYTE[10+rsp],dl
265 or rax,r10
266 mov dl,bl
267 shr rbx,4
268 mov r10,r9
269 shr r9,4
270 mov QWORD[80+rbp],r8
271 mov r8,QWORD[((192+0-128))+rsi]
272 shl dl,4
273 mov QWORD[((80-128))+rbp],rax
274 mov rax,QWORD[((192+8-128))+rsi]
275 shl r10,60
276 mov BYTE[11+rsp],dl
277 or rbx,r10
278 mov dl,al
279 shr rax,4
280 mov r10,r8
281 shr r8,4
282 mov QWORD[88+rbp],r9
283 mov r9,QWORD[((208+0-128))+rsi]
284 shl dl,4
285 mov QWORD[((88-128))+rbp],rbx
286 mov rbx,QWORD[((208+8-128))+rsi]
287 shl r10,60
288 mov BYTE[12+rsp],dl
289 or rax,r10
290 mov dl,bl
291 shr rbx,4
292 mov r10,r9
293 shr r9,4
294 mov QWORD[96+rbp],r8
295 mov r8,QWORD[((224+0-128))+rsi]
296 shl dl,4
297 mov QWORD[((96-128))+rbp],rax
298 mov rax,QWORD[((224+8-128))+rsi]
299 shl r10,60
300 mov BYTE[13+rsp],dl
301 or rbx,r10
302 mov dl,al
303 shr rax,4
304 mov r10,r8
305 shr r8,4
306 mov QWORD[104+rbp],r9
307 mov r9,QWORD[((240+0-128))+rsi]
308 shl dl,4
309 mov QWORD[((104-128))+rbp],rbx
310 mov rbx,QWORD[((240+8-128))+rsi]
311 shl r10,60
312 mov BYTE[14+rsp],dl
313 or rax,r10
314 mov dl,bl
315 shr rbx,4
316 mov r10,r9
317 shr r9,4
318 mov QWORD[112+rbp],r8
319 shl dl,4
320 mov QWORD[((112-128))+rbp],rax
321 shl r10,60
322 mov BYTE[15+rsp],dl
323 or rbx,r10
324 mov QWORD[120+rbp],r9
325 mov QWORD[((120-128))+rbp],rbx
; Remove the +128 bias from rsi, load the accumulator into r9:r8 and
; turn r15 from a byte count into the end-of-input pointer.
326 add rsi,-128
327 mov r8,QWORD[8+rdi]
328 mov r9,QWORD[rdi]
329 add r15,r14
330 lea r11,[$L$rem_8bit]
331 jmp NEAR $L$outer_loop
332ALIGN 16
; One pass per 16 input bytes: XOR the input block into r9:r8, write the
; result to [rdi], and advance r14.
333$L$outer_loop:
334 xor r9,QWORD[r14]
335 mov rdx,QWORD[8+r14]
336 lea r14,[16+r14]
337 xor rdx,r8
338 mov QWORD[rdi],r9
339 mov QWORD[8+rdi],rdx
; edx = bytes 12..15 of the block just stored; every "rol edx,8" brings
; the next byte (15, 14, 13, 12) into dl.
340 shr rdx,32
341 xor rax,rax
342 rol edx,8
343 mov al,dl
344 movzx ebx,dl
345 shl al,4
346 shr ebx,4
347 rol edx,8
348 mov r8,QWORD[8+rax*1+rsi]
349 mov r9,QWORD[rax*1+rsi]
350 mov al,dl
351 movzx ecx,dl
352 shl al,4
353 movzx r12,BYTE[rbx*1+rsp]
354 shr ecx,4
355 xor r12,r8
356 mov r10,r9
357 shr r8,8
358 movzx r12,r12b
359 shr r9,8
360 xor r8,QWORD[((-128))+rbx*8+rbp]
361 shl r10,56
362 xor r9,QWORD[rbx*8+rbp]
363 rol edx,8
364 xor r8,QWORD[8+rax*1+rsi]
365 xor r9,QWORD[rax*1+rsi]
366 mov al,dl
367 xor r8,r10
368 movzx r12,WORD[r12*2+r11]
369 movzx ebx,dl
370 shl al,4
371 movzx r13,BYTE[rcx*1+rsp]
372 shr ebx,4
373 shl r12,48
374 xor r13,r8
375 mov r10,r9
376 xor r9,r12
377 shr r8,8
378 movzx r13,r13b
379 shr r9,8
380 xor r8,QWORD[((-128))+rcx*8+rbp]
381 shl r10,56
382 xor r9,QWORD[rcx*8+rbp]
383 rol edx,8
384 xor r8,QWORD[8+rax*1+rsi]
385 xor r9,QWORD[rax*1+rsi]
386 mov al,dl
387 xor r8,r10
388 movzx r13,WORD[r13*2+r11]
389 movzx ecx,dl
390 shl al,4
391 movzx r12,BYTE[rbx*1+rsp]
392 shr ecx,4
393 shl r13,48
394 xor r12,r8
395 mov r10,r9
396 xor r9,r13
397 shr r8,8
398 movzx r12,r12b
; Reload edx with bytes 8..11 of the stored block.
399 mov edx,DWORD[8+rdi]
400 shr r9,8
401 xor r8,QWORD[((-128))+rbx*8+rbp]
402 shl r10,56
403 xor r9,QWORD[rbx*8+rbp]
404 rol edx,8
405 xor r8,QWORD[8+rax*1+rsi]
406 xor r9,QWORD[rax*1+rsi]
407 mov al,dl
408 xor r8,r10
409 movzx r12,WORD[r12*2+r11]
410 movzx ebx,dl
411 shl al,4
412 movzx r13,BYTE[rcx*1+rsp]
413 shr ebx,4
414 shl r12,48
415 xor r13,r8
416 mov r10,r9
417 xor r9,r12
418 shr r8,8
419 movzx r13,r13b
420 shr r9,8
421 xor r8,QWORD[((-128))+rcx*8+rbp]
422 shl r10,56
423 xor r9,QWORD[rcx*8+rbp]
424 rol edx,8
425 xor r8,QWORD[8+rax*1+rsi]
426 xor r9,QWORD[rax*1+rsi]
427 mov al,dl
428 xor r8,r10
429 movzx r13,WORD[r13*2+r11]
430 movzx ecx,dl
431 shl al,4
432 movzx r12,BYTE[rbx*1+rsp]
433 shr ecx,4
434 shl r13,48
435 xor r12,r8
436 mov r10,r9
437 xor r9,r13
438 shr r8,8
439 movzx r12,r12b
440 shr r9,8
441 xor r8,QWORD[((-128))+rbx*8+rbp]
442 shl r10,56
443 xor r9,QWORD[rbx*8+rbp]
444 rol edx,8
445 xor r8,QWORD[8+rax*1+rsi]
446 xor r9,QWORD[rax*1+rsi]
447 mov al,dl
448 xor r8,r10
449 movzx r12,WORD[r12*2+r11]
450 movzx ebx,dl
451 shl al,4
452 movzx r13,BYTE[rcx*1+rsp]
453 shr ebx,4
454 shl r12,48
455 xor r13,r8
456 mov r10,r9
457 xor r9,r12
458 shr r8,8
459 movzx r13,r13b
460 shr r9,8
461 xor r8,QWORD[((-128))+rcx*8+rbp]
462 shl r10,56
463 xor r9,QWORD[rcx*8+rbp]
464 rol edx,8
465 xor r8,QWORD[8+rax*1+rsi]
466 xor r9,QWORD[rax*1+rsi]
467 mov al,dl
468 xor r8,r10
469 movzx r13,WORD[r13*2+r11]
470 movzx ecx,dl
471 shl al,4
472 movzx r12,BYTE[rbx*1+rsp]
473 shr ecx,4
474 shl r13,48
475 xor r12,r8
476 mov r10,r9
477 xor r9,r13
478 shr r8,8
479 movzx r12,r12b
; Reload edx with bytes 4..7 of the stored block.
480 mov edx,DWORD[4+rdi]
481 shr r9,8
482 xor r8,QWORD[((-128))+rbx*8+rbp]
483 shl r10,56
484 xor r9,QWORD[rbx*8+rbp]
485 rol edx,8
486 xor r8,QWORD[8+rax*1+rsi]
487 xor r9,QWORD[rax*1+rsi]
488 mov al,dl
489 xor r8,r10
490 movzx r12,WORD[r12*2+r11]
491 movzx ebx,dl
492 shl al,4
493 movzx r13,BYTE[rcx*1+rsp]
494 shr ebx,4
495 shl r12,48
496 xor r13,r8
497 mov r10,r9
498 xor r9,r12
499 shr r8,8
500 movzx r13,r13b
501 shr r9,8
502 xor r8,QWORD[((-128))+rcx*8+rbp]
503 shl r10,56
504 xor r9,QWORD[rcx*8+rbp]
505 rol edx,8
506 xor r8,QWORD[8+rax*1+rsi]
507 xor r9,QWORD[rax*1+rsi]
508 mov al,dl
509 xor r8,r10
510 movzx r13,WORD[r13*2+r11]
511 movzx ecx,dl
512 shl al,4
513 movzx r12,BYTE[rbx*1+rsp]
514 shr ecx,4
515 shl r13,48
516 xor r12,r8
517 mov r10,r9
518 xor r9,r13
519 shr r8,8
520 movzx r12,r12b
521 shr r9,8
522 xor r8,QWORD[((-128))+rbx*8+rbp]
523 shl r10,56
524 xor r9,QWORD[rbx*8+rbp]
525 rol edx,8
526 xor r8,QWORD[8+rax*1+rsi]
527 xor r9,QWORD[rax*1+rsi]
528 mov al,dl
529 xor r8,r10
530 movzx r12,WORD[r12*2+r11]
531 movzx ebx,dl
532 shl al,4
533 movzx r13,BYTE[rcx*1+rsp]
534 shr ebx,4
535 shl r12,48
536 xor r13,r8
537 mov r10,r9
538 xor r9,r12
539 shr r8,8
540 movzx r13,r13b
541 shr r9,8
542 xor r8,QWORD[((-128))+rcx*8+rbp]
543 shl r10,56
544 xor r9,QWORD[rcx*8+rbp]
545 rol edx,8
546 xor r8,QWORD[8+rax*1+rsi]
547 xor r9,QWORD[rax*1+rsi]
548 mov al,dl
549 xor r8,r10
550 movzx r13,WORD[r13*2+r11]
551 movzx ecx,dl
552 shl al,4
553 movzx r12,BYTE[rbx*1+rsp]
554 shr ecx,4
555 shl r13,48
556 xor r12,r8
557 mov r10,r9
558 xor r9,r13
559 shr r8,8
560 movzx r12,r12b
; Reload edx with bytes 0..3 of the stored block.
561 mov edx,DWORD[rdi]
562 shr r9,8
563 xor r8,QWORD[((-128))+rbx*8+rbp]
564 shl r10,56
565 xor r9,QWORD[rbx*8+rbp]
566 rol edx,8
567 xor r8,QWORD[8+rax*1+rsi]
568 xor r9,QWORD[rax*1+rsi]
569 mov al,dl
570 xor r8,r10
571 movzx r12,WORD[r12*2+r11]
572 movzx ebx,dl
573 shl al,4
574 movzx r13,BYTE[rcx*1+rsp]
575 shr ebx,4
576 shl r12,48
577 xor r13,r8
578 mov r10,r9
579 xor r9,r12
580 shr r8,8
581 movzx r13,r13b
582 shr r9,8
583 xor r8,QWORD[((-128))+rcx*8+rbp]
584 shl r10,56
585 xor r9,QWORD[rcx*8+rbp]
586 rol edx,8
587 xor r8,QWORD[8+rax*1+rsi]
588 xor r9,QWORD[rax*1+rsi]
589 mov al,dl
590 xor r8,r10
591 movzx r13,WORD[r13*2+r11]
592 movzx ecx,dl
593 shl al,4
594 movzx r12,BYTE[rbx*1+rsp]
595 shr ecx,4
596 shl r13,48
597 xor r12,r8
598 mov r10,r9
599 xor r9,r13
600 shr r8,8
601 movzx r12,r12b
602 shr r9,8
603 xor r8,QWORD[((-128))+rbx*8+rbp]
604 shl r10,56
605 xor r9,QWORD[rbx*8+rbp]
606 rol edx,8
607 xor r8,QWORD[8+rax*1+rsi]
608 xor r9,QWORD[rax*1+rsi]
609 mov al,dl
610 xor r8,r10
611 movzx r12,WORD[r12*2+r11]
612 movzx ebx,dl
613 shl al,4
614 movzx r13,BYTE[rcx*1+rsp]
615 shr ebx,4
616 shl r12,48
617 xor r13,r8
618 mov r10,r9
619 xor r9,r12
620 shr r8,8
621 movzx r13,r13b
622 shr r9,8
623 xor r8,QWORD[((-128))+rcx*8+rbp]
624 shl r10,56
625 xor r9,QWORD[rcx*8+rbp]
626 rol edx,8
627 xor r8,QWORD[8+rax*1+rsi]
628 xor r9,QWORD[rax*1+rsi]
629 mov al,dl
630 xor r8,r10
631 movzx r13,WORD[r13*2+r11]
632 movzx ecx,dl
633 shl al,4
634 movzx r12,BYTE[rbx*1+rsp]
; Last byte of the block: ecx keeps the high nibble in place (mask 240 =
; 0xf0), so it is used further down as a byte offset into the table at rsi.
635 and ecx,240
636 shl r13,48
637 xor r12,r8
638 mov r10,r9
639 xor r9,r13
640 shr r8,8
641 movzx r12,r12b
; NOTE(review): this load reads the 4 bytes just below rdi, and edx is not
; read again before the top of the loop overwrites rdx - confirm that the
; caller's buffer makes the read safe.
642 mov edx,DWORD[((-4))+rdi]
643 shr r9,8
644 xor r8,QWORD[((-128))+rbx*8+rbp]
645 shl r10,56
646 xor r9,QWORD[rbx*8+rbp]
647 movzx r12,WORD[r12*2+r11]
648 xor r8,QWORD[8+rax*1+rsi]
649 xor r9,QWORD[rax*1+rsi]
650 shl r12,48
651 xor r8,r10
652 xor r9,r12
; Closing step shifts r9:r8 by 4 bits only; the $L$rem_8bit index is the
; low nibble of r8 shifted left by 4.
653 movzx r13,r8b
654 shr r8,4
655 mov r10,r9
656 shl r13b,4
657 shr r9,4
658 xor r8,QWORD[8+rcx*1+rsi]
659 movzx r13,WORD[r13*2+r11]
660 shl r10,60
661 xor r9,QWORD[rcx*1+rsi]
662 xor r8,r10
663 shl r13,48
664 bswap r8
665 xor r9,r13
666 bswap r9
; Repeat while r14 is below the end pointer (unsigned compare).
667 cmp r14,r15
668 jb NEAR $L$outer_loop
669 mov QWORD[8+rdi],r8
670 mov QWORD[rdi],r9
671
; rsi -> the six saved registers above the 280-byte frame; reload them in
; reverse push order and drop the frame plus the six slots.
672 lea rsi,[280+rsp]
673 mov r15,QWORD[rsi]
674 mov r14,QWORD[8+rsi]
675 mov r13,QWORD[16+rsi]
676 mov r12,QWORD[24+rsi]
677 mov rbp,QWORD[32+rsi]
678 mov rbx,QWORD[40+rsi]
679 lea rsp,[48+rsi]
680$L$ghash_epilogue:
681 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
682 mov rsi,QWORD[16+rsp]
683 DB 0F3h,0C3h ;repret
684$L$SEH_end_gcm_ghash_4bit:
;-----------------------------------------------------------------------
; gcm_init_clmul
; In:   rcx = output buffer; six 16-byte values are stored at offsets
;             0, 16, 32, 48, 64 and 80
;       rdx = 16-byte input, loaded unaligned
;       (presumably Htable and the hash key H of the C GCM code - confirm)
; Uses: $L$0x1c2_polynomial, defined outside this view.
; Stack: 24 bytes; xmm6 is saved at [rsp] and restored before returning.
; Hand-encoded instructions (DB lines):
;   0x48,0x83,0xec,0x18  = sub rsp,0x18
;   0x0f,0x29,0x34,0x24  = movaps [rsp],xmm6
;   102,15,58,68,194,0   = pclmulqdq xmm0,xmm2,0x00
;   102,15,58,68,202,17  = pclmulqdq xmm1,xmm2,0x11
;   102,15,58,68,222,0   = pclmulqdq xmm3,xmm6,0x00
;   102,15,58,15,227,8   = palignr xmm4,xmm3,8
;-----------------------------------------------------------------------
685global gcm_init_clmul
686
687ALIGN 16
688gcm_init_clmul:
689$L$_init_clmul:
690$L$SEH_begin_gcm_init_clmul:
691
692DB 0x48,0x83,0xec,0x18
693DB 0x0f,0x29,0x34,0x24
; xmm2 = input with its two qwords swapped (pshufd selector 78 = 0x4e).
694 movdqu xmm2,XMMWORD[rdx]
695 pshufd xmm2,xmm2,78
696
697
; Shift the 128-bit value left by one bit (the bit leaving the low qword
; is carried into the high qword). xmm5 becomes all-ones when the top
; bit of the unshifted value was set.
698 pshufd xmm4,xmm2,255
699 movdqa xmm3,xmm2
700 psllq xmm2,1
701 pxor xmm5,xmm5
702 psrlq xmm3,63
703 pcmpgtd xmm5,xmm4
704 pslldq xmm3,8
705 por xmm2,xmm3
706
707
; XOR in the constant only when that top bit was set.
708 pand xmm5,XMMWORD[$L$0x1c2_polynomial]
709 pxor xmm2,xmm5
710
711
; xmm6 = high qword XOR low qword of xmm2, the third multiplicand of the
; three-multiplication product below. xmm0 starts as a copy of xmm2, so
; this first product multiplies the value by itself.
712 pshufd xmm6,xmm2,78
713 movdqa xmm0,xmm2
714 pxor xmm6,xmm2
715 movdqa xmm1,xmm0
716 pshufd xmm3,xmm0,78
717 pxor xmm3,xmm0
718DB 102,15,58,68,194,0
719DB 102,15,58,68,202,17
720DB 102,15,58,68,222,0
721 pxor xmm3,xmm0
722 pxor xmm3,xmm1
723
724 movdqa xmm4,xmm3
725 psrldq xmm3,8
726 pslldq xmm4,8
727 pxor xmm1,xmm3
728 pxor xmm0,xmm4
729
; Shift/XOR sequence that folds the 256-bit product xmm1:xmm0 back into
; 128 bits in xmm0 (presumably the reduction modulo the GCM polynomial -
; confirm against the generator script).
730 movdqa xmm4,xmm0
731 movdqa xmm3,xmm0
732 psllq xmm0,5
733 pxor xmm3,xmm0
734 psllq xmm0,1
735 pxor xmm0,xmm3
736 psllq xmm0,57
737 movdqa xmm3,xmm0
738 pslldq xmm0,8
739 psrldq xmm3,8
740 pxor xmm0,xmm4
741 pxor xmm1,xmm3
742
743
744 movdqa xmm4,xmm0
745 psrlq xmm0,1
746 pxor xmm1,xmm4
747 pxor xmm4,xmm0
748 psrlq xmm0,5
749 pxor xmm0,xmm4
750 psrlq xmm0,1
751 pxor xmm0,xmm1
; Store xmm2 at +0, the product at +16, and at +32 the palignr combination
; of both values' (high qword XOR low qword) terms.
752 pshufd xmm3,xmm2,78
753 pshufd xmm4,xmm0,78
754 pxor xmm3,xmm2
755 movdqu XMMWORD[rcx],xmm2
756 pxor xmm4,xmm0
757 movdqu XMMWORD[16+rcx],xmm0
758DB 102,15,58,15,227,8
759 movdqu XMMWORD[32+rcx],xmm4
; Second product: the previous result times xmm2, folded the same way.
760 movdqa xmm1,xmm0
761 pshufd xmm3,xmm0,78
762 pxor xmm3,xmm0
763DB 102,15,58,68,194,0
764DB 102,15,58,68,202,17
765DB 102,15,58,68,222,0
766 pxor xmm3,xmm0
767 pxor xmm3,xmm1
768
769 movdqa xmm4,xmm3
770 psrldq xmm3,8
771 pslldq xmm4,8
772 pxor xmm1,xmm3
773 pxor xmm0,xmm4
774
775 movdqa xmm4,xmm0
776 movdqa xmm3,xmm0
777 psllq xmm0,5
778 pxor xmm3,xmm0
779 psllq xmm0,1
780 pxor xmm0,xmm3
781 psllq xmm0,57
782 movdqa xmm3,xmm0
783 pslldq xmm0,8
784 psrldq xmm3,8
785 pxor xmm0,xmm4
786 pxor xmm1,xmm3
787
788
789 movdqa xmm4,xmm0
790 psrlq xmm0,1
791 pxor xmm1,xmm4
792 pxor xmm4,xmm0
793 psrlq xmm0,5
794 pxor xmm0,xmm4
795 psrlq xmm0,1
796 pxor xmm0,xmm1
; Keep the second product in xmm5 and multiply by xmm2 once more.
797 movdqa xmm5,xmm0
798 movdqa xmm1,xmm0
799 pshufd xmm3,xmm0,78
800 pxor xmm3,xmm0
801DB 102,15,58,68,194,0
802DB 102,15,58,68,202,17
803DB 102,15,58,68,222,0
804 pxor xmm3,xmm0
805 pxor xmm3,xmm1
806
807 movdqa xmm4,xmm3
808 psrldq xmm3,8
809 pslldq xmm4,8
810 pxor xmm1,xmm3
811 pxor xmm0,xmm4
812
813 movdqa xmm4,xmm0
814 movdqa xmm3,xmm0
815 psllq xmm0,5
816 pxor xmm3,xmm0
817 psllq xmm0,1
818 pxor xmm0,xmm3
819 psllq xmm0,57
820 movdqa xmm3,xmm0
821 pslldq xmm0,8
822 psrldq xmm3,8
823 pxor xmm0,xmm4
824 pxor xmm1,xmm3
825
826
827 movdqa xmm4,xmm0
828 psrlq xmm0,1
829 pxor xmm1,xmm4
830 pxor xmm4,xmm0
831 psrlq xmm0,5
832 pxor xmm0,xmm4
833 psrlq xmm0,1
834 pxor xmm0,xmm1
; Store the second product at +48, the third at +64, and their combined
; (high qword XOR low qword) terms at +80.
835 pshufd xmm3,xmm5,78
836 pshufd xmm4,xmm0,78
837 pxor xmm3,xmm5
838 movdqu XMMWORD[48+rcx],xmm5
839 pxor xmm4,xmm0
840 movdqu XMMWORD[64+rcx],xmm0
841DB 102,15,58,15,227,8
842 movdqu XMMWORD[80+rcx],xmm4
; Restore xmm6 and release the 24-byte frame.
843 movaps xmm6,XMMWORD[rsp]
844 lea rsp,[24+rsp]
845$L$SEH_end_gcm_init_clmul:
846 DB 0F3h,0C3h ;repret
847
;-----------------------------------------------------------------------
; gcm_gmult_clmul
; In:   rcx = 16-byte block; loaded, byte-shuffled with $L$bswap_mask,
;             multiplied, shuffled back and stored in place
;       rdx = table; the 16 bytes at offset 0 and at offset 32 are read
;       (presumably Xi and Htable of the C GCM code - confirm)
; Only xmm0-xmm5 are written and no stack is used. gcm_gmult_avx jumps
; to the local label $L$_gmult_clmul.
; Hand-encoded instructions (DB lines):
;   102,15,56,0,197      = pshufb xmm0,xmm5
;   102,15,58,68,194,0   = pclmulqdq xmm0,xmm2,0x00
;   102,15,58,68,202,17  = pclmulqdq xmm1,xmm2,0x11
;   102,15,58,68,220,0   = pclmulqdq xmm3,xmm4,0x00
;-----------------------------------------------------------------------
848global gcm_gmult_clmul
849
850ALIGN 16
851gcm_gmult_clmul:
852$L$_gmult_clmul:
853 movdqu xmm0,XMMWORD[rcx]
854 movdqa xmm5,XMMWORD[$L$bswap_mask]
855 movdqu xmm2,XMMWORD[rdx]
856 movdqu xmm4,XMMWORD[32+rdx]
857DB 102,15,56,0,197
; Three carry-less multiplications: low halves, high halves, and the
; (high XOR low) term of xmm0 against the value loaded from [32+rdx].
858 movdqa xmm1,xmm0
859 pshufd xmm3,xmm0,78
860 pxor xmm3,xmm0
861DB 102,15,58,68,194,0
862DB 102,15,58,68,202,17
863DB 102,15,58,68,220,0
864 pxor xmm3,xmm0
865 pxor xmm3,xmm1
866
867 movdqa xmm4,xmm3
868 psrldq xmm3,8
869 pslldq xmm4,8
870 pxor xmm1,xmm3
871 pxor xmm0,xmm4
872
; Shift/XOR sequence folding the 256-bit product xmm1:xmm0 into xmm0.
873 movdqa xmm4,xmm0
874 movdqa xmm3,xmm0
875 psllq xmm0,5
876 pxor xmm3,xmm0
877 psllq xmm0,1
878 pxor xmm0,xmm3
879 psllq xmm0,57
880 movdqa xmm3,xmm0
881 pslldq xmm0,8
882 psrldq xmm3,8
883 pxor xmm0,xmm4
884 pxor xmm1,xmm3
885
886
887 movdqa xmm4,xmm0
888 psrlq xmm0,1
889 pxor xmm1,xmm4
890 pxor xmm4,xmm0
891 psrlq xmm0,5
892 pxor xmm0,xmm4
893 psrlq xmm0,1
894 pxor xmm0,xmm1
; Shuffle the bytes back and store the result over the input block.
895DB 102,15,56,0,197
896 movdqu XMMWORD[rcx],xmm0
897 DB 0F3h,0C3h ;repret
898
;-----------------------------------------------------------------------
; gcm_ghash_clmul
; In:   rcx = 16-byte accumulator block (loaded at entry, stored at $L$done)
;       rdx = table; offsets 0, 16 and 32 are read on the short paths,
;             offsets 48, 64 and 80 additionally on the 4-block path
;       r8  = input pointer
;       r9  = byte count
;       (presumably Xi, Htable, inp, len of the C GCM code - confirm)
; Uses: $L$bswap_mask, $L$7_mask (defined outside this view) and the
;       dword at OPENSSL_ia32cap_P+4.
; Stack: rsp is lowered by 168; xmm6..xmm15 are saved at [rsp+0]..[rsp+144]
;       and restored at $L$done.
; Paths:
;   - exactly one block                  -> $L$odd_tail
;   - at least four blocks, CPU check ok -> $L$mod4_loop, 64 bytes per pass
;   - otherwise                          -> $L$mod_loop, 32 bytes per pass,
;                                           finished by $L$even_tail/$L$odd_tail
; DB lines are hand-encoded instructions:
;   102,[REX,]15,58,68,ModRM,imm = pclmulqdq
;   102,[REX,]15,56,0,ModRM      = pshufb
;   102,76,15,110,200            = movq xmm9,rax
;-----------------------------------------------------------------------
899global gcm_ghash_clmul
900
901ALIGN 32
902gcm_ghash_clmul:
903$L$_ghash_clmul:
904 lea rax,[((-136))+rsp]
905$L$SEH_begin_gcm_ghash_clmul:
906
; Hand-encoded prologue, with rax = entry rsp - 136:
;   lea rsp,[rax-0x20]                 (rsp = entry rsp - 168)
;   movaps [rax-0x20],xmm6 ; movaps [rax-0x10],xmm7
;   movaps [rax+0x00],xmm8 ... movaps [rax+0x70],xmm15 in 16-byte steps
907DB 0x48,0x8d,0x60,0xe0
908DB 0x0f,0x29,0x70,0xe0
909DB 0x0f,0x29,0x78,0xf0
910DB 0x44,0x0f,0x29,0x00
911DB 0x44,0x0f,0x29,0x48,0x10
912DB 0x44,0x0f,0x29,0x50,0x20
913DB 0x44,0x0f,0x29,0x58,0x30
914DB 0x44,0x0f,0x29,0x60,0x40
915DB 0x44,0x0f,0x29,0x68,0x50
916DB 0x44,0x0f,0x29,0x70,0x60
917DB 0x44,0x0f,0x29,0x78,0x70
918 movdqa xmm10,XMMWORD[$L$bswap_mask]
919
920 movdqu xmm0,XMMWORD[rcx]
921 movdqu xmm2,XMMWORD[rdx]
922 movdqu xmm7,XMMWORD[32+rdx]
; DB below = pshufb xmm0,xmm10 (byte-shuffle the accumulator).
923DB 102,65,15,56,0,194
924
; r9 is reduced by 16 up front; zero means exactly one block of input.
925 sub r9,0x10
926 jz NEAR $L$odd_tail
927
928 movdqu xmm6,XMMWORD[16+rdx]
929 mov eax,DWORD[((OPENSSL_ia32cap_P+4))]
930 cmp r9,0x30
931 jb NEAR $L$skip4x
932
; 71303168 = 0x4400000 and 4194304 = 0x400000: the 4-block path is skipped
; when, of bits 22 and 26, only bit 22 is set (presumably a CPU-model
; check - confirm against the generator script).
933 and eax,71303168
934 cmp eax,4194304
935 je NEAR $L$skip4x
936
937 sub r9,0x30
; Constant that $L$mod4_loop moves into xmm9 with "movq xmm9,rax".
938 mov rax,0xA040608020C0E000
939 movdqu xmm14,XMMWORD[48+rdx]
940 movdqu xmm15,XMMWORD[64+rdx]
941
942
943
944
; Start products for the first four input blocks; the block at [r8] is
; XORed into the accumulator xmm0 before it is multiplied.
945 movdqu xmm3,XMMWORD[48+r8]
946 movdqu xmm11,XMMWORD[32+r8]
947DB 102,65,15,56,0,218
948DB 102,69,15,56,0,218
949 movdqa xmm5,xmm3
950 pshufd xmm4,xmm3,78
951 pxor xmm4,xmm3
952DB 102,15,58,68,218,0
953DB 102,15,58,68,234,17
954DB 102,15,58,68,231,0
955
956 movdqa xmm13,xmm11
957 pshufd xmm12,xmm11,78
958 pxor xmm12,xmm11
959DB 102,68,15,58,68,222,0
960DB 102,68,15,58,68,238,17
961DB 102,68,15,58,68,231,16
962 xorps xmm3,xmm11
963 xorps xmm5,xmm13
964 movups xmm7,XMMWORD[80+rdx]
965 xorps xmm4,xmm12
966
967 movdqu xmm11,XMMWORD[16+r8]
968 movdqu xmm8,XMMWORD[r8]
969DB 102,69,15,56,0,218
970DB 102,69,15,56,0,194
971 movdqa xmm13,xmm11
972 pshufd xmm12,xmm11,78
973 pxor xmm0,xmm8
974 pxor xmm12,xmm11
975DB 102,69,15,58,68,222,0
976 movdqa xmm1,xmm0
977 pshufd xmm8,xmm0,78
978 pxor xmm8,xmm0
979DB 102,69,15,58,68,238,17
980DB 102,68,15,58,68,231,0
981 xorps xmm3,xmm11
982 xorps xmm5,xmm13
983
984 lea r8,[64+r8]
; Carry set means fewer than 64 bytes remain: finish in $L$tail4x.
985 sub r9,0x40
986 jc NEAR $L$tail4x
987
988 jmp NEAR $L$mod4_loop
989ALIGN 32
; 64 bytes per pass: the folding of the previous four-block sum is
; interleaved with the multiplications for the next four blocks.
990$L$mod4_loop:
991DB 102,65,15,58,68,199,0
992 xorps xmm4,xmm12
993 movdqu xmm11,XMMWORD[48+r8]
994DB 102,69,15,56,0,218
995DB 102,65,15,58,68,207,17
996 xorps xmm0,xmm3
997 movdqu xmm3,XMMWORD[32+r8]
998 movdqa xmm13,xmm11
999DB 102,68,15,58,68,199,16
1000 pshufd xmm12,xmm11,78
1001 xorps xmm1,xmm5
1002 pxor xmm12,xmm11
1003DB 102,65,15,56,0,218
1004 movups xmm7,XMMWORD[32+rdx]
1005 xorps xmm8,xmm4
1006DB 102,68,15,58,68,218,0
1007 pshufd xmm4,xmm3,78
1008
1009 pxor xmm8,xmm0
1010 movdqa xmm5,xmm3
1011 pxor xmm8,xmm1
1012 pxor xmm4,xmm3
1013 movdqa xmm9,xmm8
1014DB 102,68,15,58,68,234,17
1015 pslldq xmm8,8
1016 psrldq xmm9,8
1017 pxor xmm0,xmm8
1018 movdqa xmm8,XMMWORD[$L$7_mask]
1019 pxor xmm1,xmm9
; DB below = movq xmm9,rax (the constant loaded before the loop).
1020DB 102,76,15,110,200
1021
1022 pand xmm8,xmm0
; DB below = pshufb xmm9,xmm8.
1023DB 102,69,15,56,0,200
1024 pxor xmm9,xmm0
1025DB 102,68,15,58,68,231,0
1026 psllq xmm9,57
1027 movdqa xmm8,xmm9
1028 pslldq xmm9,8
1029DB 102,15,58,68,222,0
1030 psrldq xmm8,8
1031 pxor xmm0,xmm9
1032 pxor xmm1,xmm8
1033 movdqu xmm8,XMMWORD[r8]
1034
1035 movdqa xmm9,xmm0
1036 psrlq xmm0,1
1037DB 102,15,58,68,238,17
1038 xorps xmm3,xmm11
1039 movdqu xmm11,XMMWORD[16+r8]
1040DB 102,69,15,56,0,218
1041DB 102,15,58,68,231,16
1042 xorps xmm5,xmm13
1043 movups xmm7,XMMWORD[80+rdx]
1044DB 102,69,15,56,0,194
1045 pxor xmm1,xmm9
1046 pxor xmm9,xmm0
1047 psrlq xmm0,5
1048
1049 movdqa xmm13,xmm11
1050 pxor xmm4,xmm12
1051 pshufd xmm12,xmm11,78
1052 pxor xmm0,xmm9
1053 pxor xmm1,xmm8
1054 pxor xmm12,xmm11
1055DB 102,69,15,58,68,222,0
1056 psrlq xmm0,1
1057 pxor xmm0,xmm1
1058 movdqa xmm1,xmm0
1059DB 102,69,15,58,68,238,17
1060 xorps xmm3,xmm11
1061 pshufd xmm8,xmm0,78
1062 pxor xmm8,xmm0
1063
1064DB 102,68,15,58,68,231,0
1065 xorps xmm5,xmm13
1066
1067 lea r8,[64+r8]
1068 sub r9,0x40
1069 jnc NEAR $L$mod4_loop
1070
; Finish the last four-block group: complete the pending products and
; fold the sum into xmm0.
1071$L$tail4x:
1072DB 102,65,15,58,68,199,0
1073DB 102,65,15,58,68,207,17
1074DB 102,68,15,58,68,199,16
1075 xorps xmm4,xmm12
1076 xorps xmm0,xmm3
1077 xorps xmm1,xmm5
1078 pxor xmm1,xmm0
1079 pxor xmm8,xmm4
1080
1081 pxor xmm8,xmm1
1082 pxor xmm1,xmm0
1083
1084 movdqa xmm9,xmm8
1085 psrldq xmm8,8
1086 pslldq xmm9,8
1087 pxor xmm1,xmm8
1088 pxor xmm0,xmm9
1089
1090 movdqa xmm4,xmm0
1091 movdqa xmm3,xmm0
1092 psllq xmm0,5
1093 pxor xmm3,xmm0
1094 psllq xmm0,1
1095 pxor xmm0,xmm3
1096 psllq xmm0,57
1097 movdqa xmm3,xmm0
1098 pslldq xmm0,8
1099 psrldq xmm3,8
1100 pxor xmm0,xmm4
1101 pxor xmm1,xmm3
1102
1103
1104 movdqa xmm4,xmm0
1105 psrlq xmm0,1
1106 pxor xmm1,xmm4
1107 pxor xmm4,xmm0
1108 psrlq xmm0,5
1109 pxor xmm0,xmm4
1110 psrlq xmm0,1
1111 pxor xmm0,xmm1
; Undo the last subtraction; r9 == 0 means all input has been consumed.
1112 add r9,0x40
1113 jz NEAR $L$done
1114 movdqu xmm7,XMMWORD[32+rdx]
1115 sub r9,0x10
1116 jz NEAR $L$odd_tail
1117$L$skip4x:
1118
1119
1120
1121
1122
; Two-block path: XOR the block at [r8] into the accumulator and start
; the product for the block at [r8+16].
1123 movdqu xmm8,XMMWORD[r8]
1124 movdqu xmm3,XMMWORD[16+r8]
1125DB 102,69,15,56,0,194
1126DB 102,65,15,56,0,218
1127 pxor xmm0,xmm8
1128
1129 movdqa xmm5,xmm3
1130 pshufd xmm4,xmm3,78
1131 pxor xmm4,xmm3
1132DB 102,15,58,68,218,0
1133DB 102,15,58,68,234,17
1134DB 102,15,58,68,231,0
1135
1136 lea r8,[32+r8]
1137 nop
1138 sub r9,0x20
1139 jbe NEAR $L$even_tail
1140 nop
1141 jmp NEAR $L$mod_loop
1142
1143ALIGN 32
; 32 bytes per pass; continues while r9 stays above zero after the
; subtraction (unsigned "ja").
1144$L$mod_loop:
1145 movdqa xmm1,xmm0
1146 movdqa xmm8,xmm4
1147 pshufd xmm4,xmm0,78
1148 pxor xmm4,xmm0
1149
1150DB 102,15,58,68,198,0
1151DB 102,15,58,68,206,17
1152DB 102,15,58,68,231,16
1153
1154 pxor xmm0,xmm3
1155 pxor xmm1,xmm5
1156 movdqu xmm9,XMMWORD[r8]
1157 pxor xmm8,xmm0
1158DB 102,69,15,56,0,202
1159 movdqu xmm3,XMMWORD[16+r8]
1160
1161 pxor xmm8,xmm1
1162 pxor xmm1,xmm9
1163 pxor xmm4,xmm8
1164DB 102,65,15,56,0,218
1165 movdqa xmm8,xmm4
1166 psrldq xmm8,8
1167 pslldq xmm4,8
1168 pxor xmm1,xmm8
1169 pxor xmm0,xmm4
1170
1171 movdqa xmm5,xmm3
1172
1173 movdqa xmm9,xmm0
1174 movdqa xmm8,xmm0
1175 psllq xmm0,5
1176 pxor xmm8,xmm0
1177DB 102,15,58,68,218,0
1178 psllq xmm0,1
1179 pxor xmm0,xmm8
1180 psllq xmm0,57
1181 movdqa xmm8,xmm0
1182 pslldq xmm0,8
1183 psrldq xmm8,8
1184 pxor xmm0,xmm9
1185 pshufd xmm4,xmm5,78
1186 pxor xmm1,xmm8
1187 pxor xmm4,xmm5
1188
1189 movdqa xmm9,xmm0
1190 psrlq xmm0,1
1191DB 102,15,58,68,234,17
1192 pxor xmm1,xmm9
1193 pxor xmm9,xmm0
1194 psrlq xmm0,5
1195 pxor xmm0,xmm9
1196 lea r8,[32+r8]
1197 psrlq xmm0,1
1198DB 102,15,58,68,231,0
1199 pxor xmm0,xmm1
1200
1201 sub r9,0x20
1202 ja NEAR $L$mod_loop
1203
; Finish the last pair of blocks.
1204$L$even_tail:
1205 movdqa xmm1,xmm0
1206 movdqa xmm8,xmm4
1207 pshufd xmm4,xmm0,78
1208 pxor xmm4,xmm0
1209
1210DB 102,15,58,68,198,0
1211DB 102,15,58,68,206,17
1212DB 102,15,58,68,231,16
1213
1214 pxor xmm0,xmm3
1215 pxor xmm1,xmm5
1216 pxor xmm8,xmm0
1217 pxor xmm8,xmm1
1218 pxor xmm4,xmm8
1219 movdqa xmm8,xmm4
1220 psrldq xmm8,8
1221 pslldq xmm4,8
1222 pxor xmm1,xmm8
1223 pxor xmm0,xmm4
1224
1225 movdqa xmm4,xmm0
1226 movdqa xmm3,xmm0
1227 psllq xmm0,5
1228 pxor xmm3,xmm0
1229 psllq xmm0,1
1230 pxor xmm0,xmm3
1231 psllq xmm0,57
1232 movdqa xmm3,xmm0
1233 pslldq xmm0,8
1234 psrldq xmm3,8
1235 pxor xmm0,xmm4
1236 pxor xmm1,xmm3
1237
1238
1239 movdqa xmm4,xmm0
1240 psrlq xmm0,1
1241 pxor xmm1,xmm4
1242 pxor xmm4,xmm0
1243 psrlq xmm0,5
1244 pxor xmm0,xmm4
1245 psrlq xmm0,1
1246 pxor xmm0,xmm1
; A non-zero r9 here goes straight to $L$done; zero falls through to
; process one more block in $L$odd_tail.
1247 test r9,r9
1248 jnz NEAR $L$done
1249
; Single remaining block: XOR it into the accumulator and multiply once.
1250$L$odd_tail:
1251 movdqu xmm8,XMMWORD[r8]
1252DB 102,69,15,56,0,194
1253 pxor xmm0,xmm8
1254 movdqa xmm1,xmm0
1255 pshufd xmm3,xmm0,78
1256 pxor xmm3,xmm0
1257DB 102,15,58,68,194,0
1258DB 102,15,58,68,202,17
1259DB 102,15,58,68,223,0
1260 pxor xmm3,xmm0
1261 pxor xmm3,xmm1
1262
1263 movdqa xmm4,xmm3
1264 psrldq xmm3,8
1265 pslldq xmm4,8
1266 pxor xmm1,xmm3
1267 pxor xmm0,xmm4
1268
1269 movdqa xmm4,xmm0
1270 movdqa xmm3,xmm0
1271 psllq xmm0,5
1272 pxor xmm3,xmm0
1273 psllq xmm0,1
1274 pxor xmm0,xmm3
1275 psllq xmm0,57
1276 movdqa xmm3,xmm0
1277 pslldq xmm0,8
1278 psrldq xmm3,8
1279 pxor xmm0,xmm4
1280 pxor xmm1,xmm3
1281
1282
1283 movdqa xmm4,xmm0
1284 psrlq xmm0,1
1285 pxor xmm1,xmm4
1286 pxor xmm4,xmm0
1287 psrlq xmm0,5
1288 pxor xmm0,xmm4
1289 psrlq xmm0,1
1290 pxor xmm0,xmm1
; Shuffle the accumulator bytes back (pshufb xmm0,xmm10), store it, then
; restore xmm6..xmm15 and release the 168-byte frame.
1291$L$done:
1292DB 102,65,15,56,0,194
1293 movdqu XMMWORD[rcx],xmm0
1294 movaps xmm6,XMMWORD[rsp]
1295 movaps xmm7,XMMWORD[16+rsp]
1296 movaps xmm8,XMMWORD[32+rsp]
1297 movaps xmm9,XMMWORD[48+rsp]
1298 movaps xmm10,XMMWORD[64+rsp]
1299 movaps xmm11,XMMWORD[80+rsp]
1300 movaps xmm12,XMMWORD[96+rsp]
1301 movaps xmm13,XMMWORD[112+rsp]
1302 movaps xmm14,XMMWORD[128+rsp]
1303 movaps xmm15,XMMWORD[144+rsp]
1304 lea rsp,[168+rsp]
1305$L$SEH_end_gcm_ghash_clmul:
1306 DB 0F3h,0C3h ;repret
1307
;-----------------------------------------------------------------------
; gcm_init_avx
; In:   rcx = output buffer; the loop runs four times and advances rcx by
;             48 bytes per pass, so 192 bytes are written in total
;       rdx = 16-byte input, loaded unaligned
;       (presumably Htable and the hash key H of the C GCM code - confirm)
; Uses: $L$0x1c2_polynomial, defined outside this view.
; Stack: 24 bytes; xmm6 is saved at [rsp] and restored before returning.
; The two DB lines are hand-encoded: sub rsp,0x18 / movaps [rsp],xmm6.
; Each pass stores two values at [rcx] and [rcx+16]; the third 16-byte
; slot of the group is written one step later through [rcx-16].
;-----------------------------------------------------------------------
1308global gcm_init_avx
1309
1310ALIGN 32
1311gcm_init_avx:
1312$L$SEH_begin_gcm_init_avx:
1313
1314DB 0x48,0x83,0xec,0x18
1315DB 0x0f,0x29,0x34,0x24
1316 vzeroupper
1317
; xmm2 = input with its two qwords swapped (vpshufd selector 78 = 0x4e).
1318 vmovdqu xmm2,XMMWORD[rdx]
1319 vpshufd xmm2,xmm2,78
1320
1321
; Shift the 128-bit value left by one bit; xmm5 becomes all-ones when
; the top bit of the unshifted value was set.
1322 vpshufd xmm4,xmm2,255
1323 vpsrlq xmm3,xmm2,63
1324 vpsllq xmm2,xmm2,1
1325 vpxor xmm5,xmm5,xmm5
1326 vpcmpgtd xmm5,xmm5,xmm4
1327 vpslldq xmm3,xmm3,8
1328 vpor xmm2,xmm2,xmm3
1329
1330
; XOR in the constant only when that top bit was set.
1331 vpand xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial]
1332 vpxor xmm2,xmm2,xmm5
1333
; xmm6 = high qword XOR low qword of xmm2; r10 = number of passes.
1334 vpunpckhqdq xmm6,xmm2,xmm2
1335 vmovdqa xmm0,xmm2
1336 vpxor xmm6,xmm6,xmm2
1337 mov r10,4
1338 jmp NEAR $L$init_start_avx
1339ALIGN 32
; Loop re-entry: store the previous group's third slot, then multiply
; xmm0 by xmm2 and fold the product.
1340$L$init_loop_avx:
1341 vpalignr xmm5,xmm4,xmm3,8
1342 vmovdqu XMMWORD[(-16)+rcx],xmm5
1343 vpunpckhqdq xmm3,xmm0,xmm0
1344 vpxor xmm3,xmm3,xmm0
1345 vpclmulqdq xmm1,xmm0,xmm2,0x11
1346 vpclmulqdq xmm0,xmm0,xmm2,0x00
1347 vpclmulqdq xmm3,xmm3,xmm6,0x00
1348 vpxor xmm4,xmm1,xmm0
1349 vpxor xmm3,xmm3,xmm4
1350
1351 vpslldq xmm4,xmm3,8
1352 vpsrldq xmm3,xmm3,8
1353 vpxor xmm0,xmm0,xmm4
1354 vpxor xmm1,xmm1,xmm3
1355 vpsllq xmm3,xmm0,57
1356 vpsllq xmm4,xmm0,62
1357 vpxor xmm4,xmm4,xmm3
1358 vpsllq xmm3,xmm0,63
1359 vpxor xmm4,xmm4,xmm3
1360 vpslldq xmm3,xmm4,8
1361 vpsrldq xmm4,xmm4,8
1362 vpxor xmm0,xmm0,xmm3
1363 vpxor xmm1,xmm1,xmm4
1364
1365 vpsrlq xmm4,xmm0,1
1366 vpxor xmm1,xmm1,xmm0
1367 vpxor xmm0,xmm0,xmm4
1368 vpsrlq xmm4,xmm4,5
1369 vpxor xmm0,xmm0,xmm4
1370 vpsrlq xmm0,xmm0,1
1371 vpxor xmm0,xmm0,xmm1
; First-pass entry point: keep the current value in xmm5, multiply xmm0
; by xmm2 once more and fold the product.
1372$L$init_start_avx:
1373 vmovdqa xmm5,xmm0
1374 vpunpckhqdq xmm3,xmm0,xmm0
1375 vpxor xmm3,xmm3,xmm0
1376 vpclmulqdq xmm1,xmm0,xmm2,0x11
1377 vpclmulqdq xmm0,xmm0,xmm2,0x00
1378 vpclmulqdq xmm3,xmm3,xmm6,0x00
1379 vpxor xmm4,xmm1,xmm0
1380 vpxor xmm3,xmm3,xmm4
1381
1382 vpslldq xmm4,xmm3,8
1383 vpsrldq xmm3,xmm3,8
1384 vpxor xmm0,xmm0,xmm4
1385 vpxor xmm1,xmm1,xmm3
1386 vpsllq xmm3,xmm0,57
1387 vpsllq xmm4,xmm0,62
1388 vpxor xmm4,xmm4,xmm3
1389 vpsllq xmm3,xmm0,63
1390 vpxor xmm4,xmm4,xmm3
1391 vpslldq xmm3,xmm4,8
1392 vpsrldq xmm4,xmm4,8
1393 vpxor xmm0,xmm0,xmm3
1394 vpxor xmm1,xmm1,xmm4
1395
1396 vpsrlq xmm4,xmm0,1
1397 vpxor xmm1,xmm1,xmm0
1398 vpxor xmm0,xmm0,xmm4
1399 vpsrlq xmm4,xmm4,5
1400 vpxor xmm0,xmm0,xmm4
1401 vpsrlq xmm0,xmm0,1
1402 vpxor xmm0,xmm0,xmm1
; Store the kept value and the new product, and prepare both
; (high qword XOR low qword) terms for the third slot.
1403 vpshufd xmm3,xmm5,78
1404 vpshufd xmm4,xmm0,78
1405 vpxor xmm3,xmm3,xmm5
1406 vmovdqu XMMWORD[rcx],xmm5
1407 vpxor xmm4,xmm4,xmm0
1408 vmovdqu XMMWORD[16+rcx],xmm0
1409 lea rcx,[48+rcx]
1410 sub r10,1
1411 jnz NEAR $L$init_loop_avx
1412
; NOTE(review): the source operands of this final vpalignr are in the
; opposite order to the one at $L$init_loop_avx - presumably intentional
; in the generator; confirm before touching.
1413 vpalignr xmm5,xmm3,xmm4,8
1414 vmovdqu XMMWORD[(-16)+rcx],xmm5
1415
1416 vzeroupper
1417 movaps xmm6,XMMWORD[rsp]
1418 lea rsp,[24+rsp]
1419$L$SEH_end_gcm_init_avx:
1420 DB 0F3h,0C3h ;repret
1421
; gcm_gmult_avx: no code of its own; it jumps to $L$_gmult_clmul, the body
; of gcm_gmult_clmul, which returns directly to this routine's caller.
1422global gcm_gmult_avx
1423
1424ALIGN 32
1425gcm_gmult_avx:
1426 jmp NEAR $L$_gmult_clmul
1427
1428global gcm_ghash_avx
1429
1430ALIGN 32
1431gcm_ghash_avx:
1432 lea rax,[((-136))+rsp]
1433$L$SEH_begin_gcm_ghash_avx:
1434
1435DB 0x48,0x8d,0x60,0xe0
1436DB 0x0f,0x29,0x70,0xe0
1437DB 0x0f,0x29,0x78,0xf0
1438DB 0x44,0x0f,0x29,0x00
1439DB 0x44,0x0f,0x29,0x48,0x10
1440DB 0x44,0x0f,0x29,0x50,0x20
1441DB 0x44,0x0f,0x29,0x58,0x30
1442DB 0x44,0x0f,0x29,0x60,0x40
1443DB 0x44,0x0f,0x29,0x68,0x50
1444DB 0x44,0x0f,0x29,0x70,0x60
1445DB 0x44,0x0f,0x29,0x78,0x70
1446 vzeroupper
1447
1448 vmovdqu xmm10,XMMWORD[rcx]
1449 lea r10,[$L$0x1c2_polynomial]
1450 lea rdx,[64+rdx]
1451 vmovdqu xmm13,XMMWORD[$L$bswap_mask]
1452 vpshufb xmm10,xmm10,xmm13
1453 cmp r9,0x80
1454 jb NEAR $L$short_avx
1455 sub r9,0x80
1456
1457 vmovdqu xmm14,XMMWORD[112+r8]
1458 vmovdqu xmm6,XMMWORD[((0-64))+rdx]
1459 vpshufb xmm14,xmm14,xmm13
1460 vmovdqu xmm7,XMMWORD[((32-64))+rdx]
1461
1462 vpunpckhqdq xmm9,xmm14,xmm14
1463 vmovdqu xmm15,XMMWORD[96+r8]
1464 vpclmulqdq xmm0,xmm14,xmm6,0x00
1465 vpxor xmm9,xmm9,xmm14
1466 vpshufb xmm15,xmm15,xmm13
1467 vpclmulqdq xmm1,xmm14,xmm6,0x11
1468 vmovdqu xmm6,XMMWORD[((16-64))+rdx]
1469 vpunpckhqdq xmm8,xmm15,xmm15
1470 vmovdqu xmm14,XMMWORD[80+r8]
1471 vpclmulqdq xmm2,xmm9,xmm7,0x00
1472 vpxor xmm8,xmm8,xmm15
1473
1474 vpshufb xmm14,xmm14,xmm13
1475 vpclmulqdq xmm3,xmm15,xmm6,0x00
1476 vpunpckhqdq xmm9,xmm14,xmm14
1477 vpclmulqdq xmm4,xmm15,xmm6,0x11
1478 vmovdqu xmm6,XMMWORD[((48-64))+rdx]
1479 vpxor xmm9,xmm9,xmm14
1480 vmovdqu xmm15,XMMWORD[64+r8]
1481 vpclmulqdq xmm5,xmm8,xmm7,0x10
1482 vmovdqu xmm7,XMMWORD[((80-64))+rdx]
1483
1484 vpshufb xmm15,xmm15,xmm13
1485 vpxor xmm3,xmm3,xmm0
1486 vpclmulqdq xmm0,xmm14,xmm6,0x00
1487 vpxor xmm4,xmm4,xmm1
1488 vpunpckhqdq xmm8,xmm15,xmm15
1489 vpclmulqdq xmm1,xmm14,xmm6,0x11
1490 vmovdqu xmm6,XMMWORD[((64-64))+rdx]
1491 vpxor xmm5,xmm5,xmm2
1492 vpclmulqdq xmm2,xmm9,xmm7,0x00
1493 vpxor xmm8,xmm8,xmm15
1494
1495 vmovdqu xmm14,XMMWORD[48+r8]
1496 vpxor xmm0,xmm0,xmm3
1497 vpclmulqdq xmm3,xmm15,xmm6,0x00
1498 vpxor xmm1,xmm1,xmm4
1499 vpshufb xmm14,xmm14,xmm13
1500 vpclmulqdq xmm4,xmm15,xmm6,0x11
1501 vmovdqu xmm6,XMMWORD[((96-64))+rdx]
1502 vpxor xmm2,xmm2,xmm5
1503 vpunpckhqdq xmm9,xmm14,xmm14
1504 vpclmulqdq xmm5,xmm8,xmm7,0x10
1505 vmovdqu xmm7,XMMWORD[((128-64))+rdx]
1506 vpxor xmm9,xmm9,xmm14
1507
1508 vmovdqu xmm15,XMMWORD[32+r8]
1509 vpxor xmm3,xmm3,xmm0
1510 vpclmulqdq xmm0,xmm14,xmm6,0x00
1511 vpxor xmm4,xmm4,xmm1
1512 vpshufb xmm15,xmm15,xmm13
1513 vpclmulqdq xmm1,xmm14,xmm6,0x11
1514 vmovdqu xmm6,XMMWORD[((112-64))+rdx]
1515 vpxor xmm5,xmm5,xmm2
1516 vpunpckhqdq xmm8,xmm15,xmm15
1517 vpclmulqdq xmm2,xmm9,xmm7,0x00
1518 vpxor xmm8,xmm8,xmm15
1519
1520 vmovdqu xmm14,XMMWORD[16+r8]
1521 vpxor xmm0,xmm0,xmm3
1522 vpclmulqdq xmm3,xmm15,xmm6,0x00
1523 vpxor xmm1,xmm1,xmm4
1524 vpshufb xmm14,xmm14,xmm13
1525 vpclmulqdq xmm4,xmm15,xmm6,0x11
1526 vmovdqu xmm6,XMMWORD[((144-64))+rdx]
1527 vpxor xmm2,xmm2,xmm5
1528 vpunpckhqdq xmm9,xmm14,xmm14
1529 vpclmulqdq xmm5,xmm8,xmm7,0x10
1530 vmovdqu xmm7,XMMWORD[((176-64))+rdx]
1531 vpxor xmm9,xmm9,xmm14
1532
1533 vmovdqu xmm15,XMMWORD[r8]
1534 vpxor xmm3,xmm3,xmm0
1535 vpclmulqdq xmm0,xmm14,xmm6,0x00
1536 vpxor xmm4,xmm4,xmm1
1537 vpshufb xmm15,xmm15,xmm13
1538 vpclmulqdq xmm1,xmm14,xmm6,0x11
1539 vmovdqu xmm6,XMMWORD[((160-64))+rdx]
1540 vpxor xmm5,xmm5,xmm2
1541 vpclmulqdq xmm2,xmm9,xmm7,0x10
1542
1543 lea r8,[128+r8]
1544 cmp r9,0x80
1545 jb NEAR $L$tail_avx
1546
1547 vpxor xmm15,xmm15,xmm10
1548 sub r9,0x80
1549 jmp NEAR $L$oop8x_avx
1550
1551ALIGN 32
1552$L$oop8x_avx:
1553 vpunpckhqdq xmm8,xmm15,xmm15
1554 vmovdqu xmm14,XMMWORD[112+r8]
1555 vpxor xmm3,xmm3,xmm0
1556 vpxor xmm8,xmm8,xmm15
1557 vpclmulqdq xmm10,xmm15,xmm6,0x00
1558 vpshufb xmm14,xmm14,xmm13
1559 vpxor xmm4,xmm4,xmm1
1560 vpclmulqdq xmm11,xmm15,xmm6,0x11
1561 vmovdqu xmm6,XMMWORD[((0-64))+rdx]
1562 vpunpckhqdq xmm9,xmm14,xmm14
1563 vpxor xmm5,xmm5,xmm2
1564 vpclmulqdq xmm12,xmm8,xmm7,0x00
1565 vmovdqu xmm7,XMMWORD[((32-64))+rdx]
1566 vpxor xmm9,xmm9,xmm14
1567
1568 vmovdqu xmm15,XMMWORD[96+r8]
1569 vpclmulqdq xmm0,xmm14,xmm6,0x00
1570 vpxor xmm10,xmm10,xmm3
1571 vpshufb xmm15,xmm15,xmm13
1572 vpclmulqdq xmm1,xmm14,xmm6,0x11
1573 vxorps xmm11,xmm11,xmm4
1574 vmovdqu xmm6,XMMWORD[((16-64))+rdx]
1575 vpunpckhqdq xmm8,xmm15,xmm15
1576 vpclmulqdq xmm2,xmm9,xmm7,0x00
1577 vpxor xmm12,xmm12,xmm5
1578 vxorps xmm8,xmm8,xmm15
1579
1580 vmovdqu xmm14,XMMWORD[80+r8]
1581 vpxor xmm12,xmm12,xmm10
1582 vpclmulqdq xmm3,xmm15,xmm6,0x00
1583 vpxor xmm12,xmm12,xmm11
1584 vpslldq xmm9,xmm12,8
1585 vpxor xmm3,xmm3,xmm0
1586 vpclmulqdq xmm4,xmm15,xmm6,0x11
1587 vpsrldq xmm12,xmm12,8
1588 vpxor xmm10,xmm10,xmm9
1589 vmovdqu xmm6,XMMWORD[((48-64))+rdx]
1590 vpshufb xmm14,xmm14,xmm13
1591 vxorps xmm11,xmm11,xmm12
1592 vpxor xmm4,xmm4,xmm1
1593 vpunpckhqdq xmm9,xmm14,xmm14
1594 vpclmulqdq xmm5,xmm8,xmm7,0x10
1595 vmovdqu xmm7,XMMWORD[((80-64))+rdx]
1596 vpxor xmm9,xmm9,xmm14
1597 vpxor xmm5,xmm5,xmm2
1598
1599 vmovdqu xmm15,XMMWORD[64+r8]
1600 vpalignr xmm12,xmm10,xmm10,8
1601 vpclmulqdq xmm0,xmm14,xmm6,0x00
1602 vpshufb xmm15,xmm15,xmm13
1603 vpxor xmm0,xmm0,xmm3
1604 vpclmulqdq xmm1,xmm14,xmm6,0x11
1605 vmovdqu xmm6,XMMWORD[((64-64))+rdx]
1606 vpunpckhqdq xmm8,xmm15,xmm15
1607 vpxor xmm1,xmm1,xmm4
1608 vpclmulqdq xmm2,xmm9,xmm7,0x00
1609 vxorps xmm8,xmm8,xmm15
1610 vpxor xmm2,xmm2,xmm5
1611
1612 vmovdqu xmm14,XMMWORD[48+r8]
1613 vpclmulqdq xmm10,xmm10,XMMWORD[r10],0x10
1614 vpclmulqdq xmm3,xmm15,xmm6,0x00
1615 vpshufb xmm14,xmm14,xmm13
1616 vpxor xmm3,xmm3,xmm0
1617 vpclmulqdq xmm4,xmm15,xmm6,0x11
1618 vmovdqu xmm6,XMMWORD[((96-64))+rdx]
1619 vpunpckhqdq xmm9,xmm14,xmm14
1620 vpxor xmm4,xmm4,xmm1
1621 vpclmulqdq xmm5,xmm8,xmm7,0x10
1622 vmovdqu xmm7,XMMWORD[((128-64))+rdx]
1623 vpxor xmm9,xmm9,xmm14
1624 vpxor xmm5,xmm5,xmm2
1625
1626 vmovdqu xmm15,XMMWORD[32+r8]
1627 vpclmulqdq xmm0,xmm14,xmm6,0x00
1628 vpshufb xmm15,xmm15,xmm13
1629 vpxor xmm0,xmm0,xmm3
1630 vpclmulqdq xmm1,xmm14,xmm6,0x11
1631 vmovdqu xmm6,XMMWORD[((112-64))+rdx]
1632 vpunpckhqdq xmm8,xmm15,xmm15
1633 vpxor xmm1,xmm1,xmm4
1634 vpclmulqdq xmm2,xmm9,xmm7,0x00
1635 vpxor xmm8,xmm8,xmm15
1636 vpxor xmm2,xmm2,xmm5
1637 vxorps xmm10,xmm10,xmm12
1638
1639 vmovdqu xmm14,XMMWORD[16+r8]
1640 vpalignr xmm12,xmm10,xmm10,8
1641 vpclmulqdq xmm3,xmm15,xmm6,0x00
1642 vpshufb xmm14,xmm14,xmm13
1643 vpxor xmm3,xmm3,xmm0
1644 vpclmulqdq xmm4,xmm15,xmm6,0x11
1645 vmovdqu xmm6,XMMWORD[((144-64))+rdx]
1646 vpclmulqdq xmm10,xmm10,XMMWORD[r10],0x10
1647 vxorps xmm12,xmm12,xmm11
1648 vpunpckhqdq xmm9,xmm14,xmm14
1649 vpxor xmm4,xmm4,xmm1
1650 vpclmulqdq xmm5,xmm8,xmm7,0x10
1651 vmovdqu xmm7,XMMWORD[((176-64))+rdx]
1652 vpxor xmm9,xmm9,xmm14
1653 vpxor xmm5,xmm5,xmm2
1654
1655 vmovdqu xmm15,XMMWORD[r8]
1656 vpclmulqdq xmm0,xmm14,xmm6,0x00
1657 vpshufb xmm15,xmm15,xmm13
1658 vpclmulqdq xmm1,xmm14,xmm6,0x11
1659 vmovdqu xmm6,XMMWORD[((160-64))+rdx]
1660 vpxor xmm15,xmm15,xmm12
1661 vpclmulqdq xmm2,xmm9,xmm7,0x10
1662 vpxor xmm15,xmm15,xmm10
1663
1664 lea r8,[128+r8]
1665 sub r9,0x80
1666 jnc NEAR $L$oop8x_avx
1667
1668 add r9,0x80
1669 jmp NEAR $L$tail_no_xor_avx
1670
1671ALIGN 32
1672$L$short_avx:
1673 vmovdqu xmm14,XMMWORD[((-16))+r9*1+r8]
1674 lea r8,[r9*1+r8]
1675 vmovdqu xmm6,XMMWORD[((0-64))+rdx]
1676 vmovdqu xmm7,XMMWORD[((32-64))+rdx]
1677 vpshufb xmm15,xmm14,xmm13
1678
1679 vmovdqa xmm3,xmm0
1680 vmovdqa xmm4,xmm1
1681 vmovdqa xmm5,xmm2
1682 sub r9,0x10
1683 jz NEAR $L$tail_avx
1684
1685 vpunpckhqdq xmm8,xmm15,xmm15
1686 vpxor xmm3,xmm3,xmm0
1687 vpclmulqdq xmm0,xmm15,xmm6,0x00
1688 vpxor xmm8,xmm8,xmm15
1689 vmovdqu xmm14,XMMWORD[((-32))+r8]
1690 vpxor xmm4,xmm4,xmm1
1691 vpclmulqdq xmm1,xmm15,xmm6,0x11
1692 vmovdqu xmm6,XMMWORD[((16-64))+rdx]
1693 vpshufb xmm15,xmm14,xmm13
1694 vpxor xmm5,xmm5,xmm2
1695 vpclmulqdq xmm2,xmm8,xmm7,0x00
1696 vpsrldq xmm7,xmm7,8
1697 sub r9,0x10
1698 jz NEAR $L$tail_avx
1699
1700 vpunpckhqdq xmm8,xmm15,xmm15
1701 vpxor xmm3,xmm3,xmm0
1702 vpclmulqdq xmm0,xmm15,xmm6,0x00
1703 vpxor xmm8,xmm8,xmm15
1704 vmovdqu xmm14,XMMWORD[((-48))+r8]
1705 vpxor xmm4,xmm4,xmm1
1706 vpclmulqdq xmm1,xmm15,xmm6,0x11
1707 vmovdqu xmm6,XMMWORD[((48-64))+rdx]
1708 vpshufb xmm15,xmm14,xmm13
1709 vpxor xmm5,xmm5,xmm2
1710 vpclmulqdq xmm2,xmm8,xmm7,0x00
1711 vmovdqu xmm7,XMMWORD[((80-64))+rdx]
1712 sub r9,0x10
1713 jz NEAR $L$tail_avx
1714
1715 vpunpckhqdq xmm8,xmm15,xmm15
1716 vpxor xmm3,xmm3,xmm0
1717 vpclmulqdq xmm0,xmm15,xmm6,0x00
1718 vpxor xmm8,xmm8,xmm15
1719 vmovdqu xmm14,XMMWORD[((-64))+r8]
1720 vpxor xmm4,xmm4,xmm1
1721 vpclmulqdq xmm1,xmm15,xmm6,0x11
1722 vmovdqu xmm6,XMMWORD[((64-64))+rdx]
1723 vpshufb xmm15,xmm14,xmm13
1724 vpxor xmm5,xmm5,xmm2
1725 vpclmulqdq xmm2,xmm8,xmm7,0x00
1726 vpsrldq xmm7,xmm7,8
1727 sub r9,0x10
1728 jz NEAR $L$tail_avx
1729
1730 vpunpckhqdq xmm8,xmm15,xmm15
1731 vpxor xmm3,xmm3,xmm0
1732 vpclmulqdq xmm0,xmm15,xmm6,0x00
1733 vpxor xmm8,xmm8,xmm15
1734 vmovdqu xmm14,XMMWORD[((-80))+r8]
1735 vpxor xmm4,xmm4,xmm1
1736 vpclmulqdq xmm1,xmm15,xmm6,0x11
1737 vmovdqu xmm6,XMMWORD[((96-64))+rdx]
1738 vpshufb xmm15,xmm14,xmm13
1739 vpxor xmm5,xmm5,xmm2
1740 vpclmulqdq xmm2,xmm8,xmm7,0x00
1741 vmovdqu xmm7,XMMWORD[((128-64))+rdx]
1742 sub r9,0x10
1743 jz NEAR $L$tail_avx
1744
1745 vpunpckhqdq xmm8,xmm15,xmm15
1746 vpxor xmm3,xmm3,xmm0
1747 vpclmulqdq xmm0,xmm15,xmm6,0x00
1748 vpxor xmm8,xmm8,xmm15
1749 vmovdqu xmm14,XMMWORD[((-96))+r8]
1750 vpxor xmm4,xmm4,xmm1
1751 vpclmulqdq xmm1,xmm15,xmm6,0x11
1752 vmovdqu xmm6,XMMWORD[((112-64))+rdx]
1753 vpshufb xmm15,xmm14,xmm13
1754 vpxor xmm5,xmm5,xmm2
1755 vpclmulqdq xmm2,xmm8,xmm7,0x00
1756 vpsrldq xmm7,xmm7,8
1757 sub r9,0x10
1758 jz NEAR $L$tail_avx
1759
1760 vpunpckhqdq xmm8,xmm15,xmm15
1761 vpxor xmm3,xmm3,xmm0
1762 vpclmulqdq xmm0,xmm15,xmm6,0x00
1763 vpxor xmm8,xmm8,xmm15
1764 vmovdqu xmm14,XMMWORD[((-112))+r8]
1765 vpxor xmm4,xmm4,xmm1
1766 vpclmulqdq xmm1,xmm15,xmm6,0x11
1767 vmovdqu xmm6,XMMWORD[((144-64))+rdx]
1768 vpshufb xmm15,xmm14,xmm13
1769 vpxor xmm5,xmm5,xmm2
1770 vpclmulqdq xmm2,xmm8,xmm7,0x00
1771 vmovq xmm7,QWORD[((184-64))+rdx]
1772 sub r9,0x10
1773 jmp NEAR $L$tail_avx
1774
1775ALIGN 32
1776$L$tail_avx:
1777 vpxor xmm15,xmm15,xmm10
1778$L$tail_no_xor_avx:
1779 vpunpckhqdq xmm8,xmm15,xmm15
1780 vpxor xmm3,xmm3,xmm0
1781 vpclmulqdq xmm0,xmm15,xmm6,0x00
1782 vpxor xmm8,xmm8,xmm15
1783 vpxor xmm4,xmm4,xmm1
1784 vpclmulqdq xmm1,xmm15,xmm6,0x11
1785 vpxor xmm5,xmm5,xmm2
1786 vpclmulqdq xmm2,xmm8,xmm7,0x00
1787
1788 vmovdqu xmm12,XMMWORD[r10]
1789
1790 vpxor xmm10,xmm3,xmm0
1791 vpxor xmm11,xmm4,xmm1
1792 vpxor xmm5,xmm5,xmm2
1793
1794 vpxor xmm5,xmm5,xmm10
1795 vpxor xmm5,xmm5,xmm11
1796 vpslldq xmm9,xmm5,8
1797 vpsrldq xmm5,xmm5,8
1798 vpxor xmm10,xmm10,xmm9
1799 vpxor xmm11,xmm11,xmm5
1800
1801 vpclmulqdq xmm9,xmm10,xmm12,0x10
1802 vpalignr xmm10,xmm10,xmm10,8
1803 vpxor xmm10,xmm10,xmm9
1804
1805 vpclmulqdq xmm9,xmm10,xmm12,0x10
1806 vpalignr xmm10,xmm10,xmm10,8
1807 vpxor xmm10,xmm10,xmm11
1808 vpxor xmm10,xmm10,xmm9
1809
1810 cmp r9,0
1811 jne NEAR $L$short_avx
1812
1813 vpshufb xmm10,xmm10,xmm13
1814 vmovdqu XMMWORD[rcx],xmm10
1815 vzeroupper
1816 movaps xmm6,XMMWORD[rsp]
1817 movaps xmm7,XMMWORD[16+rsp]
1818 movaps xmm8,XMMWORD[32+rsp]
1819 movaps xmm9,XMMWORD[48+rsp]
1820 movaps xmm10,XMMWORD[64+rsp]
1821 movaps xmm11,XMMWORD[80+rsp]
1822 movaps xmm12,XMMWORD[96+rsp]
1823 movaps xmm13,XMMWORD[112+rsp]
1824 movaps xmm14,XMMWORD[128+rsp]
1825 movaps xmm15,XMMWORD[144+rsp]
1826 lea rsp,[168+rsp]
1827$L$SEH_end_gcm_ghash_avx:
1828 DB 0F3h,0C3h ;repret
1829
; Read-only 16-byte constants, 64-byte aligned.
; NOTE(review): none of these labels is referenced in the part of the file
; visible here; presumably they are loaded by the PCLMULQDQ/AVX GHASH
; routines earlier in the file -- confirm against those routines.
1830ALIGN 64
1831$L$bswap_mask:
; Byte indices 15..0: used as a byte-shuffle control this reverses the byte
; order of a 128-bit value.
1832DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
1833$L$0x1c2_polynomial:
; Lowest byte = 1, highest byte = 0xc2, everything in between zero.
1834DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
1835$L$7_mask:
; Two qwords, each equal to 7.
1836 DD 7,0,7,0
1837$L$7_mask_poly:
; Two qwords: 7 and 450 (450 = 0x1c2).
1838 DD 7,0,450,0
; $L$rem_4bit: 16-entry table of 64-bit values, 64-byte aligned.
; gcm_gmult_4bit takes its address with `lea r11,[$L$rem_4bit]` and indexes
; it as QWORD[rdx*8+r11] with rdx masked to 0..15, XORing the entry into the
; upper half of the accumulator (r9).
; Each entry is written as two dwords: a zero low dword, then a high dword
; equal to a 16-bit value shifted left by 16, so the qword is that value
; shifted left by 48. In index order the 16-bit values are:
;   0x0000 0x1C20 0x3840 0x2460 0x7080 0x6CA0 0x48C0 0x54E0
;   0xE100 0xFD20 0xD940 0xC560 0x9180 0x8DA0 0xA9C0 0xB5E0
1839ALIGN 64
1840
1841$L$rem_4bit:
1842 DD 0,0,0,471859200,0,943718400,0,610271232
1843 DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208
1844 DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008
1845 DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160
1846
; $L$rem_8bit: 256 16-bit words (32 rows of 8).
; Entries at power-of-two indices are 0x01C2 shifted left by the bit number
; (index 1 = 0x01C2, 2 = 0x0384, 4 = 0x0708, ... 128 = 0xE100); spot checks
; of other entries show them to be the XOR of the power-of-two entries
; selected by the index bits (e.g. index 3 = 0x01C2 xor 0x0384 = 0x0246).
; NOTE(review): the consumer is not in the visible part of the file;
; presumably this is the byte-wise reduction table of the 4-bit GHASH
; routine -- confirm against gcm_ghash_4bit.
1847$L$rem_8bit:
1848 DW 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
1849 DW 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
1850 DW 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
1851 DW 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
1852 DW 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
1853 DW 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
1854 DW 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
1855 DW 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
1856 DW 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
1857 DW 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
1858 DW 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
1859 DW 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
1860 DW 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
1861 DW 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
1862 DW 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
1863 DW 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
1864 DW 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
1865 DW 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
1866 DW 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
1867 DW 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
1868 DW 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
1869 DW 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
1870 DW 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
1871 DW 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
1872 DW 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
1873 DW 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
1874 DW 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
1875 DW 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
1876 DW 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
1877 DW 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
1878 DW 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
1879 DW 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
1880
; NUL-terminated ASCII identification string embedded in the object:
; "GHASH for x86_64, CRYPTOGAMS by " followed by the author's e-mail address
; (appro at openssl.org) enclosed in angle brackets.
1881DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
1882DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
1883DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
1884DB 114,103,62,0
; Pad to a 64-byte boundary before the code that follows.
1885ALIGN 64
1886EXTERN __imp_RtlVirtualUnwind
1887
; se_handler -- Win64 language-specific exception handler named by the
; .xdata records of gcm_gmult_4bit and gcm_ghash_4bit.
;
; ABI:  Microsoft x64. This routine only reads r8 and r9 of its arguments:
;         r8 = CONTEXT* for the faulting frame
;         r9 = DISPATCHER_CONTEXT*
;       (structure field names in the comments below follow the winnt.h
;       layouts -- verify against the SDK headers if in doubt)
; Does: rebuilds the caller's Rsp/Rsi/Rdi (and, when the fault lies between
;       the prologue and epilogue labels stored in HandlerData, also
;       Rbx/Rbp/R12) inside *r8, copies the repaired CONTEXT into
;       disp->ContextRecord, then calls RtlVirtualUnwind to continue the
;       unwind.
; Out:  eax = 1 (ExceptionContinueSearch).
; Stack: 9 pushes + 64 bytes = 32 bytes of shadow space plus four stack
;       arguments for the call; rsp is 16-byte aligned at the call.
;
; NOTE(review): the body fix-up assumes exactly three pushes (rbx, rbp, r12)
; between the entry rsp and the prologue label. gcm_gmult_4bit's prologue
; matches; confirm that gcm_ghash_4bit, which also uses this handler, has
; the same frame shape.
1888ALIGN 16
1889se_handler:
1890 push rsi ;preserve all callee-saved GPRs and the flags
1891 push rdi
1892 push rbx
1893 push rbp
1894 push r12
1895 push r13
1896 push r14
1897 push r15
1898 pushfq
1899 sub rsp,64 ;shadow space + arg5..arg8 for RtlVirtualUnwind
1900
1901 mov rax,QWORD[120+r8] ;context->Rax (the routines copy entry rsp into rax)
1902 mov rbx,QWORD[248+r8] ;context->Rip
1903
1904 mov rsi,QWORD[8+r9] ;disp->ImageBase
1905 mov r11,QWORD[56+r9] ;disp->HandlerData (two RVAs from .xdata)
1906
1907 mov r10d,DWORD[r11] ;HandlerData[0]: RVA of the prologue label
1908 lea r10,[r10*1+rsi] ;convert RVA to an absolute address
1909 cmp rbx,r10
1910 jb NEAR $L$in_prologue ;Rip before prologue label: rax is still entry rsp
1911
1912 mov rax,QWORD[152+r8] ;context->Rsp
1913
1914 mov r10d,DWORD[4+r11] ;HandlerData[1]: RVA of the epilogue label
1915 lea r10,[r10*1+rsi] ;convert RVA to an absolute address
1916 cmp rbx,r10
1917 jae NEAR $L$in_prologue ;Rip at/after epilogue label: Rsp already unwound
1918
1919 lea rax,[24+rax] ;step over the three pushed registers
1920
1921 mov rbx,QWORD[((-8))+rax] ;reload the values the prologue pushed
1922 mov rbp,QWORD[((-16))+rax]
1923 mov r12,QWORD[((-24))+rax]
1924 mov QWORD[144+r8],rbx ;context->Rbx
1925 mov QWORD[160+r8],rbp ;context->Rbp
1926 mov QWORD[216+r8],r12 ;context->R12
1927
1928$L$in_prologue:
; rax = rsp as it was on entry to the faulting routine; the WIN64 prologue
; stored rdi at [8+rsp] and rsi at [16+rsp].
1929 mov rdi,QWORD[8+rax]
1930 mov rsi,QWORD[16+rax]
1931 mov QWORD[152+r8],rax ;context->Rsp
1932 mov QWORD[168+r8],rsi ;context->Rsi
1933 mov QWORD[176+r8],rdi ;context->Rdi
1934
1935 mov rdi,QWORD[40+r9] ;destination: disp->ContextRecord
1936 mov rsi,r8 ;source: the repaired context
1937 mov ecx,154 ;154 qwords = 1232 bytes
1938 DD 0xa548f3fc ;bytes FC F3 48 A5 = cld; rep movsq
1939
1940 mov rsi,r9 ;keep disp in rsi; r8/r9 are reloaded as arguments below
1941 xor rcx,rcx ;arg1 = 0 (UNW_FLAG_NHANDLER)
1942 mov rdx,QWORD[8+rsi] ;arg2 = disp->ImageBase
1943 mov r8,QWORD[rsi] ;arg3 = disp->ControlPc
1944 mov r9,QWORD[16+rsi] ;arg4 = disp->FunctionEntry
1945 mov r10,QWORD[40+rsi] ;disp->ContextRecord
1946 lea r11,[56+rsi] ;&disp->HandlerData
1947 lea r12,[24+rsi] ;&disp->EstablisherFrame
1948 mov QWORD[32+rsp],r10 ;arg5
1949 mov QWORD[40+rsp],r11 ;arg6
1950 mov QWORD[48+rsp],r12 ;arg7
1951 mov QWORD[56+rsp],rcx ;arg8 = NULL
1952 call QWORD[__imp_RtlVirtualUnwind]
1953
1954 mov eax,1 ;return ExceptionContinueSearch
1955 add rsp,64
1956 popfq
1957 pop r15
1958 pop r14
1959 pop r13
1960 pop r12
1961 pop rbp
1962 pop rbx
1963 pop rdi
1964 pop rsi
1965 DB 0F3h,0C3h ;repret
1966
1967
; .pdata: Win64 function table. Each record is three image-relative dwords:
; start of the function, end of the function, and its unwind-info record in
; .xdata.
1968section .pdata rdata align=4
1969ALIGN 4
1970 DD $L$SEH_begin_gcm_gmult_4bit wrt ..imagebase
1971 DD $L$SEH_end_gcm_gmult_4bit wrt ..imagebase
1972 DD $L$SEH_info_gcm_gmult_4bit wrt ..imagebase
1973
1974 DD $L$SEH_begin_gcm_ghash_4bit wrt ..imagebase
1975 DD $L$SEH_end_gcm_ghash_4bit wrt ..imagebase
1976 DD $L$SEH_info_gcm_ghash_4bit wrt ..imagebase
1977
1978 DD $L$SEH_begin_gcm_init_clmul wrt ..imagebase
1979 DD $L$SEH_end_gcm_init_clmul wrt ..imagebase
1980 DD $L$SEH_info_gcm_init_clmul wrt ..imagebase
1981
1982 DD $L$SEH_begin_gcm_ghash_clmul wrt ..imagebase
1983 DD $L$SEH_end_gcm_ghash_clmul wrt ..imagebase
1984 DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase
; The two AVX routines reuse the unwind records of their CLMUL counterparts.
; NOTE(review): that is only valid if the prologues are byte-for-byte
; equivalent up to the described offsets -- gcm_init_avx's prologue is not
; visible here; confirm.
1985 DD $L$SEH_begin_gcm_init_avx wrt ..imagebase
1986 DD $L$SEH_end_gcm_init_avx wrt ..imagebase
1987 DD $L$SEH_info_gcm_init_clmul wrt ..imagebase
1988
1989 DD $L$SEH_begin_gcm_ghash_avx wrt ..imagebase
1990 DD $L$SEH_end_gcm_ghash_avx wrt ..imagebase
1991 DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase
; .xdata: unwind-info records referenced from .pdata.
; The two records below carry no unwind codes. Their first byte, 9, encodes
; version 1 with flags = 1 (UNW_FLAG_EHANDLER); prolog size, code count and
; frame register are all zero. They name se_handler as the language-specific
; handler and supply two RVAs as handler data, which se_handler reads as
; HandlerData[0] (prologue label) and HandlerData[1] (epilogue label).
1992section .xdata rdata align=8
1993ALIGN 8
1994$L$SEH_info_gcm_gmult_4bit:
1995DB 9,0,0,0
1996 DD se_handler wrt ..imagebase
1997 DD $L$gmult_prologue wrt ..imagebase,$L$gmult_epilogue wrt ..imagebase
1998$L$SEH_info_gcm_ghash_4bit:
1999DB 9,0,0,0
2000 DD se_handler wrt ..imagebase
2001 DD $L$ghash_prologue wrt ..imagebase,$L$ghash_epilogue wrt ..imagebase
; Table-driven unwind record (no handler), shared by gcm_init_clmul and
; gcm_init_avx in .pdata.
2002$L$SEH_info_gcm_init_clmul:
2003DB 0x01,0x08,0x03,0x00 ;version 1, flags 0; prolog size 8; 3 code slots; no frame register
2004DB 0x08,0x68,0x00,0x00 ;at prolog offset 8: UWOP_SAVE_XMM128 xmm6 at [rsp+0]
2005DB 0x04,0x22,0x00,0x00 ;at prolog offset 4: UWOP_ALLOC_SMALL 24 bytes; last two bytes pad to an even slot count
; Table-driven unwind record (no handler), shared by gcm_ghash_clmul and
; gcm_ghash_avx in .pdata. Codes are listed from the last prologue
; instruction to the first. The save slots and the 168-byte allocation match
; the gcm_ghash_avx epilogue (movaps xmm6..xmm15 from [rsp+0]..[rsp+144],
; then lea rsp,[168+rsp]).
2006$L$SEH_info_gcm_ghash_clmul:
2007DB 0x01,0x33,0x16,0x00 ;version 1, flags 0; prolog size 0x33; 0x16 (22) code slots; no frame register
2008DB 0x33,0xf8,0x09,0x00 ;offset 0x33: UWOP_SAVE_XMM128 xmm15 at [rsp+0x90]
2009DB 0x2e,0xe8,0x08,0x00 ;offset 0x2e: UWOP_SAVE_XMM128 xmm14 at [rsp+0x80]
2010DB 0x29,0xd8,0x07,0x00 ;offset 0x29: UWOP_SAVE_XMM128 xmm13 at [rsp+0x70]
2011DB 0x24,0xc8,0x06,0x00 ;offset 0x24: UWOP_SAVE_XMM128 xmm12 at [rsp+0x60]
2012DB 0x1f,0xb8,0x05,0x00 ;offset 0x1f: UWOP_SAVE_XMM128 xmm11 at [rsp+0x50]
2013DB 0x1a,0xa8,0x04,0x00 ;offset 0x1a: UWOP_SAVE_XMM128 xmm10 at [rsp+0x40]
2014DB 0x15,0x98,0x03,0x00 ;offset 0x15: UWOP_SAVE_XMM128 xmm9 at [rsp+0x30]
2015DB 0x10,0x88,0x02,0x00 ;offset 0x10: UWOP_SAVE_XMM128 xmm8 at [rsp+0x20]
2016DB 0x0c,0x78,0x01,0x00 ;offset 0x0c: UWOP_SAVE_XMM128 xmm7 at [rsp+0x10]
2017DB 0x08,0x68,0x00,0x00 ;offset 0x08: UWOP_SAVE_XMM128 xmm6 at [rsp+0]
2018DB 0x04,0x01,0x15,0x00 ;offset 0x04: UWOP_ALLOC_LARGE, 0x15*8 = 168 (0xa8) bytes
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette