VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.1/crypto/genasm-nasm/aesni-gcm-x86_64.S@ 94083

Last change on this file since 94083 was 94083, checked in by vboxsync, 3 years ago

libs/openssl-3.0.1: Recreate asm files, bugref:10128

File size: 21.7 KB
Line 
; --- Assembler setup (NASM syntax) -------------------------------------
; NOTE(review): every line in this listing still carries the line number of
; the repository browser it was copied from; those prefixes are not part of
; the assembly source.
; default rel: symbol-only memory operands such as [$L$bswap_mask] are
; encoded RIP-relative.
1default rel
; The size keywords are defined as empty, so operands written as
; XMMWORD[...] assemble as plain [...] memory references.
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
; All code below is placed in a 64-byte aligned .text section.
5section .text code align=64
6
7
8
;-----------------------------------------------------------------------
; _aesni_ctr32_ghash_6x  (file-local helper; not declared global)
;
; Stitched main loop. Each iteration runs the AES rounds for six counter
; blocks (xmm9..xmm14) interleaved with carry-less multiplications
; (vpclmulqdq) that fold six previously staged 16-byte blocks into the
; accumulator xmm8.
;
; Register roles, as used by the code below:
;   rdi   input pointer; six blocks are read per iteration, then rdi += 96
;   rsi   output pointer; rsi += 96 per iteration, results are stored at
;         [-96+rsi]..[-16+rsi]
;   rdx   remaining block count; decremented by 6 per iteration
;   rcx   key schedule pointer biased by +128 (round key 0 is at [rcx-128])
;   ebp   compared against 11 to select how many AES rounds are executed
;   r8    16-byte scratch slot that holds the next counter block
;   r9    multiplier table pointer biased by +64 by the callers
;         (presumably the precomputed GHASH key powers -- confirm with caller)
;   r10   running byte count, += 0x60 per iteration
;   r11   address of $L$bswap_mask; further constants follow at +16..+64
;   ebx   bytes 12..15 of the counter block as loaded by the callers
;   r14   pointer the movbe loads read from; r15 limits how far it advances
;   xmm1  current counter block, xmm7 one staged multiplication input
; Stack: the callers' slots [16+rsp]..[127+rsp] appear here as
;        [(16+8)+rsp].. because the call pushed an 8-byte return address.
; On exit (rdx underflows) the last six result blocks are still in
; xmm9..xmm14 and have not been stored; both callers store them.
; Modifies rdi, rsi, rdx, r10, r12, r13, r14, ebx, flags, xmm0..xmm15.
;-----------------------------------------------------------------------
9ALIGN 32
10_aesni_ctr32_ghash_6x:
11
; Setup: xmm2 = constant at [32+r11] ($L$one_msb when r11 = $L$bswap_mask),
; xmm15 = round key 0, xmm10..xmm14 = counter+1..+5 via byte-wise adds,
; xmm9 = counter ^ round key 0; the stack slot [(16+8)+rsp] is zeroed.
12 vmovdqu xmm2,XMMWORD[32+r11]
13 sub rdx,6
14 vpxor xmm4,xmm4,xmm4
15 vmovdqu xmm15,XMMWORD[((0-128))+rcx]
16 vpaddb xmm10,xmm1,xmm2
17 vpaddb xmm11,xmm10,xmm2
18 vpaddb xmm12,xmm11,xmm2
19 vpaddb xmm13,xmm12,xmm2
20 vpaddb xmm14,xmm13,xmm2
21 vpxor xmm9,xmm1,xmm15
22 vmovdqu XMMWORD[(16+8)+rsp],xmm4
23 jmp NEAR $L$oop6x
24
25ALIGN 32
26$L$oop6x:
; 100663296 = 6<<24: adds 6 to the top byte of ebx. A carry out means the
; byte-wise vpaddb increments would wrap, so the counters are rebuilt with
; 32-bit adds in $L$handle_ctr32 instead.
27 add ebx,100663296
28 jc NEAR $L$handle_ctr32
29 vmovdqu xmm3,XMMWORD[((0-32))+r9]
; xmm1 = counter for the following iteration (xmm14 + 1 in the last byte).
30 vpaddb xmm1,xmm14,xmm2
31 vpxor xmm10,xmm10,xmm15
32 vpxor xmm11,xmm11,xmm15
33
34$L$resume_ctr32:
; Park the next counter in the scratch slot; xmm1 is reused as a temporary.
35 vmovdqu XMMWORD[r8],xmm1
36 vpclmulqdq xmm5,xmm7,xmm3,0x10
37 vpxor xmm12,xmm12,xmm15
38 vmovups xmm2,XMMWORD[((16-128))+rcx]
39 vpclmulqdq xmm6,xmm7,xmm3,0x01
; Branch-free: r12 = (r15 >= r14, unsigned) ? 0x60 : 0, built from
; cmp / setnc / neg / and. The vector instructions between the cmp and the
; setnc do not modify EFLAGS.
40 xor r12,r12
41 cmp r15,r14
42
; AES round with the key at [(16-128)+rcx] for all six blocks, interleaved
; with the four partial products of xmm7 and the first table entry.
43 vaesenc xmm9,xmm9,xmm2
44 vmovdqu xmm0,XMMWORD[((48+8))+rsp]
45 vpxor xmm13,xmm13,xmm15
46 vpclmulqdq xmm1,xmm7,xmm3,0x00
47 vaesenc xmm10,xmm10,xmm2
48 vpxor xmm14,xmm14,xmm15
49 setnc r12b
50 vpclmulqdq xmm7,xmm7,xmm3,0x11
51 vaesenc xmm11,xmm11,xmm2
52 vmovdqu xmm3,XMMWORD[((16-32))+r9]
53 neg r12
54 vaesenc xmm12,xmm12,xmm2
55 vpxor xmm6,xmm6,xmm5
56 vpclmulqdq xmm5,xmm0,xmm3,0x00
; Fold xmm4 (left over from the previous iteration, zero on the first) into xmm8.
57 vpxor xmm8,xmm8,xmm4
58 vaesenc xmm13,xmm13,xmm2
59 vpxor xmm4,xmm1,xmm5
60 and r12,0x60
61 vmovups xmm15,XMMWORD[((32-128))+rcx]
62 vpclmulqdq xmm1,xmm0,xmm3,0x10
63 vaesenc xmm14,xmm14,xmm2
64
65 vpclmulqdq xmm2,xmm0,xmm3,0x01
; Advance r14 by 0x60 only while it has not passed r15.
66 lea r14,[r12*1+r14]
67 vaesenc xmm9,xmm9,xmm15
; xmm8 ^= value parked at [(16+8)+rsp] by the previous iteration.
68 vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]
69 vpclmulqdq xmm3,xmm0,xmm3,0x11
70 vmovdqu xmm0,XMMWORD[((64+8))+rsp]
71 vaesenc xmm10,xmm10,xmm15
; movbe loads 8 bytes byte-reversed: the 16 bytes at [80+r14] are staged,
; fully byte-reversed, at [(32+8)+rsp] for the next iteration.
72 movbe r13,QWORD[88+r14]
73 vaesenc xmm11,xmm11,xmm15
74 movbe r12,QWORD[80+r14]
75 vaesenc xmm12,xmm12,xmm15
76 mov QWORD[((32+8))+rsp],r13
77 vaesenc xmm13,xmm13,xmm15
78 mov QWORD[((40+8))+rsp],r12
79 vmovdqu xmm5,XMMWORD[((48-32))+r9]
80 vaesenc xmm14,xmm14,xmm15
81
; AES round with the key at [(48-128)+rcx]; products of the block from
; [(64+8)+rsp] are accumulated into xmm4 / xmm6 / xmm7.
82 vmovups xmm15,XMMWORD[((48-128))+rcx]
83 vpxor xmm6,xmm6,xmm1
84 vpclmulqdq xmm1,xmm0,xmm5,0x00
85 vaesenc xmm9,xmm9,xmm15
86 vpxor xmm6,xmm6,xmm2
87 vpclmulqdq xmm2,xmm0,xmm5,0x10
88 vaesenc xmm10,xmm10,xmm15
89 vpxor xmm7,xmm7,xmm3
90 vpclmulqdq xmm3,xmm0,xmm5,0x01
91 vaesenc xmm11,xmm11,xmm15
92 vpclmulqdq xmm5,xmm0,xmm5,0x11
93 vmovdqu xmm0,XMMWORD[((80+8))+rsp]
94 vaesenc xmm12,xmm12,xmm15
95 vaesenc xmm13,xmm13,xmm15
96 vpxor xmm4,xmm4,xmm1
97 vmovdqu xmm1,XMMWORD[((64-32))+r9]
98 vaesenc xmm14,xmm14,xmm15
99
; AES round with the key at [(64-128)+rcx]; the 16 bytes at [64+r14] are
; staged byte-reversed at [(48+8)+rsp].
100 vmovups xmm15,XMMWORD[((64-128))+rcx]
101 vpxor xmm6,xmm6,xmm2
102 vpclmulqdq xmm2,xmm0,xmm1,0x00
103 vaesenc xmm9,xmm9,xmm15
104 vpxor xmm6,xmm6,xmm3
105 vpclmulqdq xmm3,xmm0,xmm1,0x10
106 vaesenc xmm10,xmm10,xmm15
107 movbe r13,QWORD[72+r14]
108 vpxor xmm7,xmm7,xmm5
109 vpclmulqdq xmm5,xmm0,xmm1,0x01
110 vaesenc xmm11,xmm11,xmm15
111 movbe r12,QWORD[64+r14]
112 vpclmulqdq xmm1,xmm0,xmm1,0x11
113 vmovdqu xmm0,XMMWORD[((96+8))+rsp]
114 vaesenc xmm12,xmm12,xmm15
115 mov QWORD[((48+8))+rsp],r13
116 vaesenc xmm13,xmm13,xmm15
117 mov QWORD[((56+8))+rsp],r12
118 vpxor xmm4,xmm4,xmm2
119 vmovdqu xmm2,XMMWORD[((96-32))+r9]
120 vaesenc xmm14,xmm14,xmm15
121
; AES round with the key at [(80-128)+rcx]; the 16 bytes at [48+r14] are
; staged at [(64+8)+rsp]. xmm8 additionally absorbs the block staged at
; [(112+8)+rsp] before it is multiplied in the next group.
122 vmovups xmm15,XMMWORD[((80-128))+rcx]
123 vpxor xmm6,xmm6,xmm3
124 vpclmulqdq xmm3,xmm0,xmm2,0x00
125 vaesenc xmm9,xmm9,xmm15
126 vpxor xmm6,xmm6,xmm5
127 vpclmulqdq xmm5,xmm0,xmm2,0x10
128 vaesenc xmm10,xmm10,xmm15
129 movbe r13,QWORD[56+r14]
130 vpxor xmm7,xmm7,xmm1
131 vpclmulqdq xmm1,xmm0,xmm2,0x01
132 vpxor xmm8,xmm8,XMMWORD[((112+8))+rsp]
133 vaesenc xmm11,xmm11,xmm15
134 movbe r12,QWORD[48+r14]
135 vpclmulqdq xmm2,xmm0,xmm2,0x11
136 vaesenc xmm12,xmm12,xmm15
137 mov QWORD[((64+8))+rsp],r13
138 vaesenc xmm13,xmm13,xmm15
139 mov QWORD[((72+8))+rsp],r12
140 vpxor xmm4,xmm4,xmm3
141 vmovdqu xmm3,XMMWORD[((112-32))+r9]
142 vaesenc xmm14,xmm14,xmm15
143
; AES round with the key at [(96-128)+rcx]; xmm8 is multiplied by the table
; entry at [(112-32)+r9]; the 16 bytes at [32+r14] are staged at [(80+8)+rsp].
144 vmovups xmm15,XMMWORD[((96-128))+rcx]
145 vpxor xmm6,xmm6,xmm5
146 vpclmulqdq xmm5,xmm8,xmm3,0x10
147 vaesenc xmm9,xmm9,xmm15
148 vpxor xmm6,xmm6,xmm1
149 vpclmulqdq xmm1,xmm8,xmm3,0x01
150 vaesenc xmm10,xmm10,xmm15
151 movbe r13,QWORD[40+r14]
152 vpxor xmm7,xmm7,xmm2
153 vpclmulqdq xmm2,xmm8,xmm3,0x00
154 vaesenc xmm11,xmm11,xmm15
155 movbe r12,QWORD[32+r14]
156 vpclmulqdq xmm8,xmm8,xmm3,0x11
157 vaesenc xmm12,xmm12,xmm15
158 mov QWORD[((80+8))+rsp],r13
159 vaesenc xmm13,xmm13,xmm15
160 mov QWORD[((88+8))+rsp],r12
161 vpxor xmm6,xmm6,xmm5
162 vaesenc xmm14,xmm14,xmm15
163 vpxor xmm6,xmm6,xmm1
164
; AES round with the key at [(112-128)+rcx]. The cross terms in xmm6 are
; split into the low sum (xmm4) and, further below, the high sum (xmm7).
; xmm3 = constant at [16+r11] ($L$poly when r11 = $L$bswap_mask), used for
; the first vpalignr/vpclmulqdq folding step on xmm4.
165 vmovups xmm15,XMMWORD[((112-128))+rcx]
166 vpslldq xmm5,xmm6,8
167 vpxor xmm4,xmm4,xmm2
168 vmovdqu xmm3,XMMWORD[16+r11]
169
170 vaesenc xmm9,xmm9,xmm15
171 vpxor xmm7,xmm7,xmm8
172 vaesenc xmm10,xmm10,xmm15
173 vpxor xmm4,xmm4,xmm5
174 movbe r13,QWORD[24+r14]
175 vaesenc xmm11,xmm11,xmm15
176 movbe r12,QWORD[16+r14]
177 vpalignr xmm0,xmm4,xmm4,8
178 vpclmulqdq xmm4,xmm4,xmm3,0x10
179 mov QWORD[((96+8))+rsp],r13
180 vaesenc xmm12,xmm12,xmm15
181 mov QWORD[((104+8))+rsp],r12
182 vaesenc xmm13,xmm13,xmm15
183 vmovups xmm1,XMMWORD[((128-128))+rcx]
184 vaesenc xmm14,xmm14,xmm15
185
; AES round with the key at [(128-128)+rcx]. r13/r12 receive the
; byte-reversed 16 bytes at [r14]; they are written to the stack in
; $L$enc_tail.
186 vaesenc xmm9,xmm9,xmm1
187 vmovups xmm15,XMMWORD[((144-128))+rcx]
188 vaesenc xmm10,xmm10,xmm1
189 vpsrldq xmm6,xmm6,8
190 vaesenc xmm11,xmm11,xmm1
191 vpxor xmm7,xmm7,xmm6
192 vaesenc xmm12,xmm12,xmm1
193 vpxor xmm4,xmm4,xmm0
194 movbe r13,QWORD[8+r14]
195 vaesenc xmm13,xmm13,xmm1
196 movbe r12,QWORD[r14]
197 vaesenc xmm14,xmm14,xmm1
198 vmovups xmm1,XMMWORD[((160-128))+rcx]
; Round-count dispatch. On every path into $L$enc_tail xmm15 holds the key
; for the final vaesenc and xmm1 the key for vaesenclast.
;   ebp <  11: go to the tail now.
;   ebp == 11: two more rounds first (the je below reuses the flags of this
;              cmp; vaesenc/vmovups leave EFLAGS untouched).
;   otherwise: four more rounds first.
199 cmp ebp,11
200 jb NEAR $L$enc_tail
201
202 vaesenc xmm9,xmm9,xmm15
203 vaesenc xmm10,xmm10,xmm15
204 vaesenc xmm11,xmm11,xmm15
205 vaesenc xmm12,xmm12,xmm15
206 vaesenc xmm13,xmm13,xmm15
207 vaesenc xmm14,xmm14,xmm15
208
209 vaesenc xmm9,xmm9,xmm1
210 vaesenc xmm10,xmm10,xmm1
211 vaesenc xmm11,xmm11,xmm1
212 vaesenc xmm12,xmm12,xmm1
213 vaesenc xmm13,xmm13,xmm1
214 vmovups xmm15,XMMWORD[((176-128))+rcx]
215 vaesenc xmm14,xmm14,xmm1
216 vmovups xmm1,XMMWORD[((192-128))+rcx]
217 je NEAR $L$enc_tail
218
219 vaesenc xmm9,xmm9,xmm15
220 vaesenc xmm10,xmm10,xmm15
221 vaesenc xmm11,xmm11,xmm15
222 vaesenc xmm12,xmm12,xmm15
223 vaesenc xmm13,xmm13,xmm15
224 vaesenc xmm14,xmm14,xmm15
225
226 vaesenc xmm9,xmm9,xmm1
227 vaesenc xmm10,xmm10,xmm1
228 vaesenc xmm11,xmm11,xmm1
229 vaesenc xmm12,xmm12,xmm1
230 vaesenc xmm13,xmm13,xmm1
231 vmovups xmm15,XMMWORD[((208-128))+rcx]
232 vaesenc xmm14,xmm14,xmm1
233 vmovups xmm1,XMMWORD[((224-128))+rcx]
234 jmp NEAR $L$enc_tail
235
236ALIGN 32
237$L$handle_ctr32:
; Slow path taken when the low counter byte would wrap: byte-reverse the
; counter with the mask at [r11], add with 32-bit lanes using the constants
; at [64+r11] and [48+r11] ($L$one_lsb / $L$two_lsb when r11 =
; $L$bswap_mask), then reverse back. xmm10 and xmm11 also get the round-key-0
; XOR that the fast path performs before $L$resume_ctr32; xmm12..xmm14 get
; theirs after the jump. xmm1 becomes the counter for the next iteration.
238 vmovdqu xmm0,XMMWORD[r11]
239 vpshufb xmm6,xmm1,xmm0
240 vmovdqu xmm5,XMMWORD[48+r11]
241 vpaddd xmm10,xmm6,XMMWORD[64+r11]
242 vpaddd xmm11,xmm6,xmm5
243 vmovdqu xmm3,XMMWORD[((0-32))+r9]
244 vpaddd xmm12,xmm10,xmm5
245 vpshufb xmm10,xmm10,xmm0
246 vpaddd xmm13,xmm11,xmm5
247 vpshufb xmm11,xmm11,xmm0
248 vpxor xmm10,xmm10,xmm15
249 vpaddd xmm14,xmm12,xmm5
250 vpshufb xmm12,xmm12,xmm0
251 vpxor xmm11,xmm11,xmm15
252 vpaddd xmm1,xmm13,xmm5
253 vpshufb xmm13,xmm13,xmm0
254 vpshufb xmm14,xmm14,xmm0
255 vpshufb xmm1,xmm1,xmm0
256 jmp NEAR $L$resume_ctr32
257
258ALIGN 32
259$L$enc_tail:
; Final vaesenc with xmm15. The high sum xmm7 is parked at [(16+8)+rsp] for
; the next iteration (or $L$6x_done), and the second folding step on xmm4
; is started. The last round key xmm1 is XORed with the six input blocks at
; [rdi] so that vaesenclast yields keystream ^ input directly.
260 vaesenc xmm9,xmm9,xmm15
261 vmovdqu XMMWORD[(16+8)+rsp],xmm7
262 vpalignr xmm8,xmm4,xmm4,8
263 vaesenc xmm10,xmm10,xmm15
264 vpclmulqdq xmm4,xmm4,xmm3,0x10
265 vpxor xmm2,xmm1,XMMWORD[rdi]
266 vaesenc xmm11,xmm11,xmm15
267 vpxor xmm0,xmm1,XMMWORD[16+rdi]
268 vaesenc xmm12,xmm12,xmm15
269 vpxor xmm5,xmm1,XMMWORD[32+rdi]
270 vaesenc xmm13,xmm13,xmm15
271 vpxor xmm6,xmm1,XMMWORD[48+rdi]
272 vaesenc xmm14,xmm14,xmm15
273 vpxor xmm7,xmm1,XMMWORD[64+rdi]
274 vpxor xmm3,xmm1,XMMWORD[80+rdi]
; Reload the counter parked at [r8] and derive the next five counters
; (xmm0, xmm5, xmm6, xmm7, xmm3) with byte-wise adds while the last AES
; round completes. r13/r12 (loaded from [r14] above) are staged at
; [(112+8)+rsp]. rdi and rsi advance by 96; xmm15 = round key 0 again.
275 vmovdqu xmm1,XMMWORD[r8]
276
277 vaesenclast xmm9,xmm9,xmm2
278 vmovdqu xmm2,XMMWORD[32+r11]
279 vaesenclast xmm10,xmm10,xmm0
280 vpaddb xmm0,xmm1,xmm2
281 mov QWORD[((112+8))+rsp],r13
282 lea rdi,[96+rdi]
283 vaesenclast xmm11,xmm11,xmm5
284 vpaddb xmm5,xmm0,xmm2
285 mov QWORD[((120+8))+rsp],r12
286 lea rsi,[96+rsi]
287 vmovdqu xmm15,XMMWORD[((0-128))+rcx]
288 vaesenclast xmm12,xmm12,xmm6
289 vpaddb xmm6,xmm5,xmm2
290 vaesenclast xmm13,xmm13,xmm7
291 vpaddb xmm7,xmm6,xmm2
292 vaesenclast xmm14,xmm14,xmm3
293 vpaddb xmm3,xmm7,xmm2
294
; Account for 0x60 bytes; leave the loop once the block count underflows.
295 add r10,0x60
296 sub rdx,0x6
297 jc NEAR $L$6x_done
298
; Not done: store the six results and move the next counters into
; xmm9..xmm14 (xmm9 already XORed with round key 0). xmm7 is reloaded with
; the block staged at [(32+8)+rsp].
299 vmovups XMMWORD[(-96)+rsi],xmm9
300 vpxor xmm9,xmm1,xmm15
301 vmovups XMMWORD[(-80)+rsi],xmm10
302 vmovdqa xmm10,xmm0
303 vmovups XMMWORD[(-64)+rsi],xmm11
304 vmovdqa xmm11,xmm5
305 vmovups XMMWORD[(-48)+rsi],xmm12
306 vmovdqa xmm12,xmm6
307 vmovups XMMWORD[(-32)+rsi],xmm13
308 vmovdqa xmm13,xmm7
309 vmovups XMMWORD[(-16)+rsi],xmm14
310 vmovdqa xmm14,xmm3
311 vmovdqu xmm7,XMMWORD[((32+8))+rsp]
312 jmp NEAR $L$oop6x
313
314$L$6x_done:
; Finish the accumulator: xmm8 ^= parked high sum ^ xmm4.
315 vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]
316 vpxor xmm8,xmm8,xmm4
317
; Bytes F3 C3 encode "rep ret".
318 DB 0F3h,0C3h ;repret
319
320
;-----------------------------------------------------------------------
; aesni_gcm_decrypt  (exported; entered with the Win64 calling convention)
;
; In (Win64): rcx, rdx, r8, r9, [40+rsp], [48+rsp]. The prologue moves these
; six arguments into rdi, rsi, rdx, rcx, r8, r9, after which:
;   rdi = input pointer, rsi = output pointer, rdx = length in bytes,
;   rcx = key schedule (the dword at offset 240 is loaded into ebp),
;   r8  = 16-byte counter block (also used as scratch by the helper),
;   r9  = 16-byte hash state, read and written back; the helper reads a
;         multiplier table starting at r9+32.
; Out: rax = number of bytes processed (a multiple of 0x60), or 0 when
;      rdx < 0x60, in which case nothing is touched.
; NOTE(review): this appears to correspond to the C prototype
;   size_t aesni_gcm_decrypt(in, out, len, key, ivec, Xi) -- confirm
;   against the C caller.
; Saves and restores rbx, rbp, rdi, rsi, r12..r15 and xmm6..xmm15.
;-----------------------------------------------------------------------
321global aesni_gcm_decrypt
322
323ALIGN 32
324aesni_gcm_decrypt:
; Save rdi/rsi in the caller-provided home slots and keep the entry rsp in rax.
325 mov QWORD[8+rsp],rdi ;WIN64 prologue
326 mov QWORD[16+rsp],rsi
327 mov rax,rsp
328$L$SEH_begin_aesni_gcm_decrypt:
329 mov rdi,rcx
330 mov rsi,rdx
331 mov rdx,r8
332 mov rcx,r9
333 mov r8,QWORD[40+rsp]
334 mov r9,QWORD[48+rsp]
335
336
337
; Return value defaults to 0; fewer than 0x60 bytes are not handled here.
338 xor r10,r10
339 cmp rdx,0x60
340 jb NEAR $L$gcm_dec_abort
341
; rax = frame anchor: the saved registers are addressed relative to it, both
; by the epilogue below and by gcm_se_handler.
342 lea rax,[rsp]
343
344 push rbx
345
346 push rbp
347
348 push r12
349
350 push r13
351
352 push r14
353
354 push r15
355
; 168 bytes: 160 for xmm6..xmm15 at [rax-216]..[rax-57], plus 8 bytes of padding.
356 lea rsp,[((-168))+rsp]
357 movaps XMMWORD[(-216)+rax],xmm6
358 movaps XMMWORD[(-200)+rax],xmm7
359 movaps XMMWORD[(-184)+rax],xmm8
360 movaps XMMWORD[(-168)+rax],xmm9
361 movaps XMMWORD[(-152)+rax],xmm10
362 movaps XMMWORD[(-136)+rax],xmm11
363 movaps XMMWORD[(-120)+rax],xmm12
364 movaps XMMWORD[(-104)+rax],xmm13
365 movaps XMMWORD[(-88)+rax],xmm14
366 movaps XMMWORD[(-72)+rax],xmm15
367$L$gcm_dec_body:
368 vzeroupper
369
; Load the counter block (xmm1) and its last dword (ebx); reserve a scratch
; area on the stack and align rsp to 128 bytes; r11 = constant table;
; xmm8 = byte-reversed hash state; bias rcx by +128 and r9 by +64 as the
; helper expects; ebp = dword at key schedule offset 240.
370 vmovdqu xmm1,XMMWORD[r8]
371 add rsp,-128
372 mov ebx,DWORD[12+r8]
373 lea r11,[$L$bswap_mask]
374 lea r14,[((-128))+rcx]
375 mov r15,0xf80
376 vmovdqu xmm8,XMMWORD[r9]
377 and rsp,-128
378 vmovdqu xmm0,XMMWORD[r11]
379 lea rcx,[128+rcx]
380 lea r9,[((32+32))+r9]
381 mov ebp,DWORD[((240-128))+rcx]
382 vpshufb xmm8,xmm8,xmm0
383
; Compare address bits 7..11 (mask 0xf80) of rsp with those of (key - 128).
; If the stack offset is at or above the key offset by less than 768 bytes,
; move rsp down by that difference -- presumably so that the scratch area
; and the key schedule do not alias within a 4 KiB page; confirm.
384 and r14,r15
385 and r15,rsp
386 sub r15,r14
387 jc NEAR $L$dec_no_key_aliasing
388 cmp r15,768
389 jnc NEAR $L$dec_no_key_aliasing
390 sub rsp,r15
391$L$dec_no_key_aliasing:
392
; Stage the first six input blocks, byte-reversed, for the helper: the block
; at [80+rdi] stays in xmm7, the others go to [48+rsp]..[112+rsp].
; r14 = pointer the helper's movbe loads read from (the input);
; r15 = rdi + rdx - 192, the limit for advancing r14;
; rdx is converted from bytes to 16-byte blocks.
393 vmovdqu xmm7,XMMWORD[80+rdi]
394 lea r14,[rdi]
395 vmovdqu xmm4,XMMWORD[64+rdi]
396 lea r15,[((-192))+rdx*1+rdi]
397 vmovdqu xmm5,XMMWORD[48+rdi]
398 shr rdx,4
399 xor r10,r10
400 vmovdqu xmm6,XMMWORD[32+rdi]
401 vpshufb xmm7,xmm7,xmm0
402 vmovdqu xmm2,XMMWORD[16+rdi]
403 vpshufb xmm4,xmm4,xmm0
404 vmovdqu xmm3,XMMWORD[rdi]
405 vpshufb xmm5,xmm5,xmm0
406 vmovdqu XMMWORD[48+rsp],xmm4
407 vpshufb xmm6,xmm6,xmm0
408 vmovdqu XMMWORD[64+rsp],xmm5
409 vpshufb xmm2,xmm2,xmm0
410 vmovdqu XMMWORD[80+rsp],xmm6
411 vpshufb xmm3,xmm3,xmm0
412 vmovdqu XMMWORD[96+rsp],xmm2
413 vmovdqu XMMWORD[112+rsp],xmm3
414
415 call _aesni_ctr32_ghash_6x
416
; The helper returns with its last six result blocks still in registers.
417 vmovups XMMWORD[(-96)+rsi],xmm9
418 vmovups XMMWORD[(-80)+rsi],xmm10
419 vmovups XMMWORD[(-64)+rsi],xmm11
420 vmovups XMMWORD[(-48)+rsi],xmm12
421 vmovups XMMWORD[(-32)+rsi],xmm13
422 vmovups XMMWORD[(-16)+rsi],xmm14
423
; Byte-reverse the hash state back and store it where it was loaded from
; (r9 was biased by +64 above).
424 vpshufb xmm8,xmm8,XMMWORD[r11]
425 vmovdqu XMMWORD[(-64)+r9],xmm8
426
; Epilogue: restore xmm6..xmm15 and the pushed registers relative to rax,
; then reset rsp to the entry value.
427 vzeroupper
428 movaps xmm6,XMMWORD[((-216))+rax]
429 movaps xmm7,XMMWORD[((-200))+rax]
430 movaps xmm8,XMMWORD[((-184))+rax]
431 movaps xmm9,XMMWORD[((-168))+rax]
432 movaps xmm10,XMMWORD[((-152))+rax]
433 movaps xmm11,XMMWORD[((-136))+rax]
434 movaps xmm12,XMMWORD[((-120))+rax]
435 movaps xmm13,XMMWORD[((-104))+rax]
436 movaps xmm14,XMMWORD[((-88))+rax]
437 movaps xmm15,XMMWORD[((-72))+rax]
438 mov r15,QWORD[((-48))+rax]
439
440 mov r14,QWORD[((-40))+rax]
441
442 mov r13,QWORD[((-32))+rax]
443
444 mov r12,QWORD[((-24))+rax]
445
446 mov rbp,QWORD[((-16))+rax]
447
448 mov rbx,QWORD[((-8))+rax]
449
450 lea rsp,[rax]
451
452$L$gcm_dec_abort:
; rax = byte count from r10; reload rdi/rsi from the home slots.
453 mov rax,r10
454 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
455 mov rsi,QWORD[16+rsp]
456 DB 0F3h,0C3h ;repret
457
458$L$SEH_end_aesni_gcm_decrypt:
459
;-----------------------------------------------------------------------
; _aesni_ctr32_6x  (file-local helper; not declared global)
;
; Encrypts six consecutive counter blocks and XORs them with 96 bytes of
; input, without any hashing.
; In:   xmm1 = current counter block, ebx = its bytes 12..15,
;       xmm0 = byte-reversal mask (only used on the wrap path),
;       rcx  = key schedule biased by +128, ebp = round-loop bound (the loop
;              runs ebp-1 times, plus one vaesenc and one vaesenclast),
;       r11  = address of $L$bswap_mask, rdi = input, rsi = output.
; Out:  96 bytes written at [rsi]; rdi and rsi advanced by 96;
;       xmm9..xmm14 = the six result blocks; xmm1 = counter + 6.
; Modifies r12, r13, ebx, flags, xmm2..xmm6, xmm8, xmm15.
;-----------------------------------------------------------------------
460ALIGN 32
461_aesni_ctr32_6x:
462
; xmm4 = round key 0, xmm2 = constant at [32+r11], xmm15 = round key 1,
; r12 -> round key 2, r13 = ebp - 1 (loop count).
463 vmovdqu xmm4,XMMWORD[((0-128))+rcx]
464 vmovdqu xmm2,XMMWORD[32+r11]
465 lea r13,[((-1))+rbp]
466 vmovups xmm15,XMMWORD[((16-128))+rcx]
467 lea r12,[((32-128))+rcx]
468 vpxor xmm9,xmm1,xmm4
; 100663296 = 6<<24: a carry out of the top byte of ebx means the byte-wise
; increments below would wrap, so use the 32-bit path instead.
469 add ebx,100663296
470 jc NEAR $L$handle_ctr32_2
; Fast path: counters +1..+5 via byte adds, each XORed with round key 0;
; xmm1 becomes counter + 6.
471 vpaddb xmm10,xmm1,xmm2
472 vpaddb xmm11,xmm10,xmm2
473 vpxor xmm10,xmm10,xmm4
474 vpaddb xmm12,xmm11,xmm2
475 vpxor xmm11,xmm11,xmm4
476 vpaddb xmm13,xmm12,xmm2
477 vpxor xmm12,xmm12,xmm4
478 vpaddb xmm14,xmm13,xmm2
479 vpxor xmm13,xmm13,xmm4
480 vpaddb xmm1,xmm14,xmm2
481 vpxor xmm14,xmm14,xmm4
482 jmp NEAR $L$oop_ctr32
483
484ALIGN 16
485$L$oop_ctr32:
; One AES round per pass for all six blocks; the next round key is fetched
; from [r12], which then advances by 16.
486 vaesenc xmm9,xmm9,xmm15
487 vaesenc xmm10,xmm10,xmm15
488 vaesenc xmm11,xmm11,xmm15
489 vaesenc xmm12,xmm12,xmm15
490 vaesenc xmm13,xmm13,xmm15
491 vaesenc xmm14,xmm14,xmm15
492 vmovups xmm15,XMMWORD[r12]
493 lea r12,[16+r12]
494 dec r13d
495 jnz NEAR $L$oop_ctr32
496
; xmm3 = last round key. It is XORed with each input block first, so that
; vaesenclast produces keystream ^ input in one step.
497 vmovdqu xmm3,XMMWORD[r12]
498 vaesenc xmm9,xmm9,xmm15
499 vpxor xmm4,xmm3,XMMWORD[rdi]
500 vaesenc xmm10,xmm10,xmm15
501 vpxor xmm5,xmm3,XMMWORD[16+rdi]
502 vaesenc xmm11,xmm11,xmm15
503 vpxor xmm6,xmm3,XMMWORD[32+rdi]
504 vaesenc xmm12,xmm12,xmm15
505 vpxor xmm8,xmm3,XMMWORD[48+rdi]
506 vaesenc xmm13,xmm13,xmm15
507 vpxor xmm2,xmm3,XMMWORD[64+rdi]
508 vaesenc xmm14,xmm14,xmm15
509 vpxor xmm3,xmm3,XMMWORD[80+rdi]
510 lea rdi,[96+rdi]
511
512 vaesenclast xmm9,xmm9,xmm4
513 vaesenclast xmm10,xmm10,xmm5
514 vaesenclast xmm11,xmm11,xmm6
515 vaesenclast xmm12,xmm12,xmm8
516 vaesenclast xmm13,xmm13,xmm2
517 vaesenclast xmm14,xmm14,xmm3
; Store the six result blocks and advance the output pointer.
518 vmovups XMMWORD[rsi],xmm9
519 vmovups XMMWORD[16+rsi],xmm10
520 vmovups XMMWORD[32+rsi],xmm11
521 vmovups XMMWORD[48+rsi],xmm12
522 vmovups XMMWORD[64+rsi],xmm13
523 vmovups XMMWORD[80+rsi],xmm14
524 lea rsi,[96+rsi]
525
526 DB 0F3h,0C3h ;repret
527ALIGN 32
528$L$handle_ctr32_2:
; Wrap path: byte-reverse the counter with xmm0, add with 32-bit lanes using
; the constants at [64+r11] and [48+r11], reverse back, and apply the
; round-key-0 XOR to xmm10..xmm14. xmm1 becomes the counter for the next call.
529 vpshufb xmm6,xmm1,xmm0
530 vmovdqu xmm5,XMMWORD[48+r11]
531 vpaddd xmm10,xmm6,XMMWORD[64+r11]
532 vpaddd xmm11,xmm6,xmm5
533 vpaddd xmm12,xmm10,xmm5
534 vpshufb xmm10,xmm10,xmm0
535 vpaddd xmm13,xmm11,xmm5
536 vpshufb xmm11,xmm11,xmm0
537 vpxor xmm10,xmm10,xmm4
538 vpaddd xmm14,xmm12,xmm5
539 vpshufb xmm12,xmm12,xmm0
540 vpxor xmm11,xmm11,xmm4
541 vpaddd xmm1,xmm13,xmm5
542 vpshufb xmm13,xmm13,xmm0
543 vpxor xmm12,xmm12,xmm4
544 vpshufb xmm14,xmm14,xmm0
545 vpxor xmm13,xmm13,xmm4
546 vpshufb xmm1,xmm1,xmm0
547 vpxor xmm14,xmm14,xmm4
548 jmp NEAR $L$oop_ctr32
549
550
551
;-----------------------------------------------------------------------
; aesni_gcm_encrypt  (exported; entered with the Win64 calling convention)
;
; In (Win64): rcx, rdx, r8, r9, [40+rsp], [48+rsp]. The prologue moves these
; six arguments into rdi, rsi, rdx, rcx, r8, r9, after which:
;   rdi = input pointer, rsi = output pointer, rdx = length in bytes,
;   rcx = key schedule (the dword at offset 240 is loaded into ebp),
;   r8  = 16-byte counter block (also used as scratch by the helper),
;   r9  = 16-byte hash state, read and written back; a multiplier table is
;         read starting at r9+32.
; Out: rax = number of bytes processed, or 0 when rdx < 0x60*3 (288), in
;      which case nothing is touched.
; NOTE(review): this appears to correspond to the C prototype
;   size_t aesni_gcm_encrypt(in, out, len, key, ivec, Xi) -- confirm
;   against the C caller.
; Flow: two _aesni_ctr32_6x calls produce the first 12 output blocks, the
; stitched helper processes the rest while hashing output that lags behind,
; and a straight-line tail hashes the 12 blocks still outstanding.
; Saves and restores rbx, rbp, rdi, rsi, r12..r15 and xmm6..xmm15.
;-----------------------------------------------------------------------
552global aesni_gcm_encrypt
553
554ALIGN 32
555aesni_gcm_encrypt:
; Save rdi/rsi in the caller-provided home slots and keep the entry rsp in rax.
556 mov QWORD[8+rsp],rdi ;WIN64 prologue
557 mov QWORD[16+rsp],rsi
558 mov rax,rsp
559$L$SEH_begin_aesni_gcm_encrypt:
560 mov rdi,rcx
561 mov rsi,rdx
562 mov rdx,r8
563 mov rcx,r9
564 mov r8,QWORD[40+rsp]
565 mov r9,QWORD[48+rsp]
566
567
568
; Return value defaults to 0; at least three 0x60-byte groups are required.
569 xor r10,r10
570 cmp rdx,0x60*3
571 jb NEAR $L$gcm_enc_abort
572
; rax = frame anchor used by the epilogue and by gcm_se_handler.
573 lea rax,[rsp]
574
575 push rbx
576
577 push rbp
578
579 push r12
580
581 push r13
582
583 push r14
584
585 push r15
586
; 168 bytes: 160 for xmm6..xmm15 at [rax-216]..[rax-57], plus 8 bytes of padding.
587 lea rsp,[((-168))+rsp]
588 movaps XMMWORD[(-216)+rax],xmm6
589 movaps XMMWORD[(-200)+rax],xmm7
590 movaps XMMWORD[(-184)+rax],xmm8
591 movaps XMMWORD[(-168)+rax],xmm9
592 movaps XMMWORD[(-152)+rax],xmm10
593 movaps XMMWORD[(-136)+rax],xmm11
594 movaps XMMWORD[(-120)+rax],xmm12
595 movaps XMMWORD[(-104)+rax],xmm13
596 movaps XMMWORD[(-88)+rax],xmm14
597 movaps XMMWORD[(-72)+rax],xmm15
598$L$gcm_enc_body:
599 vzeroupper
600
; Load the counter block (xmm1) and its last dword (ebx); reserve a scratch
; area and align rsp to 128 bytes; r11 = constant table, xmm0 = byte-reversal
; mask; bias rcx by +128; ebp = dword at key schedule offset 240.
601 vmovdqu xmm1,XMMWORD[r8]
602 add rsp,-128
603 mov ebx,DWORD[12+r8]
604 lea r11,[$L$bswap_mask]
605 lea r14,[((-128))+rcx]
606 mov r15,0xf80
607 lea rcx,[128+rcx]
608 vmovdqu xmm0,XMMWORD[r11]
609 and rsp,-128
610 mov ebp,DWORD[((240-128))+rcx]
611
; Compare address bits 7..11 (mask 0xf80) of rsp with those of (key - 128).
; If the stack offset is at or above the key offset by less than 768 bytes,
; move rsp down by that difference -- presumably so that the scratch area
; and the key schedule do not alias within a 4 KiB page; confirm.
612 and r14,r15
613 and r15,rsp
614 sub r15,r14
615 jc NEAR $L$enc_no_key_aliasing
616 cmp r15,768
617 jnc NEAR $L$enc_no_key_aliasing
618 sub rsp,r15
619$L$enc_no_key_aliasing:
620
; r14 = output pointer: the stitched helper later reads the data to hash
; from there with movbe. r15 = rsi + rdx - 192, the limit for advancing r14.
; rdx is converted from bytes to 16-byte blocks.
621 lea r14,[rsi]
622 lea r15,[((-192))+rdx*1+rsi]
623 shr rdx,4
624
; First six blocks. Their byte-reversed results are staged for hashing: the
; sixth stays in xmm7, the others go to [112+rsp] down to [48+rsp].
625 call _aesni_ctr32_6x
626 vpshufb xmm8,xmm9,xmm0
627 vpshufb xmm2,xmm10,xmm0
628 vmovdqu XMMWORD[112+rsp],xmm8
629 vpshufb xmm4,xmm11,xmm0
630 vmovdqu XMMWORD[96+rsp],xmm2
631 vpshufb xmm5,xmm12,xmm0
632 vmovdqu XMMWORD[80+rsp],xmm4
633 vpshufb xmm6,xmm13,xmm0
634 vmovdqu XMMWORD[64+rsp],xmm5
635 vpshufb xmm7,xmm14,xmm0
636 vmovdqu XMMWORD[48+rsp],xmm6
637
; Second six blocks; the stitched helper picks them up from memory via r14.
638 call _aesni_ctr32_6x
639
; xmm8 = byte-reversed hash state; bias r9 by +64 as the helper expects.
; 12 blocks are done already, so rdx -= 12 and r10 starts at 0x60*2 bytes.
640 vmovdqu xmm8,XMMWORD[r9]
641 lea r9,[((32+32))+r9]
642 sub rdx,12
643 mov r10,0x60*2
644 vpshufb xmm8,xmm8,xmm0
645
646 call _aesni_ctr32_ghash_6x
; Tail. The helper returned with six staged blocks (written through its
; [(32+8)+rsp]..[(120+8)+rsp], i.e. [32+rsp]..[127+rsp] here) and six fresh
; result blocks in xmm9..xmm14. Store the results, byte-reverse them, and
; fold both groups of six into xmm8.
; The vpunpckhqdq/vpxor pairs form (high half ^ low half) of each block for
; the middle products, which use the table entries at [32-32+r9],
; [80-32+r9] and [128-32+r9] (looks like Karatsuba-style multiplication
; -- confirm against the table layout).
; [16+rsp] is reused as scratch for the byte-reversed xmm9.
647 vmovdqu xmm7,XMMWORD[32+rsp]
648 vmovdqu xmm0,XMMWORD[r11]
649 vmovdqu xmm3,XMMWORD[((0-32))+r9]
650 vpunpckhqdq xmm1,xmm7,xmm7
651 vmovdqu xmm15,XMMWORD[((32-32))+r9]
652 vmovups XMMWORD[(-96)+rsi],xmm9
653 vpshufb xmm9,xmm9,xmm0
654 vpxor xmm1,xmm1,xmm7
655 vmovups XMMWORD[(-80)+rsi],xmm10
656 vpshufb xmm10,xmm10,xmm0
657 vmovups XMMWORD[(-64)+rsi],xmm11
658 vpshufb xmm11,xmm11,xmm0
659 vmovups XMMWORD[(-48)+rsi],xmm12
660 vpshufb xmm12,xmm12,xmm0
661 vmovups XMMWORD[(-32)+rsi],xmm13
662 vpshufb xmm13,xmm13,xmm0
663 vmovups XMMWORD[(-16)+rsi],xmm14
664 vpshufb xmm14,xmm14,xmm0
665 vmovdqu XMMWORD[16+rsp],xmm9
666 vmovdqu xmm6,XMMWORD[48+rsp]
667 vmovdqu xmm0,XMMWORD[((16-32))+r9]
668 vpunpckhqdq xmm2,xmm6,xmm6
669 vpclmulqdq xmm5,xmm7,xmm3,0x00
670 vpxor xmm2,xmm2,xmm6
671 vpclmulqdq xmm7,xmm7,xmm3,0x11
672 vpclmulqdq xmm1,xmm1,xmm15,0x00
673
674 vmovdqu xmm9,XMMWORD[64+rsp]
675 vpclmulqdq xmm4,xmm6,xmm0,0x00
676 vmovdqu xmm3,XMMWORD[((48-32))+r9]
677 vpxor xmm4,xmm4,xmm5
678 vpunpckhqdq xmm5,xmm9,xmm9
679 vpclmulqdq xmm6,xmm6,xmm0,0x11
680 vpxor xmm5,xmm5,xmm9
681 vpxor xmm6,xmm6,xmm7
682 vpclmulqdq xmm2,xmm2,xmm15,0x10
683 vmovdqu xmm15,XMMWORD[((80-32))+r9]
684 vpxor xmm2,xmm2,xmm1
685
686 vmovdqu xmm1,XMMWORD[80+rsp]
687 vpclmulqdq xmm7,xmm9,xmm3,0x00
688 vmovdqu xmm0,XMMWORD[((64-32))+r9]
689 vpxor xmm7,xmm7,xmm4
690 vpunpckhqdq xmm4,xmm1,xmm1
691 vpclmulqdq xmm9,xmm9,xmm3,0x11
692 vpxor xmm4,xmm4,xmm1
693 vpxor xmm9,xmm9,xmm6
694 vpclmulqdq xmm5,xmm5,xmm15,0x00
695 vpxor xmm5,xmm5,xmm2
696
697 vmovdqu xmm2,XMMWORD[96+rsp]
698 vpclmulqdq xmm6,xmm1,xmm0,0x00
699 vmovdqu xmm3,XMMWORD[((96-32))+r9]
700 vpxor xmm6,xmm6,xmm7
701 vpunpckhqdq xmm7,xmm2,xmm2
702 vpclmulqdq xmm1,xmm1,xmm0,0x11
703 vpxor xmm7,xmm7,xmm2
704 vpxor xmm1,xmm1,xmm9
705 vpclmulqdq xmm4,xmm4,xmm15,0x10
706 vmovdqu xmm15,XMMWORD[((128-32))+r9]
707 vpxor xmm4,xmm4,xmm5
708
; The accumulator absorbs the block staged at [112+rsp] before it is
; multiplied by the table entry at [112-32+r9].
709 vpxor xmm8,xmm8,XMMWORD[112+rsp]
710 vpclmulqdq xmm5,xmm2,xmm3,0x00
711 vmovdqu xmm0,XMMWORD[((112-32))+r9]
712 vpunpckhqdq xmm9,xmm8,xmm8
713 vpxor xmm5,xmm5,xmm6
714 vpclmulqdq xmm2,xmm2,xmm3,0x11
715 vpxor xmm9,xmm9,xmm8
716 vpxor xmm2,xmm2,xmm1
717 vpclmulqdq xmm7,xmm7,xmm15,0x00
718 vpxor xmm4,xmm7,xmm4
719
; From here on the multiplications for the second group (xmm14 down to
; xmm10) are interleaved with combining and folding the first group's sums
; (low in xmm5, high in xmm7, middle in xmm6).
720 vpclmulqdq xmm6,xmm8,xmm0,0x00
721 vmovdqu xmm3,XMMWORD[((0-32))+r9]
722 vpunpckhqdq xmm1,xmm14,xmm14
723 vpclmulqdq xmm8,xmm8,xmm0,0x11
724 vpxor xmm1,xmm1,xmm14
725 vpxor xmm5,xmm6,xmm5
726 vpclmulqdq xmm9,xmm9,xmm15,0x10
727 vmovdqu xmm15,XMMWORD[((32-32))+r9]
728 vpxor xmm7,xmm8,xmm2
729 vpxor xmm6,xmm9,xmm4
730
731 vmovdqu xmm0,XMMWORD[((16-32))+r9]
732 vpxor xmm9,xmm7,xmm5
733 vpclmulqdq xmm4,xmm14,xmm3,0x00
734 vpxor xmm6,xmm6,xmm9
735 vpunpckhqdq xmm2,xmm13,xmm13
736 vpclmulqdq xmm14,xmm14,xmm3,0x11
737 vpxor xmm2,xmm2,xmm13
738 vpslldq xmm9,xmm6,8
739 vpclmulqdq xmm1,xmm1,xmm15,0x00
740 vpxor xmm8,xmm5,xmm9
741 vpsrldq xmm6,xmm6,8
742 vpxor xmm7,xmm7,xmm6
743
744 vpclmulqdq xmm5,xmm13,xmm0,0x00
745 vmovdqu xmm3,XMMWORD[((48-32))+r9]
746 vpxor xmm5,xmm5,xmm4
747 vpunpckhqdq xmm9,xmm12,xmm12
748 vpclmulqdq xmm13,xmm13,xmm0,0x11
749 vpxor xmm9,xmm9,xmm12
750 vpxor xmm13,xmm13,xmm14
751 vpalignr xmm14,xmm8,xmm8,8
752 vpclmulqdq xmm2,xmm2,xmm15,0x10
753 vmovdqu xmm15,XMMWORD[((80-32))+r9]
754 vpxor xmm2,xmm2,xmm1
755
756 vpclmulqdq xmm4,xmm12,xmm3,0x00
757 vmovdqu xmm0,XMMWORD[((64-32))+r9]
758 vpxor xmm4,xmm4,xmm5
759 vpunpckhqdq xmm1,xmm11,xmm11
760 vpclmulqdq xmm12,xmm12,xmm3,0x11
761 vpxor xmm1,xmm1,xmm11
762 vpxor xmm12,xmm12,xmm13
; xmm7 (high sum of the first group) absorbs the byte-reversed first block
; of the second group, saved at [16+rsp] above.
763 vxorps xmm7,xmm7,XMMWORD[16+rsp]
764 vpclmulqdq xmm9,xmm9,xmm15,0x00
765 vpxor xmm9,xmm9,xmm2
766
; First folding step on xmm8 with the constant at [16+r11] ($L$poly).
767 vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10
768 vxorps xmm8,xmm8,xmm14
769
770 vpclmulqdq xmm5,xmm11,xmm0,0x00
771 vmovdqu xmm3,XMMWORD[((96-32))+r9]
772 vpxor xmm5,xmm5,xmm4
773 vpunpckhqdq xmm2,xmm10,xmm10
774 vpclmulqdq xmm11,xmm11,xmm0,0x11
775 vpxor xmm2,xmm2,xmm10
776 vpalignr xmm14,xmm8,xmm8,8
777 vpxor xmm11,xmm11,xmm12
778 vpclmulqdq xmm1,xmm1,xmm15,0x10
779 vmovdqu xmm15,XMMWORD[((128-32))+r9]
780 vpxor xmm1,xmm1,xmm9
781
; Second folding step; the high sum xmm7 is merged in at the same time.
782 vxorps xmm14,xmm14,xmm7
783 vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10
784 vxorps xmm8,xmm8,xmm14
785
786 vpclmulqdq xmm4,xmm10,xmm3,0x00
787 vmovdqu xmm0,XMMWORD[((112-32))+r9]
788 vpxor xmm4,xmm4,xmm5
789 vpunpckhqdq xmm9,xmm8,xmm8
790 vpclmulqdq xmm10,xmm10,xmm3,0x11
791 vpxor xmm9,xmm9,xmm8
792 vpxor xmm10,xmm10,xmm11
793 vpclmulqdq xmm2,xmm2,xmm15,0x00
794 vpxor xmm2,xmm2,xmm1
795
; Multiply the folded accumulator by the table entry at [112-32+r9] and add
; the second group's sums: low in xmm5, high in xmm7, middle in xmm6.
796 vpclmulqdq xmm5,xmm8,xmm0,0x00
797 vpclmulqdq xmm7,xmm8,xmm0,0x11
798 vpxor xmm5,xmm5,xmm4
799 vpclmulqdq xmm6,xmm9,xmm15,0x10
800 vpxor xmm7,xmm7,xmm10
801 vpxor xmm6,xmm6,xmm2
802
; Split the middle sum across the low and high halves.
803 vpxor xmm4,xmm7,xmm5
804 vpxor xmm6,xmm6,xmm4
805 vpslldq xmm1,xmm6,8
806 vmovdqu xmm3,XMMWORD[16+r11]
807 vpsrldq xmm6,xmm6,8
808 vpxor xmm8,xmm5,xmm1
809 vpxor xmm7,xmm7,xmm6
810
; Two folding steps with the constant in xmm3, then merge the high half.
811 vpalignr xmm2,xmm8,xmm8,8
812 vpclmulqdq xmm8,xmm8,xmm3,0x10
813 vpxor xmm8,xmm8,xmm2
814
815 vpalignr xmm2,xmm8,xmm8,8
816 vpclmulqdq xmm8,xmm8,xmm3,0x10
817 vpxor xmm2,xmm2,xmm7
818 vpxor xmm8,xmm8,xmm2
; Byte-reverse the hash state back and store it where it was loaded from
; (r9 was biased by +64 above).
819 vpshufb xmm8,xmm8,XMMWORD[r11]
820 vmovdqu XMMWORD[(-64)+r9],xmm8
821
; Epilogue: restore xmm6..xmm15 and the pushed registers relative to rax,
; then reset rsp to the entry value.
822 vzeroupper
823 movaps xmm6,XMMWORD[((-216))+rax]
824 movaps xmm7,XMMWORD[((-200))+rax]
825 movaps xmm8,XMMWORD[((-184))+rax]
826 movaps xmm9,XMMWORD[((-168))+rax]
827 movaps xmm10,XMMWORD[((-152))+rax]
828 movaps xmm11,XMMWORD[((-136))+rax]
829 movaps xmm12,XMMWORD[((-120))+rax]
830 movaps xmm13,XMMWORD[((-104))+rax]
831 movaps xmm14,XMMWORD[((-88))+rax]
832 movaps xmm15,XMMWORD[((-72))+rax]
833 mov r15,QWORD[((-48))+rax]
834
835 mov r14,QWORD[((-40))+rax]
836
837 mov r13,QWORD[((-32))+rax]
838
839 mov r12,QWORD[((-24))+rax]
840
841 mov rbp,QWORD[((-16))+rax]
842
843 mov rbx,QWORD[((-8))+rax]
844
845 lea rsp,[rax]
846
847$L$gcm_enc_abort:
; rax = byte count from r10; reload rdi/rsi from the home slots.
848 mov rax,r10
849 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
850 mov rsi,QWORD[16+rsp]
851 DB 0F3h,0C3h ;repret
852
853$L$SEH_end_aesni_gcm_encrypt:
; Constant table. The code addresses every entry relative to $L$bswap_mask
; (held in r11), so the order and 16-byte spacing of the entries matter:
;   +0  $L$bswap_mask  vpshufb mask that reverses the 16 bytes of a block
;   +16 $L$poly        0xc2 in the top byte; multiplier in the folding steps
;   +32 $L$one_msb     1 in the last byte; byte-wise (vpaddb) counter step
;   +48 $L$two_lsb     2 in the first byte; 32-bit (vpaddd) step of two
;   +64 $L$one_lsb     1 in the first byte; 32-bit (vpaddd) step of one
854ALIGN 64
855$L$bswap_mask:
856DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
857$L$poly:
858DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
859$L$one_msb:
860DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
861$L$two_lsb:
862DB 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
863$L$one_lsb:
864DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
; NUL-terminated ASCII string:
; "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>"
865DB 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
866DB 101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
867DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
868DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
869ALIGN 64
;-----------------------------------------------------------------------
; gcm_se_handler -- exception handler referenced from the .xdata records of
; aesni_gcm_decrypt and aesni_gcm_encrypt.
;
; In:  r8 and r9 are used as pointers to two structures. The offsets used
;      below are not defined in this file; they match the Win64 CONTEXT
;      (r8) and DISPATCHER_CONTEXT (r9) layouts -- confirm against winnt.h:
;        CONTEXT: Rax 120, Rbx 144, Rsp 152, Rbp 160, Rsi 168, Rdi 176,
;                 R12..R15 216..240, Rip 248, Xmm6 512
;        DISPATCHER_CONTEXT: ControlPc 0, ImageBase 8, FunctionEntry 16,
;                 EstablisherFrame 24, ContextRecord 40, HandlerData 56
; Out: eax = 1.
; Purpose: rewrite the context record so that it describes the state of the
; interrupted function's caller, then hand over to RtlVirtualUnwind.
;-----------------------------------------------------------------------
870EXTERN __imp_RtlVirtualUnwind
871
872ALIGN 16
873gcm_se_handler:
874 push rsi
875 push rdi
876 push rbx
877 push rbp
878 push r12
879 push r13
880 push r14
881 push r15
882 pushfq
883 sub rsp,64
884
; rax = context Rax (the functions keep their entry rsp there),
; rbx = context Rip, rsi = image base, r11 = handler data (two RVAs).
885 mov rax,QWORD[120+r8]
886 mov rbx,QWORD[248+r8]
887
888 mov rsi,QWORD[8+r9]
889 mov r11,QWORD[56+r9]
890
; HandlerData[0] = RVA of the body label. Rip below it: still in the
; prologue, nothing pushed yet, rax already holds the entry rsp.
891 mov r10d,DWORD[r11]
892 lea r10,[r10*1+rsi]
893 cmp rbx,r10
894 jb NEAR $L$common_seh_tail
895
; HandlerData[1] = RVA of the abort label. Rip at or past it: the epilogue
; has already restored everything, so the context Rsp is the entry rsp.
896 mov rax,QWORD[152+r8]
897
898 mov r10d,DWORD[4+r11]
899 lea r10,[r10*1+rsi]
900 cmp rbx,r10
901 jae NEAR $L$common_seh_tail
902
; Inside the body: rax is the frame anchor. Recover the six pushed registers
; from [rax-48]..[rax-8] and write them into the context record.
903 mov rax,QWORD[120+r8]
904
905 mov r15,QWORD[((-48))+rax]
906 mov r14,QWORD[((-40))+rax]
907 mov r13,QWORD[((-32))+rax]
908 mov r12,QWORD[((-24))+rax]
909 mov rbp,QWORD[((-16))+rax]
910 mov rbx,QWORD[((-8))+rax]
911 mov QWORD[240+r8],r15
912 mov QWORD[232+r8],r14
913 mov QWORD[224+r8],r13
914 mov QWORD[216+r8],r12
915 mov QWORD[160+r8],rbp
916 mov QWORD[144+r8],rbx
917
; Copy 20 qwords (the saved xmm6..xmm15) from [rax-216] to context offset 512.
; The DD encodes the bytes FC F3 48 A5 = cld ; rep movsq.
918 lea rsi,[((-216))+rax]
919 lea rdi,[512+r8]
920 mov ecx,20
921 DD 0xa548f3fc
922
923$L$common_seh_tail:
; The functions saved rdi/rsi at [8+entry rsp] and [16+entry rsp]. Write
; them and the entry rsp into the context record.
924 mov rdi,QWORD[8+rax]
925 mov rsi,QWORD[16+rax]
926 mov QWORD[152+r8],rax
927 mov QWORD[168+r8],rsi
928 mov QWORD[176+r8],rdi
929
; Copy the whole context record (154 qwords) to the record at [40+r9].
; Same cld ; rep movsq byte sequence as above.
930 mov rdi,QWORD[40+r9]
931 mov rsi,r8
932 mov ecx,154
933 DD 0xa548f3fc
934
; Call RtlVirtualUnwind with rcx = 0, rdx = [8+r9], r8 = [r9],
; r9 = [16+r9], and four stack arguments at [32+rsp]..[56+rsp]:
; [40+r9], the address r9+56, the address r9+24, and 0.
935 mov rsi,r9
936 xor rcx,rcx
937 mov rdx,QWORD[8+rsi]
938 mov r8,QWORD[rsi]
939 mov r9,QWORD[16+rsi]
940 mov r10,QWORD[40+rsi]
941 lea r11,[56+rsi]
942 lea r12,[24+rsi]
943 mov QWORD[32+rsp],r10
944 mov QWORD[40+rsp],r11
945 mov QWORD[48+rsp],r12
946 mov QWORD[56+rsp],rcx
947 call QWORD[__imp_RtlVirtualUnwind]
948
; Return 1 (presumably ExceptionContinueSearch -- confirm against the SDK).
949 mov eax,1
950 add rsp,64
951 popfq
952 pop r15
953 pop r14
954 pop r13
955 pop r12
956 pop rbp
957 pop rbx
958 pop rdi
959 pop rsi
960 DB 0F3h,0C3h ;repret
961
962
; .pdata: one three-dword record per exported function, all image-relative:
; begin label, end label, and the address of its .xdata record.
963section .pdata rdata align=4
964ALIGN 4
965 DD $L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase
966 DD $L$SEH_end_aesni_gcm_decrypt wrt ..imagebase
967 DD $L$SEH_gcm_dec_info wrt ..imagebase
968
969 DD $L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase
970 DD $L$SEH_end_aesni_gcm_encrypt wrt ..imagebase
971 DD $L$SEH_gcm_enc_info wrt ..imagebase
; .xdata: each record is a 4-byte header (9,0,0,0), the image-relative
; address of gcm_se_handler, and two image-relative labels that the handler
; reads as its handler data (body start and abort label).
; NOTE(review): header byte 9 presumably encodes unwind version 1 with the
; exception-handler flag set, with no prologue size or unwind codes
; -- confirm against the UNWIND_INFO documentation.
972section .xdata rdata align=8
973ALIGN 8
974$L$SEH_gcm_dec_info:
975DB 9,0,0,0
976 DD gcm_se_handler wrt ..imagebase
977 DD $L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
978$L$SEH_gcm_enc_info:
979DB 9,0,0,0
980 DD gcm_se_handler wrt ..imagebase
981 DD $L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette