VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.0g/crypto/genasm-nasm/aesni-gcm-x86_64.S@ 69881

Last change on this file since 69881 was 69881, checked in by vboxsync, 7 years ago

Update OpenSSL to 1.1.0g.
bugref:8070: src/libs maintenance

  • Property svn:eol-style set to native
File size: 21.7 KB
Line 
; File preamble (NASM, Intel operand order).
; "default rel" makes memory operands RIP-relative unless stated otherwise.
1default rel
; XMMWORD/YMMWORD/ZMMWORD are defined as empty, so the size keywords that
; appear in front of memory operands below expand to nothing.
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
; All code (and the constant table further down) is placed in .text.
5section .text code align=64
6
7
8
;-----------------------------------------------------------------------
; _aesni_ctr32_ghash_6x -- internal helper, reached with `call` from
; aesni_gcm_decrypt and aesni_gcm_encrypt. Each loop iteration runs six
; counter blocks through vaesenc/vaesenclast while interleaving
; vpclmulqdq multiplications of six previously prepared blocks.
;
; Register state expected on entry (as set up by the callers in this file):
;   rdi   input pointer; advanced by 96 per iteration
;   rsi   output pointer; advanced by 96 per iteration
;   rdx   count of 16-byte blocks (callers shift the byte length right by 4)
;   rcx   key pointer biased by +128; entries are read at (N-128)+rcx
;   ebp   dword the callers read from offset 240 of the key structure;
;         compared with 11 to choose how many vaesenc rounds to run
;         (presumably the AES round count -- TODO confirm)
;   ebx   dword at offset 12 of the counter block, used for carry detection
;   r8    counter block pointer; [r8] doubles as scratch for the next counter
;   r9    table pointer biased by +64; entries are read at (N-32)+r9
;         (presumably precomputed GHASH key powers -- TODO confirm)
;   r10   running byte count, incremented by 0x60 per iteration
;   r11   address of $L$bswap_mask (the constants follow it)
;   r14   address from which the next six blocks to multiply are fetched
;   r15   upper limit for r14
;   xmm1  current counter block
;   xmm7 and (48+8)..(112+8)+rsp  six byte-reversed blocks to multiply
;   xmm8  accumulator
; The "+8" in every rsp offset accounts for the return address pushed by
; the caller's `call`.
;
; On return xmm9..xmm14 hold the six results of the last iteration (not yet
; stored; both callers store them at -96..-16 from rsi) and xmm8 holds the
; updated accumulator.
; Scratch: r12, r13, xmm0..xmm6, xmm15, flags.
;-----------------------------------------------------------------------
9ALIGN 32
10_aesni_ctr32_ghash_6x:
; xmm2 = 16-byte constant at $L$bswap_mask+32, i.e. $L$one_msb
11 vmovdqu xmm2,XMMWORD[32+r11]
12 sub rdx,6
13 vpxor xmm4,xmm4,xmm4
; xmm15 = first 16-byte key entry
14 vmovdqu xmm15,XMMWORD[((0-128))+rcx]
; xmm10..xmm14 = xmm1 plus 1..5 byte-wise additions of xmm2
15 vpaddb xmm10,xmm1,xmm2
16 vpaddb xmm11,xmm10,xmm2
17 vpaddb xmm12,xmm11,xmm2
18 vpaddb xmm13,xmm12,xmm2
19 vpaddb xmm14,xmm13,xmm2
20 vpxor xmm9,xmm1,xmm15
; zero the stack slot that is XORed into xmm8 on each pass
21 vmovdqu XMMWORD[(16+8)+rsp],xmm4
22 jmp NEAR $L$oop6x
23
24ALIGN 32
25$L$oop6x:
; 100663296 = 6<<24. A carry out of ebx means the byte-wise vpaddb shortcut
; would wrap, so the counters are rebuilt with dword adds in $L$handle_ctr32.
26 add ebx,100663296
27 jc NEAR $L$handle_ctr32
28 vmovdqu xmm3,XMMWORD[((0-32))+r9]
29 vpaddb xmm1,xmm14,xmm2
30 vpxor xmm10,xmm10,xmm15
31 vpxor xmm11,xmm11,xmm15
32
33$L$resume_ctr32:
; park the counter for the next iteration in [r8]; it is reloaded in $L$enc_tail
34 vmovdqu XMMWORD[r8],xmm1
35 vpclmulqdq xmm5,xmm7,xmm3,0x10
36 vpxor xmm12,xmm12,xmm15
37 vmovups xmm2,XMMWORD[((16-128))+rcx]
38 vpclmulqdq xmm6,xmm7,xmm3,0x01
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
; r12 = (r15 >= r14, unsigned) ? 0x60 : 0 -- built from setnc/neg/and below
56 xor r12,r12
57 cmp r15,r14
58
59 vaesenc xmm9,xmm9,xmm2
60 vmovdqu xmm0,XMMWORD[((48+8))+rsp]
61 vpxor xmm13,xmm13,xmm15
62 vpclmulqdq xmm1,xmm7,xmm3,0x00
63 vaesenc xmm10,xmm10,xmm2
64 vpxor xmm14,xmm14,xmm15
65 setnc r12b
66 vpclmulqdq xmm7,xmm7,xmm3,0x11
67 vaesenc xmm11,xmm11,xmm2
68 vmovdqu xmm3,XMMWORD[((16-32))+r9]
69 neg r12
70 vaesenc xmm12,xmm12,xmm2
71 vpxor xmm6,xmm6,xmm5
72 vpclmulqdq xmm5,xmm0,xmm3,0x00
73 vpxor xmm8,xmm8,xmm4
74 vaesenc xmm13,xmm13,xmm2
75 vpxor xmm4,xmm1,xmm5
76 and r12,0x60
77 vmovups xmm15,XMMWORD[((32-128))+rcx]
78 vpclmulqdq xmm1,xmm0,xmm3,0x10
79 vaesenc xmm14,xmm14,xmm2
80
81 vpclmulqdq xmm2,xmm0,xmm3,0x01
; advance the fetch pointer by 96 bytes unless it is already past the limit
82 lea r14,[r12*1+r14]
83 vaesenc xmm9,xmm9,xmm15
84 vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]
85 vpclmulqdq xmm3,xmm0,xmm3,0x11
86 vmovdqu xmm0,XMMWORD[((64+8))+rsp]
87 vaesenc xmm10,xmm10,xmm15
; From here on, pairs of movbe loads fetch the block at 80(r14), then 64, 48,
; 32, 16 and 0, byte-reversed, and store them into the stack slots for the
; next iteration.
88 movbe r13,QWORD[88+r14]
89 vaesenc xmm11,xmm11,xmm15
90 movbe r12,QWORD[80+r14]
91 vaesenc xmm12,xmm12,xmm15
92 mov QWORD[((32+8))+rsp],r13
93 vaesenc xmm13,xmm13,xmm15
94 mov QWORD[((40+8))+rsp],r12
95 vmovdqu xmm5,XMMWORD[((48-32))+r9]
96 vaesenc xmm14,xmm14,xmm15
97
98 vmovups xmm15,XMMWORD[((48-128))+rcx]
99 vpxor xmm6,xmm6,xmm1
100 vpclmulqdq xmm1,xmm0,xmm5,0x00
101 vaesenc xmm9,xmm9,xmm15
102 vpxor xmm6,xmm6,xmm2
103 vpclmulqdq xmm2,xmm0,xmm5,0x10
104 vaesenc xmm10,xmm10,xmm15
105 vpxor xmm7,xmm7,xmm3
106 vpclmulqdq xmm3,xmm0,xmm5,0x01
107 vaesenc xmm11,xmm11,xmm15
108 vpclmulqdq xmm5,xmm0,xmm5,0x11
109 vmovdqu xmm0,XMMWORD[((80+8))+rsp]
110 vaesenc xmm12,xmm12,xmm15
111 vaesenc xmm13,xmm13,xmm15
112 vpxor xmm4,xmm4,xmm1
113 vmovdqu xmm1,XMMWORD[((64-32))+r9]
114 vaesenc xmm14,xmm14,xmm15
115
116 vmovups xmm15,XMMWORD[((64-128))+rcx]
117 vpxor xmm6,xmm6,xmm2
118 vpclmulqdq xmm2,xmm0,xmm1,0x00
119 vaesenc xmm9,xmm9,xmm15
120 vpxor xmm6,xmm6,xmm3
121 vpclmulqdq xmm3,xmm0,xmm1,0x10
122 vaesenc xmm10,xmm10,xmm15
123 movbe r13,QWORD[72+r14]
124 vpxor xmm7,xmm7,xmm5
125 vpclmulqdq xmm5,xmm0,xmm1,0x01
126 vaesenc xmm11,xmm11,xmm15
127 movbe r12,QWORD[64+r14]
128 vpclmulqdq xmm1,xmm0,xmm1,0x11
129 vmovdqu xmm0,XMMWORD[((96+8))+rsp]
130 vaesenc xmm12,xmm12,xmm15
131 mov QWORD[((48+8))+rsp],r13
132 vaesenc xmm13,xmm13,xmm15
133 mov QWORD[((56+8))+rsp],r12
134 vpxor xmm4,xmm4,xmm2
135 vmovdqu xmm2,XMMWORD[((96-32))+r9]
136 vaesenc xmm14,xmm14,xmm15
137
138 vmovups xmm15,XMMWORD[((80-128))+rcx]
139 vpxor xmm6,xmm6,xmm3
140 vpclmulqdq xmm3,xmm0,xmm2,0x00
141 vaesenc xmm9,xmm9,xmm15
142 vpxor xmm6,xmm6,xmm5
143 vpclmulqdq xmm5,xmm0,xmm2,0x10
144 vaesenc xmm10,xmm10,xmm15
145 movbe r13,QWORD[56+r14]
146 vpxor xmm7,xmm7,xmm1
147 vpclmulqdq xmm1,xmm0,xmm2,0x01
; fold the sixth stored block into the accumulator before it is multiplied
148 vpxor xmm8,xmm8,XMMWORD[((112+8))+rsp]
149 vaesenc xmm11,xmm11,xmm15
150 movbe r12,QWORD[48+r14]
151 vpclmulqdq xmm2,xmm0,xmm2,0x11
152 vaesenc xmm12,xmm12,xmm15
153 mov QWORD[((64+8))+rsp],r13
154 vaesenc xmm13,xmm13,xmm15
155 mov QWORD[((72+8))+rsp],r12
156 vpxor xmm4,xmm4,xmm3
157 vmovdqu xmm3,XMMWORD[((112-32))+r9]
158 vaesenc xmm14,xmm14,xmm15
159
160 vmovups xmm15,XMMWORD[((96-128))+rcx]
161 vpxor xmm6,xmm6,xmm5
162 vpclmulqdq xmm5,xmm8,xmm3,0x10
163 vaesenc xmm9,xmm9,xmm15
164 vpxor xmm6,xmm6,xmm1
165 vpclmulqdq xmm1,xmm8,xmm3,0x01
166 vaesenc xmm10,xmm10,xmm15
167 movbe r13,QWORD[40+r14]
168 vpxor xmm7,xmm7,xmm2
169 vpclmulqdq xmm2,xmm8,xmm3,0x00
170 vaesenc xmm11,xmm11,xmm15
171 movbe r12,QWORD[32+r14]
172 vpclmulqdq xmm8,xmm8,xmm3,0x11
173 vaesenc xmm12,xmm12,xmm15
174 mov QWORD[((80+8))+rsp],r13
175 vaesenc xmm13,xmm13,xmm15
176 mov QWORD[((88+8))+rsp],r12
177 vpxor xmm6,xmm6,xmm5
178 vaesenc xmm14,xmm14,xmm15
179 vpxor xmm6,xmm6,xmm1
180
181 vmovups xmm15,XMMWORD[((112-128))+rcx]
182 vpslldq xmm5,xmm6,8
183 vpxor xmm4,xmm4,xmm2
; xmm3 = constant at $L$bswap_mask+16, i.e. $L$poly
184 vmovdqu xmm3,XMMWORD[16+r11]
185
186 vaesenc xmm9,xmm9,xmm15
187 vpxor xmm7,xmm7,xmm8
188 vaesenc xmm10,xmm10,xmm15
189 vpxor xmm4,xmm4,xmm5
190 movbe r13,QWORD[24+r14]
191 vaesenc xmm11,xmm11,xmm15
192 movbe r12,QWORD[16+r14]
; first multiply-by-$L$poly step; the second one is in $L$enc_tail
193 vpalignr xmm0,xmm4,xmm4,8
194 vpclmulqdq xmm4,xmm4,xmm3,0x10
195 mov QWORD[((96+8))+rsp],r13
196 vaesenc xmm12,xmm12,xmm15
197 mov QWORD[((104+8))+rsp],r12
198 vaesenc xmm13,xmm13,xmm15
199 vmovups xmm1,XMMWORD[((128-128))+rcx]
200 vaesenc xmm14,xmm14,xmm15
201
202 vaesenc xmm9,xmm9,xmm1
203 vmovups xmm15,XMMWORD[((144-128))+rcx]
204 vaesenc xmm10,xmm10,xmm1
205 vpsrldq xmm6,xmm6,8
206 vaesenc xmm11,xmm11,xmm1
207 vpxor xmm7,xmm7,xmm6
208 vaesenc xmm12,xmm12,xmm1
209 vpxor xmm4,xmm4,xmm0
; r13/r12 loaded here are stored to the stack only in $L$enc_tail
210 movbe r13,QWORD[8+r14]
211 vaesenc xmm13,xmm13,xmm1
212 movbe r12,QWORD[r14]
213 vaesenc xmm14,xmm14,xmm1
214 vmovups xmm1,XMMWORD[((160-128))+rcx]
; ebp < 11: go straight to the tail; the flags of this cmp are reused by the
; `je` further down (the vector instructions in between leave them intact)
215 cmp ebp,11
216 jb NEAR $L$enc_tail
217
218 vaesenc xmm9,xmm9,xmm15
219 vaesenc xmm10,xmm10,xmm15
220 vaesenc xmm11,xmm11,xmm15
221 vaesenc xmm12,xmm12,xmm15
222 vaesenc xmm13,xmm13,xmm15
223 vaesenc xmm14,xmm14,xmm15
224
225 vaesenc xmm9,xmm9,xmm1
226 vaesenc xmm10,xmm10,xmm1
227 vaesenc xmm11,xmm11,xmm1
228 vaesenc xmm12,xmm12,xmm1
229 vaesenc xmm13,xmm13,xmm1
230 vmovups xmm15,XMMWORD[((176-128))+rcx]
231 vaesenc xmm14,xmm14,xmm1
232 vmovups xmm1,XMMWORD[((192-128))+rcx]
; ebp == 11: two extra rounds were enough
233 je NEAR $L$enc_tail
234
235 vaesenc xmm9,xmm9,xmm15
236 vaesenc xmm10,xmm10,xmm15
237 vaesenc xmm11,xmm11,xmm15
238 vaesenc xmm12,xmm12,xmm15
239 vaesenc xmm13,xmm13,xmm15
240 vaesenc xmm14,xmm14,xmm15
241
242 vaesenc xmm9,xmm9,xmm1
243 vaesenc xmm10,xmm10,xmm1
244 vaesenc xmm11,xmm11,xmm1
245 vaesenc xmm12,xmm12,xmm1
246 vaesenc xmm13,xmm13,xmm1
247 vmovups xmm15,XMMWORD[((208-128))+rcx]
248 vaesenc xmm14,xmm14,xmm1
249 vmovups xmm1,XMMWORD[((224-128))+rcx]
250 jmp NEAR $L$enc_tail
251
252ALIGN 32
253$L$handle_ctr32:
; Slow path for the counter update: byte-reverse the counter with the mask at
; [r11], add with 32-bit lanes using the constants at 64+r11 ($L$one_lsb) and
; 48+r11 ($L$two_lsb), then byte-reverse each result back. Also performs the
; loads/XORs that the fast path in $L$oop6x would have done.
254 vmovdqu xmm0,XMMWORD[r11]
255 vpshufb xmm6,xmm1,xmm0
256 vmovdqu xmm5,XMMWORD[48+r11]
257 vpaddd xmm10,xmm6,XMMWORD[64+r11]
258 vpaddd xmm11,xmm6,xmm5
259 vmovdqu xmm3,XMMWORD[((0-32))+r9]
260 vpaddd xmm12,xmm10,xmm5
261 vpshufb xmm10,xmm10,xmm0
262 vpaddd xmm13,xmm11,xmm5
263 vpshufb xmm11,xmm11,xmm0
264 vpxor xmm10,xmm10,xmm15
265 vpaddd xmm14,xmm12,xmm5
266 vpshufb xmm12,xmm12,xmm0
267 vpxor xmm11,xmm11,xmm15
268 vpaddd xmm1,xmm13,xmm5
269 vpshufb xmm13,xmm13,xmm0
270 vpshufb xmm14,xmm14,xmm0
271 vpshufb xmm1,xmm1,xmm0
272 jmp NEAR $L$resume_ctr32
273
274ALIGN 32
275$L$enc_tail:
; Last vaesenc round with xmm15, then vaesenclast with (xmm1 XOR input block),
; which applies the final key entry and the XOR with the data at rdi in one step.
276 vaesenc xmm9,xmm9,xmm15
277 vmovdqu XMMWORD[(16+8)+rsp],xmm7
278 vpalignr xmm8,xmm4,xmm4,8
279 vaesenc xmm10,xmm10,xmm15
280 vpclmulqdq xmm4,xmm4,xmm3,0x10
281 vpxor xmm2,xmm1,XMMWORD[rdi]
282 vaesenc xmm11,xmm11,xmm15
283 vpxor xmm0,xmm1,XMMWORD[16+rdi]
284 vaesenc xmm12,xmm12,xmm15
285 vpxor xmm5,xmm1,XMMWORD[32+rdi]
286 vaesenc xmm13,xmm13,xmm15
287 vpxor xmm6,xmm1,XMMWORD[48+rdi]
288 vaesenc xmm14,xmm14,xmm15
289 vpxor xmm7,xmm1,XMMWORD[64+rdi]
290 vpxor xmm3,xmm1,XMMWORD[80+rdi]
; reload the counter that $L$resume_ctr32 parked in [r8]
291 vmovdqu xmm1,XMMWORD[r8]
292
; the vpaddb chain below prepares the following five counters in
; xmm0, xmm5, xmm6, xmm7, xmm3 for the next iteration
293 vaesenclast xmm9,xmm9,xmm2
294 vmovdqu xmm2,XMMWORD[32+r11]
295 vaesenclast xmm10,xmm10,xmm0
296 vpaddb xmm0,xmm1,xmm2
297 mov QWORD[((112+8))+rsp],r13
298 lea rdi,[96+rdi]
299 vaesenclast xmm11,xmm11,xmm5
300 vpaddb xmm5,xmm0,xmm2
301 mov QWORD[((120+8))+rsp],r12
302 lea rsi,[96+rsi]
303 vmovdqu xmm15,XMMWORD[((0-128))+rcx]
304 vaesenclast xmm12,xmm12,xmm6
305 vpaddb xmm6,xmm5,xmm2
306 vaesenclast xmm13,xmm13,xmm7
307 vpaddb xmm7,xmm6,xmm2
308 vaesenclast xmm14,xmm14,xmm3
309 vpaddb xmm3,xmm7,xmm2
310
; account for 96 processed bytes; a borrow from rdx means no further group of six
311 add r10,0x60
312 sub rdx,0x6
313 jc NEAR $L$6x_done
314
; store the six results (rsi was already advanced, hence the negative offsets)
; and move the prepared counters into place for the next pass
315 vmovups XMMWORD[(-96)+rsi],xmm9
316 vpxor xmm9,xmm1,xmm15
317 vmovups XMMWORD[(-80)+rsi],xmm10
318 vmovdqa xmm10,xmm0
319 vmovups XMMWORD[(-64)+rsi],xmm11
320 vmovdqa xmm11,xmm5
321 vmovups XMMWORD[(-48)+rsi],xmm12
322 vmovdqa xmm12,xmm6
323 vmovups XMMWORD[(-32)+rsi],xmm13
324 vmovdqa xmm13,xmm7
325 vmovups XMMWORD[(-16)+rsi],xmm14
326 vmovdqa xmm14,xmm3
327 vmovdqu xmm7,XMMWORD[((32+8))+rsp]
328 jmp NEAR $L$oop6x
329
330$L$6x_done:
; finish the accumulator: xmm8 ^= saved xmm7 ^ xmm4
331 vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]
332 vpxor xmm8,xmm8,xmm4
333
334 DB 0F3h,0C3h ;repret
335
;-----------------------------------------------------------------------
; aesni_gcm_decrypt -- exported entry point, Microsoft x64 calling convention.
; The prologue moves the Win64 arguments into the register layout used by the
; rest of the file:
;   rcx      -> rdi  input pointer (blocks are read from it)
;   rdx      -> rsi  output pointer (results are stored through it)
;   r8       -> rdx  length in bytes
;   r9       -> rcx  key structure pointer
;   [40+rsp] -> r8   16-byte counter block pointer
;   [48+rsp] -> r9   pointer to the 16-byte accumulator; the code also reads
;                    table entries at positive offsets from it
; Presumably matches OpenSSL's C prototype for this symbol -- TODO confirm
; against the declaring header.
;
; Returns in rax the byte count accumulated in r10 by the helper; 0 when the
; length is below 0x60, in which case nothing is touched.
; rdi/rsi are saved in the caller-provided home slots, rbx/rbp/r12-r15 are
; pushed, and xmm6-xmm15 are saved below them.
;-----------------------------------------------------------------------
336global aesni_gcm_decrypt
337
338ALIGN 32
339aesni_gcm_decrypt:
340 mov QWORD[8+rsp],rdi ;WIN64 prologue
341 mov QWORD[16+rsp],rsi
342 mov rax,rsp
343$L$SEH_begin_aesni_gcm_decrypt:
344 mov rdi,rcx
345 mov rsi,rdx
346 mov rdx,r8
347 mov rcx,r9
348 mov r8,QWORD[40+rsp]
349 mov r9,QWORD[48+rsp]
350
351
; r10 = return value, 0 until the helper starts counting
352 xor r10,r10
353
354
355
; fewer than 96 bytes: nothing to do
356 cmp rdx,0x60
357 jb NEAR $L$gcm_dec_abort
358
; rax = frame anchor (rsp at entry); everything saved below is addressed from it
359 lea rax,[rsp]
360 push rbx
361 push rbp
362 push r12
363 push r13
364 push r14
365 push r15
; 6 pushes (48 bytes) + 168 = 216, so rax-216 is the new rsp
366 lea rsp,[((-168))+rsp]
367 movaps XMMWORD[(-216)+rax],xmm6
368 movaps XMMWORD[(-200)+rax],xmm7
369 movaps XMMWORD[(-184)+rax],xmm8
370 movaps XMMWORD[(-168)+rax],xmm9
371 movaps XMMWORD[(-152)+rax],xmm10
372 movaps XMMWORD[(-136)+rax],xmm11
373 movaps XMMWORD[(-120)+rax],xmm12
374 movaps XMMWORD[(-104)+rax],xmm13
375 movaps XMMWORD[(-88)+rax],xmm14
376 movaps XMMWORD[(-72)+rax],xmm15
377$L$gcm_dec_body:
378 vzeroupper
379
; xmm1 = counter block, ebx = its last dword
380 vmovdqu xmm1,XMMWORD[r8]
381 add rsp,-128
382 mov ebx,DWORD[12+r8]
383 lea r11,[$L$bswap_mask]
384 lea r14,[((-128))+rcx]
385 mov r15,0xf80
; xmm8 = accumulator loaded from [r9], byte-reversed below
386 vmovdqu xmm8,XMMWORD[r9]
; scratch area below the saved registers, aligned to 128 bytes
387 and rsp,-128
388 vmovdqu xmm0,XMMWORD[r11]
; bias rcx by +128 and r9 by +64; the helper addresses relative to these
389 lea rcx,[128+rcx]
390 lea r9,[((32+32))+r9]
; ebp = dword at offset 240 of the key structure
391 mov ebp,DWORD[((240-128))+rcx]
392 vpshufb xmm8,xmm8,xmm0
393
; If (rsp & 0xf80) - ((key-128) & 0xf80) lies in [0,768), lower rsp by that
; difference. Presumably this keeps the stack scratch area from aliasing the
; key structure in the cache, as the label name suggests -- TODO confirm.
394 and r14,r15
395 and r15,rsp
396 sub r15,r14
397 jc NEAR $L$dec_no_key_aliasing
398 cmp r15,768
399 jnc NEAR $L$dec_no_key_aliasing
400 sub rsp,r15
401$L$dec_no_key_aliasing:
402
; Load the first six input blocks, byte-reverse them and stage them for the
; helper: block 5 stays in xmm7, blocks 4..0 go to 48..112+rsp.
403 vmovdqu xmm7,XMMWORD[80+rdi]
; r14 = fetch pointer for the helper's movbe loads (the input itself)
404 lea r14,[rdi]
405 vmovdqu xmm4,XMMWORD[64+rdi]
406
407
408
409
410
411
412
; r15 = rdi + length - 192, the limit r14 is compared against
413 lea r15,[((-192))+rdx*1+rdi]
414
415 vmovdqu xmm5,XMMWORD[48+rdi]
; rdx = number of 16-byte blocks
416 shr rdx,4
417 xor r10,r10
418 vmovdqu xmm6,XMMWORD[32+rdi]
419 vpshufb xmm7,xmm7,xmm0
420 vmovdqu xmm2,XMMWORD[16+rdi]
421 vpshufb xmm4,xmm4,xmm0
422 vmovdqu xmm3,XMMWORD[rdi]
423 vpshufb xmm5,xmm5,xmm0
424 vmovdqu XMMWORD[48+rsp],xmm4
425 vpshufb xmm6,xmm6,xmm0
426 vmovdqu XMMWORD[64+rsp],xmm5
427 vpshufb xmm2,xmm2,xmm0
428 vmovdqu XMMWORD[80+rsp],xmm6
429 vpshufb xmm3,xmm3,xmm0
430 vmovdqu XMMWORD[96+rsp],xmm2
431 vmovdqu XMMWORD[112+rsp],xmm3
432
433 call _aesni_ctr32_ghash_6x
434
; the helper returns with its last six results still in registers
435 vmovups XMMWORD[(-96)+rsi],xmm9
436 vmovups XMMWORD[(-80)+rsi],xmm10
437 vmovups XMMWORD[(-64)+rsi],xmm11
438 vmovups XMMWORD[(-48)+rsi],xmm12
439 vmovups XMMWORD[(-32)+rsi],xmm13
440 vmovups XMMWORD[(-16)+rsi],xmm14
441
; byte-reverse the accumulator and write it back to the original r9 address
442 vpshufb xmm8,xmm8,XMMWORD[r11]
443 vmovdqu XMMWORD[(-64)+r9],xmm8
444
445 vzeroupper
; restore xmm6-xmm15 and the pushed registers relative to the frame anchor
446 movaps xmm6,XMMWORD[((-216))+rax]
447 movaps xmm7,XMMWORD[((-200))+rax]
448 movaps xmm8,XMMWORD[((-184))+rax]
449 movaps xmm9,XMMWORD[((-168))+rax]
450 movaps xmm10,XMMWORD[((-152))+rax]
451 movaps xmm11,XMMWORD[((-136))+rax]
452 movaps xmm12,XMMWORD[((-120))+rax]
453 movaps xmm13,XMMWORD[((-104))+rax]
454 movaps xmm14,XMMWORD[((-88))+rax]
455 movaps xmm15,XMMWORD[((-72))+rax]
456 mov r15,QWORD[((-48))+rax]
457 mov r14,QWORD[((-40))+rax]
458 mov r13,QWORD[((-32))+rax]
459 mov r12,QWORD[((-24))+rax]
460 mov rbp,QWORD[((-16))+rax]
461 mov rbx,QWORD[((-8))+rax]
462 lea rsp,[rax]
463$L$gcm_dec_abort:
; return value = bytes processed
464 mov rax,r10
465 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
466 mov rsi,QWORD[16+rsp]
467 DB 0F3h,0C3h ;repret
468$L$SEH_end_aesni_gcm_decrypt:
469
;-----------------------------------------------------------------------
; _aesni_ctr32_6x -- internal helper, called twice from aesni_gcm_encrypt.
; Runs six consecutive counter blocks through the AES rounds, XORs the
; results with 96 bytes read from rdi and stores 96 bytes at rsi.
;
; In:   rdi  input pointer         rsi  output pointer
;       rcx  key pointer biased by +128 (entries read at (N-128)+rcx)
;       ebp  value that controls the round loop (it runs ebp-1 times)
;       ebx  last dword of the counter block, used for carry detection
;       r11  address of $L$bswap_mask
;       xmm0 byte-reversal mask (used only on the $L$handle_ctr32_2 path)
;       xmm1 current counter block
; Out:  xmm9..xmm14 = the six blocks just stored; xmm1 = next counter;
;       rdi and rsi advanced by 96.
; Scratch: r12, r13, xmm2..xmm6, xmm8, xmm15, flags.
;-----------------------------------------------------------------------
470ALIGN 32
471_aesni_ctr32_6x:
472 vmovdqu xmm4,XMMWORD[((0-128))+rcx]
; xmm2 = constant at $L$bswap_mask+32, i.e. $L$one_msb
473 vmovdqu xmm2,XMMWORD[32+r11]
; r13 = loop count, r12 = address of the third key entry
474 lea r13,[((-1))+rbp]
475 vmovups xmm15,XMMWORD[((16-128))+rcx]
476 lea r12,[((32-128))+rcx]
477 vpxor xmm9,xmm1,xmm4
; 100663296 = 6<<24; a carry means byte-wise counter increments would wrap
478 add ebx,100663296
479 jc NEAR $L$handle_ctr32_2
; fast path: derive counters 1..5 and the next counter with byte adds,
; XORing each with the first key entry as it becomes available
480 vpaddb xmm10,xmm1,xmm2
481 vpaddb xmm11,xmm10,xmm2
482 vpxor xmm10,xmm10,xmm4
483 vpaddb xmm12,xmm11,xmm2
484 vpxor xmm11,xmm11,xmm4
485 vpaddb xmm13,xmm12,xmm2
486 vpxor xmm12,xmm12,xmm4
487 vpaddb xmm14,xmm13,xmm2
488 vpxor xmm13,xmm13,xmm4
489 vpaddb xmm1,xmm14,xmm2
490 vpxor xmm14,xmm14,xmm4
491 jmp NEAR $L$oop_ctr32
492
493ALIGN 16
494$L$oop_ctr32:
; one vaesenc round on all six blocks, then fetch the next key entry
495 vaesenc xmm9,xmm9,xmm15
496 vaesenc xmm10,xmm10,xmm15
497 vaesenc xmm11,xmm11,xmm15
498 vaesenc xmm12,xmm12,xmm15
499 vaesenc xmm13,xmm13,xmm15
500 vaesenc xmm14,xmm14,xmm15
501 vmovups xmm15,XMMWORD[r12]
502 lea r12,[16+r12]
503 dec r13d
504 jnz NEAR $L$oop_ctr32
505
; xmm3 = final key entry; XORing it with the input first lets vaesenclast
; apply the last key and the data XOR in a single instruction
506 vmovdqu xmm3,XMMWORD[r12]
507 vaesenc xmm9,xmm9,xmm15
508 vpxor xmm4,xmm3,XMMWORD[rdi]
509 vaesenc xmm10,xmm10,xmm15
510 vpxor xmm5,xmm3,XMMWORD[16+rdi]
511 vaesenc xmm11,xmm11,xmm15
512 vpxor xmm6,xmm3,XMMWORD[32+rdi]
513 vaesenc xmm12,xmm12,xmm15
514 vpxor xmm8,xmm3,XMMWORD[48+rdi]
515 vaesenc xmm13,xmm13,xmm15
516 vpxor xmm2,xmm3,XMMWORD[64+rdi]
517 vaesenc xmm14,xmm14,xmm15
518 vpxor xmm3,xmm3,XMMWORD[80+rdi]
519 lea rdi,[96+rdi]
520
521 vaesenclast xmm9,xmm9,xmm4
522 vaesenclast xmm10,xmm10,xmm5
523 vaesenclast xmm11,xmm11,xmm6
524 vaesenclast xmm12,xmm12,xmm8
525 vaesenclast xmm13,xmm13,xmm2
526 vaesenclast xmm14,xmm14,xmm3
527 vmovups XMMWORD[rsi],xmm9
528 vmovups XMMWORD[16+rsi],xmm10
529 vmovups XMMWORD[32+rsi],xmm11
530 vmovups XMMWORD[48+rsi],xmm12
531 vmovups XMMWORD[64+rsi],xmm13
532 vmovups XMMWORD[80+rsi],xmm14
533 lea rsi,[96+rsi]
534
535 DB 0F3h,0C3h ;repret
536ALIGN 32
537$L$handle_ctr32_2:
; Slow path for the counter update: byte-reverse the counter with xmm0, add
; with 32-bit lanes using the constants at 64+r11 ($L$one_lsb) and 48+r11
; ($L$two_lsb), byte-reverse each result back and XOR it with xmm4.
538 vpshufb xmm6,xmm1,xmm0
539 vmovdqu xmm5,XMMWORD[48+r11]
540 vpaddd xmm10,xmm6,XMMWORD[64+r11]
541 vpaddd xmm11,xmm6,xmm5
542 vpaddd xmm12,xmm10,xmm5
543 vpshufb xmm10,xmm10,xmm0
544 vpaddd xmm13,xmm11,xmm5
545 vpshufb xmm11,xmm11,xmm0
546 vpxor xmm10,xmm10,xmm4
547 vpaddd xmm14,xmm12,xmm5
548 vpshufb xmm12,xmm12,xmm0
549 vpxor xmm11,xmm11,xmm4
550 vpaddd xmm1,xmm13,xmm5
551 vpshufb xmm13,xmm13,xmm0
552 vpxor xmm12,xmm12,xmm4
553 vpshufb xmm14,xmm14,xmm0
554 vpxor xmm13,xmm13,xmm4
555 vpshufb xmm1,xmm1,xmm0
556 vpxor xmm14,xmm14,xmm4
557 jmp NEAR $L$oop_ctr32
558
559
;-----------------------------------------------------------------------
; aesni_gcm_encrypt -- exported entry point, Microsoft x64 calling convention.
; The prologue moves the Win64 arguments into the register layout used by the
; rest of the file:
;   rcx      -> rdi  input pointer
;   rdx      -> rsi  output pointer
;   r8       -> rdx  length in bytes
;   r9       -> rcx  key structure pointer
;   [40+rsp] -> r8   16-byte counter block pointer
;   [48+rsp] -> r9   pointer to the 16-byte accumulator; the code also reads
;                    table entries at positive offsets from it
; Presumably matches OpenSSL's C prototype for this symbol -- TODO confirm
; against the declaring header.
;
; Returns in rax the byte count accumulated in r10; 0 when the length is below
; 0x60*3 (288) bytes, in which case nothing is touched.
;
; Flow: two calls to _aesni_ctr32_6x produce the first 12 output blocks, then
; _aesni_ctr32_ghash_6x handles the remaining groups of six while multiplying
; earlier output, and the code after that call multiplies the blocks that are
; still pending on return.
;-----------------------------------------------------------------------
560global aesni_gcm_encrypt
561
562ALIGN 32
563aesni_gcm_encrypt:
564 mov QWORD[8+rsp],rdi ;WIN64 prologue
565 mov QWORD[16+rsp],rsi
566 mov rax,rsp
567$L$SEH_begin_aesni_gcm_encrypt:
568 mov rdi,rcx
569 mov rsi,rdx
570 mov rdx,r8
571 mov rcx,r9
572 mov r8,QWORD[40+rsp]
573 mov r9,QWORD[48+rsp]
574
575
; r10 = return value, 0 on the early-out path
576 xor r10,r10
577
578
579
580
; fewer than 288 bytes: nothing to do
581 cmp rdx,0x60*3
582 jb NEAR $L$gcm_enc_abort
583
; rax = frame anchor (rsp at entry); everything saved below is addressed from it
584 lea rax,[rsp]
585 push rbx
586 push rbp
587 push r12
588 push r13
589 push r14
590 push r15
; 6 pushes (48 bytes) + 168 = 216, so rax-216 is the new rsp
591 lea rsp,[((-168))+rsp]
592 movaps XMMWORD[(-216)+rax],xmm6
593 movaps XMMWORD[(-200)+rax],xmm7
594 movaps XMMWORD[(-184)+rax],xmm8
595 movaps XMMWORD[(-168)+rax],xmm9
596 movaps XMMWORD[(-152)+rax],xmm10
597 movaps XMMWORD[(-136)+rax],xmm11
598 movaps XMMWORD[(-120)+rax],xmm12
599 movaps XMMWORD[(-104)+rax],xmm13
600 movaps XMMWORD[(-88)+rax],xmm14
601 movaps XMMWORD[(-72)+rax],xmm15
602$L$gcm_enc_body:
603 vzeroupper
604
; xmm1 = counter block, ebx = its last dword
605 vmovdqu xmm1,XMMWORD[r8]
606 add rsp,-128
607 mov ebx,DWORD[12+r8]
608 lea r11,[$L$bswap_mask]
609 lea r14,[((-128))+rcx]
610 mov r15,0xf80
; bias rcx by +128; the helpers address key entries relative to it
611 lea rcx,[128+rcx]
; xmm0 = byte-reversal mask
612 vmovdqu xmm0,XMMWORD[r11]
; scratch area below the saved registers, aligned to 128 bytes
613 and rsp,-128
; ebp = dword at offset 240 of the key structure
614 mov ebp,DWORD[((240-128))+rcx]
615
; If (rsp & 0xf80) - ((key-128) & 0xf80) lies in [0,768), lower rsp by that
; difference. Presumably this keeps the stack scratch area from aliasing the
; key structure in the cache, as the label name suggests -- TODO confirm.
616 and r14,r15
617 and r15,rsp
618 sub r15,r14
619 jc NEAR $L$enc_no_key_aliasing
620 cmp r15,768
621 jnc NEAR $L$enc_no_key_aliasing
622 sub rsp,r15
623$L$enc_no_key_aliasing:
624
; r14 = fetch pointer for the helper's movbe loads: the output buffer
625 lea r14,[rsi]
626
627
628
629
630
631
632
633
; r15 = rsi + length - 192, the limit r14 is compared against
634 lea r15,[((-192))+rdx*1+rsi]
635
; rdx = number of 16-byte blocks
636 shr rdx,4
637
; first six blocks; byte-reverse the results and stage them for the helper
; (the sixth stays in xmm7, the others go to 48..112+rsp)
638 call _aesni_ctr32_6x
639 vpshufb xmm8,xmm9,xmm0
640 vpshufb xmm2,xmm10,xmm0
641 vmovdqu XMMWORD[112+rsp],xmm8
642 vpshufb xmm4,xmm11,xmm0
643 vmovdqu XMMWORD[96+rsp],xmm2
644 vpshufb xmm5,xmm12,xmm0
645 vmovdqu XMMWORD[80+rsp],xmm4
646 vpshufb xmm6,xmm13,xmm0
647 vmovdqu XMMWORD[64+rsp],xmm5
648 vpshufb xmm7,xmm14,xmm0
649 vmovdqu XMMWORD[48+rsp],xmm6
650
; second six blocks (written to memory; fetched later through r14)
651 call _aesni_ctr32_6x
652
; xmm8 = accumulator from [r9], byte-reversed; r9 biased by +64 for the helper;
; 12 blocks are already done, so start the byte count at 0x60*2
653 vmovdqu xmm8,XMMWORD[r9]
654 lea r9,[((32+32))+r9]
655 sub rdx,12
656 mov r10,0x60*2
657 vpshufb xmm8,xmm8,xmm0
658
659 call _aesni_ctr32_ghash_6x
; Tail: the helper's last six results (xmm9..xmm14) are stored and
; byte-reversed, and together with the blocks still staged at 32..112+rsp they
; are multiplied by table entries read through r9 and folded into xmm8.
; NOTE(review): the unpack/XOR pairs look like a Karatsuba-style middle-term
; computation -- confirm against the generating perlasm source.
660 vmovdqu xmm7,XMMWORD[32+rsp]
661 vmovdqu xmm0,XMMWORD[r11]
662 vmovdqu xmm3,XMMWORD[((0-32))+r9]
663 vpunpckhqdq xmm1,xmm7,xmm7
664 vmovdqu xmm15,XMMWORD[((32-32))+r9]
665 vmovups XMMWORD[(-96)+rsi],xmm9
666 vpshufb xmm9,xmm9,xmm0
667 vpxor xmm1,xmm1,xmm7
668 vmovups XMMWORD[(-80)+rsi],xmm10
669 vpshufb xmm10,xmm10,xmm0
670 vmovups XMMWORD[(-64)+rsi],xmm11
671 vpshufb xmm11,xmm11,xmm0
672 vmovups XMMWORD[(-48)+rsi],xmm12
673 vpshufb xmm12,xmm12,xmm0
674 vmovups XMMWORD[(-32)+rsi],xmm13
675 vpshufb xmm13,xmm13,xmm0
676 vmovups XMMWORD[(-16)+rsi],xmm14
677 vpshufb xmm14,xmm14,xmm0
; park byte-reversed xmm9 at 16+rsp; it is XORed into xmm7 further down
678 vmovdqu XMMWORD[16+rsp],xmm9
679 vmovdqu xmm6,XMMWORD[48+rsp]
680 vmovdqu xmm0,XMMWORD[((16-32))+r9]
681 vpunpckhqdq xmm2,xmm6,xmm6
682 vpclmulqdq xmm5,xmm7,xmm3,0x00
683 vpxor xmm2,xmm2,xmm6
684 vpclmulqdq xmm7,xmm7,xmm3,0x11
685 vpclmulqdq xmm1,xmm1,xmm15,0x00
686
687 vmovdqu xmm9,XMMWORD[64+rsp]
688 vpclmulqdq xmm4,xmm6,xmm0,0x00
689 vmovdqu xmm3,XMMWORD[((48-32))+r9]
690 vpxor xmm4,xmm4,xmm5
691 vpunpckhqdq xmm5,xmm9,xmm9
692 vpclmulqdq xmm6,xmm6,xmm0,0x11
693 vpxor xmm5,xmm5,xmm9
694 vpxor xmm6,xmm6,xmm7
695 vpclmulqdq xmm2,xmm2,xmm15,0x10
696 vmovdqu xmm15,XMMWORD[((80-32))+r9]
697 vpxor xmm2,xmm2,xmm1
698
699 vmovdqu xmm1,XMMWORD[80+rsp]
700 vpclmulqdq xmm7,xmm9,xmm3,0x00
701 vmovdqu xmm0,XMMWORD[((64-32))+r9]
702 vpxor xmm7,xmm7,xmm4
703 vpunpckhqdq xmm4,xmm1,xmm1
704 vpclmulqdq xmm9,xmm9,xmm3,0x11
705 vpxor xmm4,xmm4,xmm1
706 vpxor xmm9,xmm9,xmm6
707 vpclmulqdq xmm5,xmm5,xmm15,0x00
708 vpxor xmm5,xmm5,xmm2
709
710 vmovdqu xmm2,XMMWORD[96+rsp]
711 vpclmulqdq xmm6,xmm1,xmm0,0x00
712 vmovdqu xmm3,XMMWORD[((96-32))+r9]
713 vpxor xmm6,xmm6,xmm7
714 vpunpckhqdq xmm7,xmm2,xmm2
715 vpclmulqdq xmm1,xmm1,xmm0,0x11
716 vpxor xmm7,xmm7,xmm2
717 vpxor xmm1,xmm1,xmm9
718 vpclmulqdq xmm4,xmm4,xmm15,0x10
719 vmovdqu xmm15,XMMWORD[((128-32))+r9]
720 vpxor xmm4,xmm4,xmm5
721
; fold the sixth staged block into the accumulator before multiplying it
722 vpxor xmm8,xmm8,XMMWORD[112+rsp]
723 vpclmulqdq xmm5,xmm2,xmm3,0x00
724 vmovdqu xmm0,XMMWORD[((112-32))+r9]
725 vpunpckhqdq xmm9,xmm8,xmm8
726 vpxor xmm5,xmm5,xmm6
727 vpclmulqdq xmm2,xmm2,xmm3,0x11
728 vpxor xmm9,xmm9,xmm8
729 vpxor xmm2,xmm2,xmm1
730 vpclmulqdq xmm7,xmm7,xmm15,0x00
731 vpxor xmm4,xmm7,xmm4
732
; second group: start over at the first table entry with xmm14..xmm10
733 vpclmulqdq xmm6,xmm8,xmm0,0x00
734 vmovdqu xmm3,XMMWORD[((0-32))+r9]
735 vpunpckhqdq xmm1,xmm14,xmm14
736 vpclmulqdq xmm8,xmm8,xmm0,0x11
737 vpxor xmm1,xmm1,xmm14
738 vpxor xmm5,xmm6,xmm5
739 vpclmulqdq xmm9,xmm9,xmm15,0x10
740 vmovdqu xmm15,XMMWORD[((32-32))+r9]
741 vpxor xmm7,xmm8,xmm2
742 vpxor xmm6,xmm9,xmm4
743
744 vmovdqu xmm0,XMMWORD[((16-32))+r9]
745 vpxor xmm9,xmm7,xmm5
746 vpclmulqdq xmm4,xmm14,xmm3,0x00
747 vpxor xmm6,xmm6,xmm9
748 vpunpckhqdq xmm2,xmm13,xmm13
749 vpclmulqdq xmm14,xmm14,xmm3,0x11
750 vpxor xmm2,xmm2,xmm13
751 vpslldq xmm9,xmm6,8
752 vpclmulqdq xmm1,xmm1,xmm15,0x00
753 vpxor xmm8,xmm5,xmm9
754 vpsrldq xmm6,xmm6,8
755 vpxor xmm7,xmm7,xmm6
756
757 vpclmulqdq xmm5,xmm13,xmm0,0x00
758 vmovdqu xmm3,XMMWORD[((48-32))+r9]
759 vpxor xmm5,xmm5,xmm4
760 vpunpckhqdq xmm9,xmm12,xmm12
761 vpclmulqdq xmm13,xmm13,xmm0,0x11
762 vpxor xmm9,xmm9,xmm12
763 vpxor xmm13,xmm13,xmm14
764 vpalignr xmm14,xmm8,xmm8,8
765 vpclmulqdq xmm2,xmm2,xmm15,0x10
766 vmovdqu xmm15,XMMWORD[((80-32))+r9]
767 vpxor xmm2,xmm2,xmm1
768
769 vpclmulqdq xmm4,xmm12,xmm3,0x00
770 vmovdqu xmm0,XMMWORD[((64-32))+r9]
771 vpxor xmm4,xmm4,xmm5
772 vpunpckhqdq xmm1,xmm11,xmm11
773 vpclmulqdq xmm12,xmm12,xmm3,0x11
774 vpxor xmm1,xmm1,xmm11
775 vpxor xmm12,xmm12,xmm13
; XOR in the byte-reversed first result parked at 16+rsp above
776 vxorps xmm7,xmm7,XMMWORD[16+rsp]
777 vpclmulqdq xmm9,xmm9,xmm15,0x00
778 vpxor xmm9,xmm9,xmm2
779
; multiply by the constant at $L$bswap_mask+16 ($L$poly), first step
780 vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10
781 vxorps xmm8,xmm8,xmm14
782
783 vpclmulqdq xmm5,xmm11,xmm0,0x00
784 vmovdqu xmm3,XMMWORD[((96-32))+r9]
785 vpxor xmm5,xmm5,xmm4
786 vpunpckhqdq xmm2,xmm10,xmm10
787 vpclmulqdq xmm11,xmm11,xmm0,0x11
788 vpxor xmm2,xmm2,xmm10
789 vpalignr xmm14,xmm8,xmm8,8
790 vpxor xmm11,xmm11,xmm12
791 vpclmulqdq xmm1,xmm1,xmm15,0x10
792 vmovdqu xmm15,XMMWORD[((128-32))+r9]
793 vpxor xmm1,xmm1,xmm9
794
; second multiply by $L$poly
795 vxorps xmm14,xmm14,xmm7
796 vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10
797 vxorps xmm8,xmm8,xmm14
798
799 vpclmulqdq xmm4,xmm10,xmm3,0x00
800 vmovdqu xmm0,XMMWORD[((112-32))+r9]
801 vpxor xmm4,xmm4,xmm5
802 vpunpckhqdq xmm9,xmm8,xmm8
803 vpclmulqdq xmm10,xmm10,xmm3,0x11
804 vpxor xmm9,xmm9,xmm8
805 vpxor xmm10,xmm10,xmm11
806 vpclmulqdq xmm2,xmm2,xmm15,0x00
807 vpxor xmm2,xmm2,xmm1
808
809 vpclmulqdq xmm5,xmm8,xmm0,0x00
810 vpclmulqdq xmm7,xmm8,xmm0,0x11
811 vpxor xmm5,xmm5,xmm4
812 vpclmulqdq xmm6,xmm9,xmm15,0x10
813 vpxor xmm7,xmm7,xmm10
814 vpxor xmm6,xmm6,xmm2
815
; combine the three partial sums (xmm5, xmm6, xmm7) into xmm8 and xmm7
816 vpxor xmm4,xmm7,xmm5
817 vpxor xmm6,xmm6,xmm4
818 vpslldq xmm1,xmm6,8
819 vmovdqu xmm3,XMMWORD[16+r11]
820 vpsrldq xmm6,xmm6,8
821 vpxor xmm8,xmm5,xmm1
822 vpxor xmm7,xmm7,xmm6
823
; final two multiply-by-$L$poly steps
824 vpalignr xmm2,xmm8,xmm8,8
825 vpclmulqdq xmm8,xmm8,xmm3,0x10
826 vpxor xmm8,xmm8,xmm2
827
828 vpalignr xmm2,xmm8,xmm8,8
829 vpclmulqdq xmm8,xmm8,xmm3,0x10
830 vpxor xmm2,xmm2,xmm7
831 vpxor xmm8,xmm8,xmm2
; byte-reverse the accumulator and write it back to the original r9 address
832 vpshufb xmm8,xmm8,XMMWORD[r11]
833 vmovdqu XMMWORD[(-64)+r9],xmm8
834
835 vzeroupper
; restore xmm6-xmm15 and the pushed registers relative to the frame anchor
836 movaps xmm6,XMMWORD[((-216))+rax]
837 movaps xmm7,XMMWORD[((-200))+rax]
838 movaps xmm8,XMMWORD[((-184))+rax]
839 movaps xmm9,XMMWORD[((-168))+rax]
840 movaps xmm10,XMMWORD[((-152))+rax]
841 movaps xmm11,XMMWORD[((-136))+rax]
842 movaps xmm12,XMMWORD[((-120))+rax]
843 movaps xmm13,XMMWORD[((-104))+rax]
844 movaps xmm14,XMMWORD[((-88))+rax]
845 movaps xmm15,XMMWORD[((-72))+rax]
846 mov r15,QWORD[((-48))+rax]
847 mov r14,QWORD[((-40))+rax]
848 mov r13,QWORD[((-32))+rax]
849 mov r12,QWORD[((-24))+rax]
850 mov rbp,QWORD[((-16))+rax]
851 mov rbx,QWORD[((-8))+rax]
852 lea rsp,[rax]
853$L$gcm_enc_abort:
; return value = bytes processed
854 mov rax,r10
855 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
856 mov rsi,QWORD[16+rsp]
857 DB 0F3h,0C3h ;repret
858$L$SEH_end_aesni_gcm_encrypt:
; Constant table. The code loads r11 with the address of $L$bswap_mask and
; reaches the other 16-byte entries by fixed offsets from it:
;   +0  $L$bswap_mask   +16 $L$poly   +32 $L$one_msb
;   +48 $L$two_lsb      +64 $L$one_lsb
859ALIGN 64
; vpshufb mask that reverses the 16 bytes of a vector
860$L$bswap_mask:
861DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; multiplier used with vpclmulqdq in the reduction steps (only the top byte is set)
862$L$poly:
863DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
; 1 in the last byte: byte-wise (vpaddb) counter increment
864$L$one_msb:
865DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
; 2 and 1 in the first byte: dword (vpaddd) increments applied to the
; byte-reversed counter on the $L$handle_ctr32 paths
866$L$two_lsb:
867DB 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
868$L$one_lsb:
869DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
; NUL-terminated ASCII: "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>"
870DB 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
871DB 101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
872DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
873DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
874ALIGN 64
;-----------------------------------------------------------------------
; gcm_se_handler -- Win64 language-specific exception handler registered in
; .xdata for aesni_gcm_decrypt and aesni_gcm_encrypt.
;
; Uses r8 and r9 from the handler arguments. The offsets below are interpreted
; per the Win64 CONTEXT and DISPATCHER_CONTEXT layouts (r8 = ContextRecord,
; r9 = DispatcherContext):
;   CONTEXT: 120 Rax, 144 Rbx, 152 Rsp, 160 Rbp, 168 Rsi, 176 Rdi,
;            216..240 R12..R15, 248 Rip, 512 Xmm6
;   DISPATCHER_CONTEXT: 0 ControlPc, 8 ImageBase, 16 FunctionEntry,
;            24 EstablisherFrame, 40 ContextRecord, 56 HandlerData
; HandlerData holds two image-relative labels: the end of the prologue
; ($L$gcm_*_body) and the start of the epilogue ($L$gcm_*_abort).
;
; It rewrites the context to the state at function entry, copies it to the
; dispatcher's context, calls RtlVirtualUnwind and returns 1 in eax
; (ExceptionContinueSearch in the Win64 EXCEPTION_DISPOSITION enum).
;-----------------------------------------------------------------------
875EXTERN __imp_RtlVirtualUnwind
876
877ALIGN 16
878gcm_se_handler:
879 push rsi
880 push rdi
881 push rbx
882 push rbp
883 push r12
884 push r13
885 push r14
886 push r15
887 pushfq
; 64 bytes: 32 of shadow space plus four stack arguments for the call below
888 sub rsp,64
889
; rax = context Rax, rbx = context Rip
890 mov rax,QWORD[120+r8]
891 mov rbx,QWORD[248+r8]
892
; rsi = ImageBase, r11 = HandlerData
893 mov rsi,QWORD[8+r9]
894 mov r11,QWORD[56+r9]
895
; Rip before HandlerData[0] (body label): still in the prologue, keep rax
896 mov r10d,DWORD[r11]
897 lea r10,[r10*1+rsi]
898 cmp rbx,r10
899 jb NEAR $L$common_seh_tail
900
; rax = context Rsp
901 mov rax,QWORD[152+r8]
902
; Rip at or after HandlerData[1] (abort label): frame already torn down
903 mov r10d,DWORD[4+r11]
904 lea r10,[r10*1+rsi]
905 cmp rbx,r10
906 jae NEAR $L$common_seh_tail
907
; inside the body: rax holds the frame anchor set by `lea rax,[rsp]`
908 mov rax,QWORD[120+r8]
909
; recover the pushed registers from below the anchor and put them in the context
910 mov r15,QWORD[((-48))+rax]
911 mov r14,QWORD[((-40))+rax]
912 mov r13,QWORD[((-32))+rax]
913 mov r12,QWORD[((-24))+rax]
914 mov rbp,QWORD[((-16))+rax]
915 mov rbx,QWORD[((-8))+rax]
916 mov QWORD[240+r8],r15
917 mov QWORD[232+r8],r14
918 mov QWORD[224+r8],r13
919 mov QWORD[216+r8],r12
920 mov QWORD[160+r8],rbp
921 mov QWORD[144+r8],rbx
922
; copy the ten saved xmm registers (20 qwords) from rax-216 into the context
923 lea rsi,[((-216))+rax]
924 lea rdi,[512+r8]
925 mov ecx,20
; bytes FC F3 48 A5 = cld ; rep movsq
926 DD 0xa548f3fc
927
928$L$common_seh_tail:
; rdi/rsi were saved in the home slots at 8 and 16 above the entry rsp
929 mov rdi,QWORD[8+rax]
930 mov rsi,QWORD[16+rax]
931 mov QWORD[152+r8],rax
932 mov QWORD[168+r8],rsi
933 mov QWORD[176+r8],rdi
934
; copy 154 qwords (1232 bytes) of the adjusted context to the ContextRecord
; referenced by the dispatcher context
935 mov rdi,QWORD[40+r9]
936 mov rsi,r8
937 mov ecx,154
; bytes FC F3 48 A5 = cld ; rep movsq
938 DD 0xa548f3fc
939
; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
940 mov rsi,r9
941 xor rcx,rcx
942 mov rdx,QWORD[8+rsi]
943 mov r8,QWORD[rsi]
944 mov r9,QWORD[16+rsi]
945 mov r10,QWORD[40+rsi]
946 lea r11,[56+rsi]
947 lea r12,[24+rsi]
948 mov QWORD[32+rsp],r10
949 mov QWORD[40+rsp],r11
950 mov QWORD[48+rsp],r12
951 mov QWORD[56+rsp],rcx
952 call QWORD[__imp_RtlVirtualUnwind]
953
; return value 1
954 mov eax,1
955 add rsp,64
956 popfq
957 pop r15
958 pop r14
959 pop r13
960 pop r12
961 pop rbp
962 pop rbx
963 pop rdi
964 pop rsi
965 DB 0F3h,0C3h ;repret
966
967
; .pdata: one three-dword entry per exported function, all image-relative:
; start of covered range, end of covered range, address of its unwind info.
968section .pdata rdata align=4
969ALIGN 4
970 DD $L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase
971 DD $L$SEH_end_aesni_gcm_decrypt wrt ..imagebase
972 DD $L$SEH_gcm_dec_info wrt ..imagebase
973
974 DD $L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase
975 DD $L$SEH_end_aesni_gcm_encrypt wrt ..imagebase
976 DD $L$SEH_gcm_enc_info wrt ..imagebase
; .xdata: unwind info records. First byte 9 = version 1 with flags 1 in the
; upper five bits (UNW_FLAG_EHANDLER per the Win64 UNWIND_INFO layout); the
; remaining three bytes are zero (no prologue size, no unwind codes, no frame
; register). Then the handler address and the two labels gcm_se_handler reads
; as HandlerData[0] and HandlerData[1].
977section .xdata rdata align=8
978ALIGN 8
979$L$SEH_gcm_dec_info:
980DB 9,0,0,0
981 DD gcm_se_handler wrt ..imagebase
982 DD $L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
983$L$SEH_gcm_enc_info:
984DB 9,0,0,0
985 DD gcm_se_handler wrt ..imagebase
986 DD $L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette