1 | default rel
|
---|
2 | %define XMMWORD
|
---|
3 | %define YMMWORD
|
---|
4 | %define ZMMWORD
|
---|
5 | section .text code align=64
|
---|
6 |
|
---|
7 | EXTERN OPENSSL_ia32cap_P
|
---|
8 |
|
---|
;-----------------------------------------------------------------------
; gcm_gmult_4bit
; ABI:   Microsoft x64 entry. The two arguments arrive in rcx and rdx and
;        are copied into rdi and rsi, which the body uses throughout.
; In:    rcx -> 16-byte buffer. It is read one byte at a time from
;               offset 15 down to offset 0 and then overwritten with the
;               16-byte result.
;        rdx -> table read as 16-byte entries at byte offsets nibble*16
;               (qword at +0 goes to r9, qword at +8 goes to r8).
; Uses:  $L$rem_4bit (defined outside this view), read as qword[index]
;        with index in 0..15.
; Clobb: rax, rcx, rdx, r8, r9, r10, r11, flags. rbx is saved and
;        restored; rdi and rsi are restored from the caller's home slots.
; NOTE(review): judging by the name this is the 4-bit-table GCM multiply;
;        the table contents are not visible here - confirm with the caller.
;-----------------------------------------------------------------------
9 | global gcm_gmult_4bit
|
---|
10 |
|
---|
11 | ALIGN 16
|
---|
12 | gcm_gmult_4bit:
|
---|
13 | mov QWORD[8+rsp],rdi ;WIN64 prologue: park callee-saved rdi in the caller's home space
|
---|
14 | mov QWORD[16+rsp],rsi ; likewise for rsi
|
---|
; rax = rsp at entry. It is not read again in this function (rax is
; zeroed below) - presumably kept for an unwind handler; confirm.
15 | mov rax,rsp
|
---|
16 | $L$SEH_begin_gcm_gmult_4bit:
|
---|
17 | mov rdi,rcx ; rdi = first argument (16-byte buffer)
|
---|
18 | mov rsi,rdx ; rsi = second argument (table base)
|
---|
19 |
|
---|
20 |
|
---|
; Three pushes = 24 bytes; rbx ends up at [rsp+16].
21 | push rbx
|
---|
22 | push rbp
|
---|
23 | push r12
|
---|
24 | $L$gmult_prologue:
|
---|
25 |
|
---|
26 | movzx r8,BYTE[15+rdi] ; r8 = last byte of the buffer
|
---|
27 | lea r11,[$L$rem_4bit] ; r11 = base of the qword table indexed by rdx
|
---|
28 | xor rax,rax
|
---|
29 | xor rbx,rbx
|
---|
30 | mov al,r8b
|
---|
31 | mov bl,r8b
|
---|
32 | shl al,4 ; rax = (byte & 0x0f) * 16: table offset for the low nibble
|
---|
33 | mov rcx,14 ; rcx = index of the next buffer byte to fetch
|
---|
34 | mov r8,QWORD[8+rax*1+rsi]
|
---|
35 | mov r9,QWORD[rax*1+rsi]
|
---|
36 | and bl,0xf0 ; rbx = byte & 0xf0: table offset for the high nibble
|
---|
37 | mov rdx,r8 ; low 4 bits of rdx are the bits shifted out next
|
---|
38 | jmp NEAR $L$oop1
|
---|
39 |
|
---|
; Loop body. Each half shifts the 128-bit value r9:r8 (r9 = upper qword)
; right by 4 bits, XORs in one 16-byte table entry, and XORs
; $L$rem_4bit[rdx] into r9, where rdx holds the 4 bits just shifted out.
; First half uses the high-nibble offset (rbx) of the previous byte and
; fetches the next byte; second half uses the low-nibble offset (rax) of
; the byte just fetched.
40 | ALIGN 16
|
---|
41 | $L$oop1:
|
---|
42 | shr r8,4
|
---|
43 | and rdx,0xf
|
---|
44 | mov r10,r9
|
---|
45 | mov al,BYTE[rcx*1+rdi] ; fetch buffer byte number rcx
|
---|
46 | shr r9,4
|
---|
47 | xor r8,QWORD[8+rbx*1+rsi]
|
---|
48 | shl r10,60 ; low 4 bits of r9 move to the top of r8
|
---|
49 | xor r9,QWORD[rbx*1+rsi]
|
---|
50 | mov bl,al
|
---|
51 | xor r9,QWORD[rdx*8+r11]
|
---|
52 | mov rdx,r8
|
---|
53 | shl al,4
|
---|
54 | xor r8,r10
|
---|
55 | dec rcx
|
---|
56 | js NEAR $L$break1 ; rcx went below 0: byte 0 has been fetched, finish outside the loop
|
---|
57 |
|
---|
58 | shr r8,4
|
---|
59 | and rdx,0xf
|
---|
60 | mov r10,r9
|
---|
61 | shr r9,4
|
---|
62 | xor r8,QWORD[8+rax*1+rsi]
|
---|
63 | shl r10,60
|
---|
64 | xor r9,QWORD[rax*1+rsi]
|
---|
65 | and bl,0xf0
|
---|
66 | xor r9,QWORD[rdx*8+r11]
|
---|
67 | mov rdx,r8
|
---|
68 | xor r8,r10
|
---|
69 | jmp NEAR $L$oop1
|
---|
70 |
|
---|
; Tail: the same two shift-and-XOR steps for the last fetched byte
; (low-nibble offset in rax, then high-nibble offset in rbx), without
; fetching another byte.
71 | ALIGN 16
|
---|
72 | $L$break1:
|
---|
73 | shr r8,4
|
---|
74 | and rdx,0xf
|
---|
75 | mov r10,r9
|
---|
76 | shr r9,4
|
---|
77 | xor r8,QWORD[8+rax*1+rsi]
|
---|
78 | shl r10,60
|
---|
79 | xor r9,QWORD[rax*1+rsi]
|
---|
80 | and bl,0xf0
|
---|
81 | xor r9,QWORD[rdx*8+r11]
|
---|
82 | mov rdx,r8
|
---|
83 | xor r8,r10
|
---|
84 |
|
---|
85 | shr r8,4
|
---|
86 | and rdx,0xf
|
---|
87 | mov r10,r9
|
---|
88 | shr r9,4
|
---|
89 | xor r8,QWORD[8+rbx*1+rsi]
|
---|
90 | shl r10,60
|
---|
91 | xor r9,QWORD[rbx*1+rsi]
|
---|
92 | xor r8,r10
|
---|
93 | xor r9,QWORD[rdx*8+r11]
|
---|
94 |
|
---|
; Byte-reverse both halves and write the result over the input buffer.
95 | bswap r8
|
---|
96 | bswap r9
|
---|
97 | mov QWORD[8+rdi],r8
|
---|
98 | mov QWORD[rdi],r9
|
---|
99 |
|
---|
; Only rbx is reloaded: rbp and r12 are never written by the code above,
; so their stack copies are simply discarded by the rsp adjustment.
100 | mov rbx,QWORD[16+rsp]
|
---|
101 | lea rsp,[24+rsp]
|
---|
102 | $L$gmult_epilogue:
|
---|
103 | mov rdi,QWORD[8+rsp] ;WIN64 epilogue: restore rdi from the home space
|
---|
104 | mov rsi,QWORD[16+rsp] ; restore rsi
|
---|
105 | DB 0F3h,0C3h ;repret (bytes F3 C3 = rep ret)
|
---|
106 | $L$SEH_end_gcm_gmult_4bit:
|
---|
;-----------------------------------------------------------------------
; gcm_ghash_4bit
; ABI:   Microsoft x64 entry. rcx, rdx, r8, r9 are copied into
;        rdi, rsi, rdx, rcx; the body then keeps them as
;        rdi = state, rsi = table, r14 = input pointer, r15 = input end.
; In:    rcx -> 16-byte state, loaded at entry and stored back at exit
;               (it is also used as scratch storage inside the loop).
;        rdx -> table of 16-byte entries; byte offsets 0..255 are read.
;        r8  -> input, consumed 16 bytes per outer-loop iteration.
;        r9  =  input length in bytes (end pointer = r8 + r9).
; Uses:  $L$rem_8bit (defined outside this view), read as word[index]
;        with index in 0..255.
; Stack: 280-byte frame below six pushed registers:
;          [rsp+0   .. rsp+15 ]  16 bytes: (low byte of entry[i] qword +8) << 4
;          [rsp+16  .. rsp+143]  16 qwords: (entry[i].q8 >> 4) OR (entry[i].q0 << 60)
;          [rsp+144 .. rsp+271]  16 qwords: entry[i].q0 >> 4
;        where q0/q8 are the qwords at +0/+8 of table entry i;
;        rbp = rsp+144 addresses both qword arrays.
; Clobb: rax, rcx, rdx, r8-r11, flags. rbx, rbp, r12-r15 are saved and
;        restored; rdi and rsi are restored from the caller's home slots.
; Note:  the outer loop tests its condition at the bottom, so the body
;        runs once even when the length is 0 - callers are presumably
;        expected to pass a non-zero multiple of 16; confirm.
;-----------------------------------------------------------------------
107 | global gcm_ghash_4bit
|
---|
108 |
|
---|
109 | ALIGN 16
|
---|
110 | gcm_ghash_4bit:
|
---|
111 | mov QWORD[8+rsp],rdi ;WIN64 prologue: park callee-saved rdi in the caller's home space
|
---|
112 | mov QWORD[16+rsp],rsi ; likewise for rsi
|
---|
; rax = rsp at entry; rax is overwritten before it is read again in this
; function - presumably kept for an unwind handler; confirm.
113 | mov rax,rsp
|
---|
114 | $L$SEH_begin_gcm_ghash_4bit:
|
---|
115 | mov rdi,rcx ; rdi = first argument (state)
|
---|
116 | mov rsi,rdx ; rsi = second argument (table)
|
---|
117 | mov rdx,r8 ; rdx = third argument (input pointer)
|
---|
118 | mov rcx,r9 ; rcx = fourth argument (length)
|
---|
119 |
|
---|
120 |
|
---|
121 | push rbx
|
---|
122 | push rbp
|
---|
123 | push r12
|
---|
124 | push r13
|
---|
125 | push r14
|
---|
126 | push r15
|
---|
127 | sub rsp,280
|
---|
128 | $L$ghash_prologue:
|
---|
129 | mov r14,rdx ; r14 = input pointer
|
---|
130 | mov r15,rcx ; r15 = length for now, turned into the end pointer below
|
---|
; rsi is advanced by 128 here and moved back after the table has been
; copied; every displacement below is written as (offset - 128).
131 | sub rsi,-128
|
---|
132 | lea rbp,[((16+128))+rsp] ; rbp = rsp+144, midpoint of the two qword arrays
|
---|
133 | xor edx,edx
|
---|
; Build the shifted copy of the 16 table entries on the stack. The work
; for consecutive entries is interleaved, alternating between the
; register pairs (r8,rax) and (r9,rbx).
134 | mov r8,QWORD[((0+0-128))+rsi]
|
---|
135 | mov rax,QWORD[((0+8-128))+rsi]
|
---|
136 | mov dl,al
|
---|
137 | shr rax,4
|
---|
138 | mov r10,r8
|
---|
139 | shr r8,4
|
---|
140 | mov r9,QWORD[((16+0-128))+rsi]
|
---|
141 | shl dl,4
|
---|
142 | mov rbx,QWORD[((16+8-128))+rsi]
|
---|
143 | shl r10,60
|
---|
144 | mov BYTE[rsp],dl
|
---|
145 | or rax,r10
|
---|
146 | mov dl,bl
|
---|
147 | shr rbx,4
|
---|
148 | mov r10,r9
|
---|
149 | shr r9,4
|
---|
150 | mov QWORD[rbp],r8
|
---|
151 | mov r8,QWORD[((32+0-128))+rsi]
|
---|
152 | shl dl,4
|
---|
153 | mov QWORD[((0-128))+rbp],rax
|
---|
154 | mov rax,QWORD[((32+8-128))+rsi]
|
---|
155 | shl r10,60
|
---|
156 | mov BYTE[1+rsp],dl
|
---|
157 | or rbx,r10
|
---|
158 | mov dl,al
|
---|
159 | shr rax,4
|
---|
160 | mov r10,r8
|
---|
161 | shr r8,4
|
---|
162 | mov QWORD[8+rbp],r9
|
---|
163 | mov r9,QWORD[((48+0-128))+rsi]
|
---|
164 | shl dl,4
|
---|
165 | mov QWORD[((8-128))+rbp],rbx
|
---|
166 | mov rbx,QWORD[((48+8-128))+rsi]
|
---|
167 | shl r10,60
|
---|
168 | mov BYTE[2+rsp],dl
|
---|
169 | or rax,r10
|
---|
170 | mov dl,bl
|
---|
171 | shr rbx,4
|
---|
172 | mov r10,r9
|
---|
173 | shr r9,4
|
---|
174 | mov QWORD[16+rbp],r8
|
---|
175 | mov r8,QWORD[((64+0-128))+rsi]
|
---|
176 | shl dl,4
|
---|
177 | mov QWORD[((16-128))+rbp],rax
|
---|
178 | mov rax,QWORD[((64+8-128))+rsi]
|
---|
179 | shl r10,60
|
---|
180 | mov BYTE[3+rsp],dl
|
---|
181 | or rbx,r10
|
---|
182 | mov dl,al
|
---|
183 | shr rax,4
|
---|
184 | mov r10,r8
|
---|
185 | shr r8,4
|
---|
186 | mov QWORD[24+rbp],r9
|
---|
187 | mov r9,QWORD[((80+0-128))+rsi]
|
---|
188 | shl dl,4
|
---|
189 | mov QWORD[((24-128))+rbp],rbx
|
---|
190 | mov rbx,QWORD[((80+8-128))+rsi]
|
---|
191 | shl r10,60
|
---|
192 | mov BYTE[4+rsp],dl
|
---|
193 | or rax,r10
|
---|
194 | mov dl,bl
|
---|
195 | shr rbx,4
|
---|
196 | mov r10,r9
|
---|
197 | shr r9,4
|
---|
198 | mov QWORD[32+rbp],r8
|
---|
199 | mov r8,QWORD[((96+0-128))+rsi]
|
---|
200 | shl dl,4
|
---|
201 | mov QWORD[((32-128))+rbp],rax
|
---|
202 | mov rax,QWORD[((96+8-128))+rsi]
|
---|
203 | shl r10,60
|
---|
204 | mov BYTE[5+rsp],dl
|
---|
205 | or rbx,r10
|
---|
206 | mov dl,al
|
---|
207 | shr rax,4
|
---|
208 | mov r10,r8
|
---|
209 | shr r8,4
|
---|
210 | mov QWORD[40+rbp],r9
|
---|
211 | mov r9,QWORD[((112+0-128))+rsi]
|
---|
212 | shl dl,4
|
---|
213 | mov QWORD[((40-128))+rbp],rbx
|
---|
214 | mov rbx,QWORD[((112+8-128))+rsi]
|
---|
215 | shl r10,60
|
---|
216 | mov BYTE[6+rsp],dl
|
---|
217 | or rax,r10
|
---|
218 | mov dl,bl
|
---|
219 | shr rbx,4
|
---|
220 | mov r10,r9
|
---|
221 | shr r9,4
|
---|
222 | mov QWORD[48+rbp],r8
|
---|
223 | mov r8,QWORD[((128+0-128))+rsi]
|
---|
224 | shl dl,4
|
---|
225 | mov QWORD[((48-128))+rbp],rax
|
---|
226 | mov rax,QWORD[((128+8-128))+rsi]
|
---|
227 | shl r10,60
|
---|
228 | mov BYTE[7+rsp],dl
|
---|
229 | or rbx,r10
|
---|
230 | mov dl,al
|
---|
231 | shr rax,4
|
---|
232 | mov r10,r8
|
---|
233 | shr r8,4
|
---|
234 | mov QWORD[56+rbp],r9
|
---|
235 | mov r9,QWORD[((144+0-128))+rsi]
|
---|
236 | shl dl,4
|
---|
237 | mov QWORD[((56-128))+rbp],rbx
|
---|
238 | mov rbx,QWORD[((144+8-128))+rsi]
|
---|
239 | shl r10,60
|
---|
240 | mov BYTE[8+rsp],dl
|
---|
241 | or rax,r10
|
---|
242 | mov dl,bl
|
---|
243 | shr rbx,4
|
---|
244 | mov r10,r9
|
---|
245 | shr r9,4
|
---|
246 | mov QWORD[64+rbp],r8
|
---|
247 | mov r8,QWORD[((160+0-128))+rsi]
|
---|
248 | shl dl,4
|
---|
249 | mov QWORD[((64-128))+rbp],rax
|
---|
250 | mov rax,QWORD[((160+8-128))+rsi]
|
---|
251 | shl r10,60
|
---|
252 | mov BYTE[9+rsp],dl
|
---|
253 | or rbx,r10
|
---|
254 | mov dl,al
|
---|
255 | shr rax,4
|
---|
256 | mov r10,r8
|
---|
257 | shr r8,4
|
---|
258 | mov QWORD[72+rbp],r9
|
---|
259 | mov r9,QWORD[((176+0-128))+rsi]
|
---|
260 | shl dl,4
|
---|
261 | mov QWORD[((72-128))+rbp],rbx
|
---|
262 | mov rbx,QWORD[((176+8-128))+rsi]
|
---|
263 | shl r10,60
|
---|
264 | mov BYTE[10+rsp],dl
|
---|
265 | or rax,r10
|
---|
266 | mov dl,bl
|
---|
267 | shr rbx,4
|
---|
268 | mov r10,r9
|
---|
269 | shr r9,4
|
---|
270 | mov QWORD[80+rbp],r8
|
---|
271 | mov r8,QWORD[((192+0-128))+rsi]
|
---|
272 | shl dl,4
|
---|
273 | mov QWORD[((80-128))+rbp],rax
|
---|
274 | mov rax,QWORD[((192+8-128))+rsi]
|
---|
275 | shl r10,60
|
---|
276 | mov BYTE[11+rsp],dl
|
---|
277 | or rbx,r10
|
---|
278 | mov dl,al
|
---|
279 | shr rax,4
|
---|
280 | mov r10,r8
|
---|
281 | shr r8,4
|
---|
282 | mov QWORD[88+rbp],r9
|
---|
283 | mov r9,QWORD[((208+0-128))+rsi]
|
---|
284 | shl dl,4
|
---|
285 | mov QWORD[((88-128))+rbp],rbx
|
---|
286 | mov rbx,QWORD[((208+8-128))+rsi]
|
---|
287 | shl r10,60
|
---|
288 | mov BYTE[12+rsp],dl
|
---|
289 | or rax,r10
|
---|
290 | mov dl,bl
|
---|
291 | shr rbx,4
|
---|
292 | mov r10,r9
|
---|
293 | shr r9,4
|
---|
294 | mov QWORD[96+rbp],r8
|
---|
295 | mov r8,QWORD[((224+0-128))+rsi]
|
---|
296 | shl dl,4
|
---|
297 | mov QWORD[((96-128))+rbp],rax
|
---|
298 | mov rax,QWORD[((224+8-128))+rsi]
|
---|
299 | shl r10,60
|
---|
300 | mov BYTE[13+rsp],dl
|
---|
301 | or rbx,r10
|
---|
302 | mov dl,al
|
---|
303 | shr rax,4
|
---|
304 | mov r10,r8
|
---|
305 | shr r8,4
|
---|
306 | mov QWORD[104+rbp],r9
|
---|
307 | mov r9,QWORD[((240+0-128))+rsi]
|
---|
308 | shl dl,4
|
---|
309 | mov QWORD[((104-128))+rbp],rbx
|
---|
310 | mov rbx,QWORD[((240+8-128))+rsi]
|
---|
311 | shl r10,60
|
---|
312 | mov BYTE[14+rsp],dl
|
---|
313 | or rax,r10
|
---|
314 | mov dl,bl
|
---|
315 | shr rbx,4
|
---|
316 | mov r10,r9
|
---|
317 | shr r9,4
|
---|
318 | mov QWORD[112+rbp],r8
|
---|
319 | shl dl,4
|
---|
320 | mov QWORD[((112-128))+rbp],rax
|
---|
321 | shl r10,60
|
---|
322 | mov BYTE[15+rsp],dl
|
---|
323 | or rbx,r10
|
---|
324 | mov QWORD[120+rbp],r9
|
---|
325 | mov QWORD[((120-128))+rbp],rbx
|
---|
326 | add rsi,-128 ; undo the +128 applied to rsi above
|
---|
327 | mov r8,QWORD[8+rdi] ; r8 = state qword at +8
|
---|
328 | mov r9,QWORD[rdi] ; r9 = state qword at +0
|
---|
329 | add r15,r14 ; r15 = input pointer + length = end of input
|
---|
330 | lea r11,[$L$rem_8bit] ; r11 = base of the word table indexed by r12/r13
|
---|
331 | jmp NEAR $L$outer_loop
|
---|
; Outer loop: one 16-byte input block per iteration.
;   1. XOR the block into the state and store the result at [rdi] so its
;      bytes can be re-read as dwords later in the iteration.
;   2. Walk the stored bytes from offset 15 down to offset 0. For each
;      byte in dl: al = dl << 4 selects a 16-byte entry of the table at
;      rsi, and ebx or ecx = dl >> 4 selects a byte at [rsp+idx] and the
;      qword pair at [rbp-128+idx*8] / [rbp+idx*8]. The accumulator r9:r8
;      (r9 = upper qword) is shifted right by 8 bits per step, and
;      word[$L$rem_8bit + 2*index] << 48 is XORed into r9, where index is
;      the low byte of r8 XOR the selected stack byte.
;   The steps are software-pipelined: r12/r13 and rbx/rcx alternate
;   between consecutive bytes.
332 | ALIGN 16
|
---|
333 | $L$outer_loop:
|
---|
334 | xor r9,QWORD[r14]
|
---|
335 | mov rdx,QWORD[8+r14]
|
---|
336 | lea r14,[16+r14] ; advance the input pointer by one block
|
---|
337 | xor rdx,r8
|
---|
338 | mov QWORD[rdi],r9
|
---|
339 | mov QWORD[8+rdi],rdx
|
---|
340 | shr rdx,32 ; edx = dword at offset 12 of the stored block
|
---|
341 | xor rax,rax
|
---|
342 | rol edx,8 ; dl = byte at offset 15; each later rol brings in the next lower byte
|
---|
343 | mov al,dl
|
---|
344 | movzx ebx,dl
|
---|
345 | shl al,4
|
---|
346 | shr ebx,4
|
---|
347 | rol edx,8
|
---|
348 | mov r8,QWORD[8+rax*1+rsi]
|
---|
349 | mov r9,QWORD[rax*1+rsi]
|
---|
350 | mov al,dl
|
---|
351 | movzx ecx,dl
|
---|
352 | shl al,4
|
---|
353 | movzx r12,BYTE[rbx*1+rsp]
|
---|
354 | shr ecx,4
|
---|
355 | xor r12,r8
|
---|
356 | mov r10,r9
|
---|
357 | shr r8,8
|
---|
358 | movzx r12,r12b
|
---|
359 | shr r9,8
|
---|
360 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
361 | shl r10,56
|
---|
362 | xor r9,QWORD[rbx*8+rbp]
|
---|
363 | rol edx,8
|
---|
364 | xor r8,QWORD[8+rax*1+rsi]
|
---|
365 | xor r9,QWORD[rax*1+rsi]
|
---|
366 | mov al,dl
|
---|
367 | xor r8,r10
|
---|
368 | movzx r12,WORD[r12*2+r11]
|
---|
369 | movzx ebx,dl
|
---|
370 | shl al,4
|
---|
371 | movzx r13,BYTE[rcx*1+rsp]
|
---|
372 | shr ebx,4
|
---|
373 | shl r12,48
|
---|
374 | xor r13,r8
|
---|
375 | mov r10,r9
|
---|
376 | xor r9,r12
|
---|
377 | shr r8,8
|
---|
378 | movzx r13,r13b
|
---|
379 | shr r9,8
|
---|
380 | xor r8,QWORD[((-128))+rcx*8+rbp]
|
---|
381 | shl r10,56
|
---|
382 | xor r9,QWORD[rcx*8+rbp]
|
---|
383 | rol edx,8
|
---|
384 | xor r8,QWORD[8+rax*1+rsi]
|
---|
385 | xor r9,QWORD[rax*1+rsi]
|
---|
386 | mov al,dl
|
---|
387 | xor r8,r10
|
---|
388 | movzx r13,WORD[r13*2+r11]
|
---|
389 | movzx ecx,dl
|
---|
390 | shl al,4
|
---|
391 | movzx r12,BYTE[rbx*1+rsp]
|
---|
392 | shr ecx,4
|
---|
393 | shl r13,48
|
---|
394 | xor r12,r8
|
---|
395 | mov r10,r9
|
---|
396 | xor r9,r13
|
---|
397 | shr r8,8
|
---|
398 | movzx r12,r12b
|
---|
; Reload: the next four bytes come from offsets 8..11 of the stored block.
399 | mov edx,DWORD[8+rdi]
|
---|
400 | shr r9,8
|
---|
401 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
402 | shl r10,56
|
---|
403 | xor r9,QWORD[rbx*8+rbp]
|
---|
404 | rol edx,8
|
---|
405 | xor r8,QWORD[8+rax*1+rsi]
|
---|
406 | xor r9,QWORD[rax*1+rsi]
|
---|
407 | mov al,dl
|
---|
408 | xor r8,r10
|
---|
409 | movzx r12,WORD[r12*2+r11]
|
---|
410 | movzx ebx,dl
|
---|
411 | shl al,4
|
---|
412 | movzx r13,BYTE[rcx*1+rsp]
|
---|
413 | shr ebx,4
|
---|
414 | shl r12,48
|
---|
415 | xor r13,r8
|
---|
416 | mov r10,r9
|
---|
417 | xor r9,r12
|
---|
418 | shr r8,8
|
---|
419 | movzx r13,r13b
|
---|
420 | shr r9,8
|
---|
421 | xor r8,QWORD[((-128))+rcx*8+rbp]
|
---|
422 | shl r10,56
|
---|
423 | xor r9,QWORD[rcx*8+rbp]
|
---|
424 | rol edx,8
|
---|
425 | xor r8,QWORD[8+rax*1+rsi]
|
---|
426 | xor r9,QWORD[rax*1+rsi]
|
---|
427 | mov al,dl
|
---|
428 | xor r8,r10
|
---|
429 | movzx r13,WORD[r13*2+r11]
|
---|
430 | movzx ecx,dl
|
---|
431 | shl al,4
|
---|
432 | movzx r12,BYTE[rbx*1+rsp]
|
---|
433 | shr ecx,4
|
---|
434 | shl r13,48
|
---|
435 | xor r12,r8
|
---|
436 | mov r10,r9
|
---|
437 | xor r9,r13
|
---|
438 | shr r8,8
|
---|
439 | movzx r12,r12b
|
---|
440 | shr r9,8
|
---|
441 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
442 | shl r10,56
|
---|
443 | xor r9,QWORD[rbx*8+rbp]
|
---|
444 | rol edx,8
|
---|
445 | xor r8,QWORD[8+rax*1+rsi]
|
---|
446 | xor r9,QWORD[rax*1+rsi]
|
---|
447 | mov al,dl
|
---|
448 | xor r8,r10
|
---|
449 | movzx r12,WORD[r12*2+r11]
|
---|
450 | movzx ebx,dl
|
---|
451 | shl al,4
|
---|
452 | movzx r13,BYTE[rcx*1+rsp]
|
---|
453 | shr ebx,4
|
---|
454 | shl r12,48
|
---|
455 | xor r13,r8
|
---|
456 | mov r10,r9
|
---|
457 | xor r9,r12
|
---|
458 | shr r8,8
|
---|
459 | movzx r13,r13b
|
---|
460 | shr r9,8
|
---|
461 | xor r8,QWORD[((-128))+rcx*8+rbp]
|
---|
462 | shl r10,56
|
---|
463 | xor r9,QWORD[rcx*8+rbp]
|
---|
464 | rol edx,8
|
---|
465 | xor r8,QWORD[8+rax*1+rsi]
|
---|
466 | xor r9,QWORD[rax*1+rsi]
|
---|
467 | mov al,dl
|
---|
468 | xor r8,r10
|
---|
469 | movzx r13,WORD[r13*2+r11]
|
---|
470 | movzx ecx,dl
|
---|
471 | shl al,4
|
---|
472 | movzx r12,BYTE[rbx*1+rsp]
|
---|
473 | shr ecx,4
|
---|
474 | shl r13,48
|
---|
475 | xor r12,r8
|
---|
476 | mov r10,r9
|
---|
477 | xor r9,r13
|
---|
478 | shr r8,8
|
---|
479 | movzx r12,r12b
|
---|
; Reload: the next four bytes come from offsets 4..7 of the stored block.
480 | mov edx,DWORD[4+rdi]
|
---|
481 | shr r9,8
|
---|
482 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
483 | shl r10,56
|
---|
484 | xor r9,QWORD[rbx*8+rbp]
|
---|
485 | rol edx,8
|
---|
486 | xor r8,QWORD[8+rax*1+rsi]
|
---|
487 | xor r9,QWORD[rax*1+rsi]
|
---|
488 | mov al,dl
|
---|
489 | xor r8,r10
|
---|
490 | movzx r12,WORD[r12*2+r11]
|
---|
491 | movzx ebx,dl
|
---|
492 | shl al,4
|
---|
493 | movzx r13,BYTE[rcx*1+rsp]
|
---|
494 | shr ebx,4
|
---|
495 | shl r12,48
|
---|
496 | xor r13,r8
|
---|
497 | mov r10,r9
|
---|
498 | xor r9,r12
|
---|
499 | shr r8,8
|
---|
500 | movzx r13,r13b
|
---|
501 | shr r9,8
|
---|
502 | xor r8,QWORD[((-128))+rcx*8+rbp]
|
---|
503 | shl r10,56
|
---|
504 | xor r9,QWORD[rcx*8+rbp]
|
---|
505 | rol edx,8
|
---|
506 | xor r8,QWORD[8+rax*1+rsi]
|
---|
507 | xor r9,QWORD[rax*1+rsi]
|
---|
508 | mov al,dl
|
---|
509 | xor r8,r10
|
---|
510 | movzx r13,WORD[r13*2+r11]
|
---|
511 | movzx ecx,dl
|
---|
512 | shl al,4
|
---|
513 | movzx r12,BYTE[rbx*1+rsp]
|
---|
514 | shr ecx,4
|
---|
515 | shl r13,48
|
---|
516 | xor r12,r8
|
---|
517 | mov r10,r9
|
---|
518 | xor r9,r13
|
---|
519 | shr r8,8
|
---|
520 | movzx r12,r12b
|
---|
521 | shr r9,8
|
---|
522 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
523 | shl r10,56
|
---|
524 | xor r9,QWORD[rbx*8+rbp]
|
---|
525 | rol edx,8
|
---|
526 | xor r8,QWORD[8+rax*1+rsi]
|
---|
527 | xor r9,QWORD[rax*1+rsi]
|
---|
528 | mov al,dl
|
---|
529 | xor r8,r10
|
---|
530 | movzx r12,WORD[r12*2+r11]
|
---|
531 | movzx ebx,dl
|
---|
532 | shl al,4
|
---|
533 | movzx r13,BYTE[rcx*1+rsp]
|
---|
534 | shr ebx,4
|
---|
535 | shl r12,48
|
---|
536 | xor r13,r8
|
---|
537 | mov r10,r9
|
---|
538 | xor r9,r12
|
---|
539 | shr r8,8
|
---|
540 | movzx r13,r13b
|
---|
541 | shr r9,8
|
---|
542 | xor r8,QWORD[((-128))+rcx*8+rbp]
|
---|
543 | shl r10,56
|
---|
544 | xor r9,QWORD[rcx*8+rbp]
|
---|
545 | rol edx,8
|
---|
546 | xor r8,QWORD[8+rax*1+rsi]
|
---|
547 | xor r9,QWORD[rax*1+rsi]
|
---|
548 | mov al,dl
|
---|
549 | xor r8,r10
|
---|
550 | movzx r13,WORD[r13*2+r11]
|
---|
551 | movzx ecx,dl
|
---|
552 | shl al,4
|
---|
553 | movzx r12,BYTE[rbx*1+rsp]
|
---|
554 | shr ecx,4
|
---|
555 | shl r13,48
|
---|
556 | xor r12,r8
|
---|
557 | mov r10,r9
|
---|
558 | xor r9,r13
|
---|
559 | shr r8,8
|
---|
560 | movzx r12,r12b
|
---|
; Reload: the last four bytes come from offsets 0..3 of the stored block.
561 | mov edx,DWORD[rdi]
|
---|
562 | shr r9,8
|
---|
563 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
564 | shl r10,56
|
---|
565 | xor r9,QWORD[rbx*8+rbp]
|
---|
566 | rol edx,8
|
---|
567 | xor r8,QWORD[8+rax*1+rsi]
|
---|
568 | xor r9,QWORD[rax*1+rsi]
|
---|
569 | mov al,dl
|
---|
570 | xor r8,r10
|
---|
571 | movzx r12,WORD[r12*2+r11]
|
---|
572 | movzx ebx,dl
|
---|
573 | shl al,4
|
---|
574 | movzx r13,BYTE[rcx*1+rsp]
|
---|
575 | shr ebx,4
|
---|
576 | shl r12,48
|
---|
577 | xor r13,r8
|
---|
578 | mov r10,r9
|
---|
579 | xor r9,r12
|
---|
580 | shr r8,8
|
---|
581 | movzx r13,r13b
|
---|
582 | shr r9,8
|
---|
583 | xor r8,QWORD[((-128))+rcx*8+rbp]
|
---|
584 | shl r10,56
|
---|
585 | xor r9,QWORD[rcx*8+rbp]
|
---|
586 | rol edx,8
|
---|
587 | xor r8,QWORD[8+rax*1+rsi]
|
---|
588 | xor r9,QWORD[rax*1+rsi]
|
---|
589 | mov al,dl
|
---|
590 | xor r8,r10
|
---|
591 | movzx r13,WORD[r13*2+r11]
|
---|
592 | movzx ecx,dl
|
---|
593 | shl al,4
|
---|
594 | movzx r12,BYTE[rbx*1+rsp]
|
---|
595 | shr ecx,4
|
---|
596 | shl r13,48
|
---|
597 | xor r12,r8
|
---|
598 | mov r10,r9
|
---|
599 | xor r9,r13
|
---|
600 | shr r8,8
|
---|
601 | movzx r12,r12b
|
---|
602 | shr r9,8
|
---|
603 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
604 | shl r10,56
|
---|
605 | xor r9,QWORD[rbx*8+rbp]
|
---|
606 | rol edx,8
|
---|
607 | xor r8,QWORD[8+rax*1+rsi]
|
---|
608 | xor r9,QWORD[rax*1+rsi]
|
---|
609 | mov al,dl
|
---|
610 | xor r8,r10
|
---|
611 | movzx r12,WORD[r12*2+r11]
|
---|
612 | movzx ebx,dl
|
---|
613 | shl al,4
|
---|
614 | movzx r13,BYTE[rcx*1+rsp]
|
---|
615 | shr ebx,4
|
---|
616 | shl r12,48
|
---|
617 | xor r13,r8
|
---|
618 | mov r10,r9
|
---|
619 | xor r9,r12
|
---|
620 | shr r8,8
|
---|
621 | movzx r13,r13b
|
---|
622 | shr r9,8
|
---|
623 | xor r8,QWORD[((-128))+rcx*8+rbp]
|
---|
624 | shl r10,56
|
---|
625 | xor r9,QWORD[rcx*8+rbp]
|
---|
626 | rol edx,8
|
---|
627 | xor r8,QWORD[8+rax*1+rsi]
|
---|
628 | xor r9,QWORD[rax*1+rsi]
|
---|
629 | mov al,dl
|
---|
630 | xor r8,r10
|
---|
631 | movzx r13,WORD[r13*2+r11]
|
---|
632 | movzx ecx,dl
|
---|
633 | shl al,4
|
---|
634 | movzx r12,BYTE[rbx*1+rsp]
|
---|
; Last byte: ecx keeps the high nibble in place (dl AND 0xf0) instead of
; shifting it down, because it is used below as a byte offset into the
; table at rsi rather than as an index into the stack arrays.
635 | and ecx,240
|
---|
636 | shl r13,48
|
---|
637 | xor r12,r8
|
---|
638 | mov r10,r9
|
---|
639 | xor r9,r13
|
---|
640 | shr r8,8
|
---|
641 | movzx r12,r12b
|
---|
; NOTE(review): this reads the 4 bytes just below the state buffer. edx
; is not read again before rdx is overwritten at the top of the loop or
; the function returns, so the loaded value appears to be unused.
642 | mov edx,DWORD[((-4))+rdi]
|
---|
643 | shr r9,8
|
---|
644 | xor r8,QWORD[((-128))+rbx*8+rbp]
|
---|
645 | shl r10,56
|
---|
646 | xor r9,QWORD[rbx*8+rbp]
|
---|
647 | movzx r12,WORD[r12*2+r11]
|
---|
648 | xor r8,QWORD[8+rax*1+rsi]
|
---|
649 | xor r9,QWORD[rax*1+rsi]
|
---|
650 | shl r12,48
|
---|
651 | xor r8,r10
|
---|
652 | xor r9,r12
|
---|
; Final step shifts r9:r8 by 4 bits only (not 8) and indexes $L$rem_8bit
; with (low byte of r8) << 4 truncated to 8 bits.
653 | movzx r13,r8b
|
---|
654 | shr r8,4
|
---|
655 | mov r10,r9
|
---|
656 | shl r13b,4
|
---|
657 | shr r9,4
|
---|
658 | xor r8,QWORD[8+rcx*1+rsi]
|
---|
659 | movzx r13,WORD[r13*2+r11]
|
---|
660 | shl r10,60
|
---|
661 | xor r9,QWORD[rcx*1+rsi]
|
---|
662 | xor r8,r10
|
---|
663 | shl r13,48
|
---|
664 | bswap r8
|
---|
665 | xor r9,r13
|
---|
666 | bswap r9
|
---|
667 | cmp r14,r15
|
---|
668 | jb NEAR $L$outer_loop ; unsigned compare: loop while input pointer is below the end
|
---|
669 | mov QWORD[8+rdi],r8 ; write the final state back
|
---|
670 | mov QWORD[rdi],r9
|
---|
671 |
|
---|
; rsi = address of the saved-register area just above the 280-byte
; frame; registers are reloaded in reverse push order.
672 | lea rsi,[280+rsp]
|
---|
673 | mov r15,QWORD[rsi]
|
---|
674 | mov r14,QWORD[8+rsi]
|
---|
675 | mov r13,QWORD[16+rsi]
|
---|
676 | mov r12,QWORD[24+rsi]
|
---|
677 | mov rbp,QWORD[32+rsi]
|
---|
678 | mov rbx,QWORD[40+rsi]
|
---|
679 | lea rsp,[48+rsi]
|
---|
680 | $L$ghash_epilogue:
|
---|
681 | mov rdi,QWORD[8+rsp] ;WIN64 epilogue: restore rdi from the home space
|
---|
682 | mov rsi,QWORD[16+rsp] ; restore rsi
|
---|
683 | DB 0F3h,0C3h ;repret (bytes F3 C3 = rep ret)
|
---|
684 | $L$SEH_end_gcm_ghash_4bit:
|
---|
;-----------------------------------------------------------------------
; gcm_init_clmul
; ABI:   Microsoft x64; the arguments are used directly in rcx and rdx
;        (no copy to rdi/rsi as in the 4bit routines).
; In:    rdx -> 16 bytes of input (loaded unaligned).
;        rcx -> output buffer; six 16-byte values are stored at offsets
;               0, 16, 32, 48, 64 and 80 (96 bytes in total).
; Uses:  $L$0x1c2_polynomial (16-byte constant defined outside this view).
; Stack: 24 bytes, holding the saved xmm6.
; Clobb: xmm0-xmm5, flags. xmm6 is saved and restored.
; Several instructions are emitted as raw bytes with DB; the decoded
; mnemonic is given next to each.
; NOTE(review): judging by the name this precomputes the powers of the
;        hash key for the PCLMULQDQ code path - confirm with the caller.
;-----------------------------------------------------------------------
685 | global gcm_init_clmul
|
---|
686 |
|
---|
687 | ALIGN 16
|
---|
688 | gcm_init_clmul:
|
---|
689 | $L$_init_clmul:
|
---|
690 | $L$SEH_begin_gcm_init_clmul:
|
---|
691 |
|
---|
692 | DB 0x48,0x83,0xec,0x18 ; sub rsp,0x18
|
---|
693 | DB 0x0f,0x29,0x34,0x24 ; movaps [rsp],xmm6 (undone by the movaps load before the return)
|
---|
694 | movdqu xmm2,XMMWORD[rdx]
|
---|
695 | pshufd xmm2,xmm2,78 ; swap the two 64-bit halves
|
---|
696 |
|
---|
697 |
|
---|
; Shift the 128-bit value in xmm2 left by one bit. xmm5 becomes an
; all-ones mask when the top bit was set before the shift, and in that
; case the polynomial constant is XORed in.
698 | pshufd xmm4,xmm2,255 ; broadcast the top dword
|
---|
699 | movdqa xmm3,xmm2
|
---|
700 | psllq xmm2,1
|
---|
701 | pxor xmm5,xmm5
|
---|
702 | psrlq xmm3,63
|
---|
703 | pcmpgtd xmm5,xmm4 ; 0 > top dword (signed), i.e. its sign bit is set
|
---|
704 | pslldq xmm3,8 ; carry bit of the low qword moves into the high qword
|
---|
705 | por xmm2,xmm3
|
---|
706 |
|
---|
707 |
|
---|
708 | pand xmm5,XMMWORD[$L$0x1c2_polynomial]
|
---|
709 | pxor xmm2,xmm5
|
---|
710 |
|
---|
711 |
|
---|
; First multiplication: xmm0 = xmm2 times xmm2 (carry-less), using
; three 64x64 multiplies: low*low, high*high, and (low^high)*(low^high).
; xmm6 = low^high of xmm2 and stays live for the later multiplications.
712 | pshufd xmm6,xmm2,78
|
---|
713 | movdqa xmm0,xmm2
|
---|
714 | pxor xmm6,xmm2
|
---|
715 | movdqa xmm1,xmm0
|
---|
716 | pshufd xmm3,xmm0,78
|
---|
717 | pxor xmm3,xmm0
|
---|
718 | DB 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,0x00
|
---|
719 | DB 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,0x11
|
---|
720 | DB 102,15,58,68,222,0 ; pclmulqdq xmm3,xmm6,0x00
|
---|
721 | pxor xmm3,xmm0
|
---|
722 | pxor xmm3,xmm1
|
---|
723 |
|
---|
; Fold the middle product into the 256-bit result xmm1:xmm0.
724 | movdqa xmm4,xmm3
|
---|
725 | psrldq xmm3,8
|
---|
726 | pslldq xmm4,8
|
---|
727 | pxor xmm1,xmm3
|
---|
728 | pxor xmm0,xmm4
|
---|
729 |
|
---|
; Reduction, phase 1: per 64-bit lane, xmm0 = (x<<63) ^ (x<<62) ^ (x<<57),
; then split across xmm0 (low part) and xmm1 (high part).
730 | movdqa xmm4,xmm0
|
---|
731 | movdqa xmm3,xmm0
|
---|
732 | psllq xmm0,5
|
---|
733 | pxor xmm3,xmm0
|
---|
734 | psllq xmm0,1
|
---|
735 | pxor xmm0,xmm3
|
---|
736 | psllq xmm0,57
|
---|
737 | movdqa xmm3,xmm0
|
---|
738 | pslldq xmm0,8
|
---|
739 | psrldq xmm3,8
|
---|
740 | pxor xmm0,xmm4
|
---|
741 | pxor xmm1,xmm3
|
---|
742 |
|
---|
743 |
|
---|
; Reduction, phase 2: per 64-bit lane, xmm0 = x ^ (x>>1) ^ (x>>2) ^ (x>>7),
; combined with the high part in xmm1.
744 | movdqa xmm4,xmm0
|
---|
745 | psrlq xmm0,1
|
---|
746 | pxor xmm1,xmm4
|
---|
747 | pxor xmm4,xmm0
|
---|
748 | psrlq xmm0,5
|
---|
749 | pxor xmm0,xmm4
|
---|
750 | psrlq xmm0,1
|
---|
751 | pxor xmm0,xmm1
|
---|
; Store the first group: [rcx] = xmm2, [rcx+16] = xmm0 (the product just
; computed), [rcx+32] = the two low^high values packed into one register.
752 | pshufd xmm3,xmm2,78
|
---|
753 | pshufd xmm4,xmm0,78
|
---|
754 | pxor xmm3,xmm2
|
---|
755 | movdqu XMMWORD[rcx],xmm2
|
---|
756 | pxor xmm4,xmm0
|
---|
757 | movdqu XMMWORD[16+rcx],xmm0
|
---|
758 | DB 102,15,58,15,227,8 ; palignr xmm4,xmm3,8
|
---|
759 | movdqu XMMWORD[32+rcx],xmm4
|
---|
; Second multiplication: xmm0 = xmm0 times xmm2, same scheme as above.
760 | movdqa xmm1,xmm0
|
---|
761 | pshufd xmm3,xmm0,78
|
---|
762 | pxor xmm3,xmm0
|
---|
763 | DB 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,0x00
|
---|
764 | DB 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,0x11
|
---|
765 | DB 102,15,58,68,222,0 ; pclmulqdq xmm3,xmm6,0x00
|
---|
766 | pxor xmm3,xmm0
|
---|
767 | pxor xmm3,xmm1
|
---|
768 |
|
---|
769 | movdqa xmm4,xmm3
|
---|
770 | psrldq xmm3,8
|
---|
771 | pslldq xmm4,8
|
---|
772 | pxor xmm1,xmm3
|
---|
773 | pxor xmm0,xmm4
|
---|
774 |
|
---|
775 | movdqa xmm4,xmm0
|
---|
776 | movdqa xmm3,xmm0
|
---|
777 | psllq xmm0,5
|
---|
778 | pxor xmm3,xmm0
|
---|
779 | psllq xmm0,1
|
---|
780 | pxor xmm0,xmm3
|
---|
781 | psllq xmm0,57
|
---|
782 | movdqa xmm3,xmm0
|
---|
783 | pslldq xmm0,8
|
---|
784 | psrldq xmm3,8
|
---|
785 | pxor xmm0,xmm4
|
---|
786 | pxor xmm1,xmm3
|
---|
787 |
|
---|
788 |
|
---|
789 | movdqa xmm4,xmm0
|
---|
790 | psrlq xmm0,1
|
---|
791 | pxor xmm1,xmm4
|
---|
792 | pxor xmm4,xmm0
|
---|
793 | psrlq xmm0,5
|
---|
794 | pxor xmm0,xmm4
|
---|
795 | psrlq xmm0,1
|
---|
796 | pxor xmm0,xmm1
|
---|
; Keep the second product in xmm5, then multiply by xmm2 a third time.
797 | movdqa xmm5,xmm0
|
---|
798 | movdqa xmm1,xmm0
|
---|
799 | pshufd xmm3,xmm0,78
|
---|
800 | pxor xmm3,xmm0
|
---|
801 | DB 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,0x00
|
---|
802 | DB 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,0x11
|
---|
803 | DB 102,15,58,68,222,0 ; pclmulqdq xmm3,xmm6,0x00
|
---|
804 | pxor xmm3,xmm0
|
---|
805 | pxor xmm3,xmm1
|
---|
806 |
|
---|
807 | movdqa xmm4,xmm3
|
---|
808 | psrldq xmm3,8
|
---|
809 | pslldq xmm4,8
|
---|
810 | pxor xmm1,xmm3
|
---|
811 | pxor xmm0,xmm4
|
---|
812 |
|
---|
813 | movdqa xmm4,xmm0
|
---|
814 | movdqa xmm3,xmm0
|
---|
815 | psllq xmm0,5
|
---|
816 | pxor xmm3,xmm0
|
---|
817 | psllq xmm0,1
|
---|
818 | pxor xmm0,xmm3
|
---|
819 | psllq xmm0,57
|
---|
820 | movdqa xmm3,xmm0
|
---|
821 | pslldq xmm0,8
|
---|
822 | psrldq xmm3,8
|
---|
823 | pxor xmm0,xmm4
|
---|
824 | pxor xmm1,xmm3
|
---|
825 |
|
---|
826 |
|
---|
827 | movdqa xmm4,xmm0
|
---|
828 | psrlq xmm0,1
|
---|
829 | pxor xmm1,xmm4
|
---|
830 | pxor xmm4,xmm0
|
---|
831 | psrlq xmm0,5
|
---|
832 | pxor xmm0,xmm4
|
---|
833 | psrlq xmm0,1
|
---|
834 | pxor xmm0,xmm1
|
---|
; Store the second group: [rcx+48] = xmm5 (second product),
; [rcx+64] = xmm0 (third product), [rcx+80] = their packed low^high values.
835 | pshufd xmm3,xmm5,78
|
---|
836 | pshufd xmm4,xmm0,78
|
---|
837 | pxor xmm3,xmm5
|
---|
838 | movdqu XMMWORD[48+rcx],xmm5
|
---|
839 | pxor xmm4,xmm0
|
---|
840 | movdqu XMMWORD[64+rcx],xmm0
|
---|
841 | DB 102,15,58,15,227,8 ; palignr xmm4,xmm3,8
|
---|
842 | movdqu XMMWORD[80+rcx],xmm4
|
---|
843 | movaps xmm6,XMMWORD[rsp] ; restore callee-saved xmm6
|
---|
844 | lea rsp,[24+rsp] ; release the 24-byte slot
|
---|
845 | $L$SEH_end_gcm_init_clmul:
|
---|
846 | DB 0F3h,0C3h ;repret (bytes F3 C3 = rep ret)
|
---|
847 |
|
---|
;-----------------------------------------------------------------------
; gcm_gmult_clmul
; ABI:   Microsoft x64; the arguments are used directly in rcx and rdx.
; In:    rcx -> 16-byte buffer, loaded at entry and overwritten with the
;               result.
;        rdx -> table; the 16-byte values at offsets 0 and 32 are read.
; Uses:  $L$bswap_mask (16-byte shuffle mask defined outside this view).
; Stack: none.
; Clobb: xmm0-xmm5, flags. No callee-saved register is touched.
; Several instructions are emitted as raw bytes with DB; the decoded
; mnemonic is given next to each.
;-----------------------------------------------------------------------
848 | global gcm_gmult_clmul
|
---|
849 |
|
---|
850 | ALIGN 16
|
---|
851 | gcm_gmult_clmul:
|
---|
852 | $L$_gmult_clmul:
|
---|
853 | movdqu xmm0,XMMWORD[rcx]
|
---|
854 | movdqa xmm5,XMMWORD[$L$bswap_mask]
|
---|
855 | movdqu xmm2,XMMWORD[rdx]
|
---|
856 | movdqu xmm4,XMMWORD[32+rdx]
|
---|
857 | DB 102,15,56,0,197 ; pshufb xmm0,xmm5 (shuffle the input bytes with the mask)
|
---|
; Carry-less multiply xmm0 by xmm2 with three 64x64 products:
; low*low, high*high, and (low^high of xmm0) times the low qword of xmm4.
858 | movdqa xmm1,xmm0
|
---|
859 | pshufd xmm3,xmm0,78 ; swap the two 64-bit halves
|
---|
860 | pxor xmm3,xmm0
|
---|
861 | DB 102,15,58,68,194,0 ; pclmulqdq xmm0,xmm2,0x00
|
---|
862 | DB 102,15,58,68,202,17 ; pclmulqdq xmm1,xmm2,0x11
|
---|
863 | DB 102,15,58,68,220,0 ; pclmulqdq xmm3,xmm4,0x00
|
---|
864 | pxor xmm3,xmm0
|
---|
865 | pxor xmm3,xmm1
|
---|
866 |
|
---|
; Fold the middle product into the 256-bit result xmm1:xmm0.
867 | movdqa xmm4,xmm3
|
---|
868 | psrldq xmm3,8
|
---|
869 | pslldq xmm4,8
|
---|
870 | pxor xmm1,xmm3
|
---|
871 | pxor xmm0,xmm4
|
---|
872 |
|
---|
; Reduction, phase 1: per 64-bit lane, xmm0 = (x<<63) ^ (x<<62) ^ (x<<57).
873 | movdqa xmm4,xmm0
|
---|
874 | movdqa xmm3,xmm0
|
---|
875 | psllq xmm0,5
|
---|
876 | pxor xmm3,xmm0
|
---|
877 | psllq xmm0,1
|
---|
878 | pxor xmm0,xmm3
|
---|
879 | psllq xmm0,57
|
---|
880 | movdqa xmm3,xmm0
|
---|
881 | pslldq xmm0,8
|
---|
882 | psrldq xmm3,8
|
---|
883 | pxor xmm0,xmm4
|
---|
884 | pxor xmm1,xmm3
|
---|
885 |
|
---|
886 |
|
---|
; Reduction, phase 2: per 64-bit lane, xmm0 = x ^ (x>>1) ^ (x>>2) ^ (x>>7),
; combined with the high part in xmm1.
887 | movdqa xmm4,xmm0
|
---|
888 | psrlq xmm0,1
|
---|
889 | pxor xmm1,xmm4
|
---|
890 | pxor xmm4,xmm0
|
---|
891 | psrlq xmm0,5
|
---|
892 | pxor xmm0,xmm4
|
---|
893 | psrlq xmm0,1
|
---|
894 | pxor xmm0,xmm1
|
---|
895 | DB 102,15,56,0,197 ; pshufb xmm0,xmm5 (apply the same mask to the result)
|
---|
896 | movdqu XMMWORD[rcx],xmm0
|
---|
897 | DB 0F3h,0C3h ;repret (bytes F3 C3 = rep ret)
|
---|
898 |
|
---|
899 | global gcm_ghash_clmul
|
---|
900 |
|
---|
901 | ALIGN 32
|
---|
902 | gcm_ghash_clmul:
|
---|
903 | $L$_ghash_clmul:
|
---|
904 | lea rax,[((-136))+rsp]
|
---|
905 | $L$SEH_begin_gcm_ghash_clmul:
|
---|
906 |
|
---|
907 | DB 0x48,0x8d,0x60,0xe0
|
---|
908 | DB 0x0f,0x29,0x70,0xe0
|
---|
909 | DB 0x0f,0x29,0x78,0xf0
|
---|
910 | DB 0x44,0x0f,0x29,0x00
|
---|
911 | DB 0x44,0x0f,0x29,0x48,0x10
|
---|
912 | DB 0x44,0x0f,0x29,0x50,0x20
|
---|
913 | DB 0x44,0x0f,0x29,0x58,0x30
|
---|
914 | DB 0x44,0x0f,0x29,0x60,0x40
|
---|
915 | DB 0x44,0x0f,0x29,0x68,0x50
|
---|
916 | DB 0x44,0x0f,0x29,0x70,0x60
|
---|
917 | DB 0x44,0x0f,0x29,0x78,0x70
|
---|
918 | movdqa xmm10,XMMWORD[$L$bswap_mask]
|
---|
919 |
|
---|
920 | movdqu xmm0,XMMWORD[rcx]
|
---|
921 | movdqu xmm2,XMMWORD[rdx]
|
---|
922 | movdqu xmm7,XMMWORD[32+rdx]
|
---|
923 | DB 102,65,15,56,0,194
|
---|
924 |
|
---|
925 | sub r9,0x10
|
---|
926 | jz NEAR $L$odd_tail
|
---|
927 |
|
---|
928 | movdqu xmm6,XMMWORD[16+rdx]
|
---|
929 | mov eax,DWORD[((OPENSSL_ia32cap_P+4))]
|
---|
930 | cmp r9,0x30
|
---|
931 | jb NEAR $L$skip4x
|
---|
932 |
|
---|
933 | and eax,71303168
|
---|
934 | cmp eax,4194304
|
---|
935 | je NEAR $L$skip4x
|
---|
936 |
|
---|
937 | sub r9,0x30
|
---|
938 | mov rax,0xA040608020C0E000
|
---|
939 | movdqu xmm14,XMMWORD[48+rdx]
|
---|
940 | movdqu xmm15,XMMWORD[64+rdx]
|
---|
941 |
|
---|
942 |
|
---|
943 |
|
---|
944 |
|
---|
945 | movdqu xmm3,XMMWORD[48+r8]
|
---|
946 | movdqu xmm11,XMMWORD[32+r8]
|
---|
947 | DB 102,65,15,56,0,218
|
---|
948 | DB 102,69,15,56,0,218
|
---|
949 | movdqa xmm5,xmm3
|
---|
950 | pshufd xmm4,xmm3,78
|
---|
951 | pxor xmm4,xmm3
|
---|
952 | DB 102,15,58,68,218,0
|
---|
953 | DB 102,15,58,68,234,17
|
---|
954 | DB 102,15,58,68,231,0
|
---|
955 |
|
---|
956 | movdqa xmm13,xmm11
|
---|
957 | pshufd xmm12,xmm11,78
|
---|
958 | pxor xmm12,xmm11
|
---|
959 | DB 102,68,15,58,68,222,0
|
---|
960 | DB 102,68,15,58,68,238,17
|
---|
961 | DB 102,68,15,58,68,231,16
|
---|
962 | xorps xmm3,xmm11
|
---|
963 | xorps xmm5,xmm13
|
---|
964 | movups xmm7,XMMWORD[80+rdx]
|
---|
965 | xorps xmm4,xmm12
|
---|
966 |
|
---|
967 | movdqu xmm11,XMMWORD[16+r8]
|
---|
968 | movdqu xmm8,XMMWORD[r8]
|
---|
969 | DB 102,69,15,56,0,218
|
---|
970 | DB 102,69,15,56,0,194
|
---|
971 | movdqa xmm13,xmm11
|
---|
972 | pshufd xmm12,xmm11,78
|
---|
973 | pxor xmm0,xmm8
|
---|
974 | pxor xmm12,xmm11
|
---|
975 | DB 102,69,15,58,68,222,0
|
---|
976 | movdqa xmm1,xmm0
|
---|
977 | pshufd xmm8,xmm0,78
|
---|
978 | pxor xmm8,xmm0
|
---|
979 | DB 102,69,15,58,68,238,17
|
---|
980 | DB 102,68,15,58,68,231,0
|
---|
981 | xorps xmm3,xmm11
|
---|
982 | xorps xmm5,xmm13
|
---|
983 |
|
---|
984 | lea r8,[64+r8]
|
---|
985 | sub r9,0x40
|
---|
986 | jc NEAR $L$tail4x
|
---|
987 |
|
---|
988 | jmp NEAR $L$mod4_loop
|
---|
989 | ALIGN 32
|
---|
990 | $L$mod4_loop:
|
---|
991 | DB 102,65,15,58,68,199,0
|
---|
992 | xorps xmm4,xmm12
|
---|
993 | movdqu xmm11,XMMWORD[48+r8]
|
---|
994 | DB 102,69,15,56,0,218
|
---|
995 | DB 102,65,15,58,68,207,17
|
---|
996 | xorps xmm0,xmm3
|
---|
997 | movdqu xmm3,XMMWORD[32+r8]
|
---|
998 | movdqa xmm13,xmm11
|
---|
999 | DB 102,68,15,58,68,199,16
|
---|
1000 | pshufd xmm12,xmm11,78
|
---|
1001 | xorps xmm1,xmm5
|
---|
1002 | pxor xmm12,xmm11
|
---|
1003 | DB 102,65,15,56,0,218
|
---|
1004 | movups xmm7,XMMWORD[32+rdx]
|
---|
1005 | xorps xmm8,xmm4
|
---|
1006 | DB 102,68,15,58,68,218,0
|
---|
1007 | pshufd xmm4,xmm3,78
|
---|
1008 |
|
---|
1009 | pxor xmm8,xmm0
|
---|
1010 | movdqa xmm5,xmm3
|
---|
1011 | pxor xmm8,xmm1
|
---|
1012 | pxor xmm4,xmm3
|
---|
1013 | movdqa xmm9,xmm8
|
---|
1014 | DB 102,68,15,58,68,234,17
|
---|
1015 | pslldq xmm8,8
|
---|
1016 | psrldq xmm9,8
|
---|
1017 | pxor xmm0,xmm8
|
---|
1018 | movdqa xmm8,XMMWORD[$L$7_mask]
|
---|
1019 | pxor xmm1,xmm9
|
---|
1020 | DB 102,76,15,110,200
|
---|
1021 |
|
---|
1022 | pand xmm8,xmm0
|
---|
1023 | DB 102,69,15,56,0,200
|
---|
1024 | pxor xmm9,xmm0
|
---|
1025 | DB 102,68,15,58,68,231,0
|
---|
1026 | psllq xmm9,57
|
---|
1027 | movdqa xmm8,xmm9
|
---|
1028 | pslldq xmm9,8
|
---|
1029 | DB 102,15,58,68,222,0
|
---|
1030 | psrldq xmm8,8
|
---|
1031 | pxor xmm0,xmm9
|
---|
1032 | pxor xmm1,xmm8
|
---|
1033 | movdqu xmm8,XMMWORD[r8]
|
---|
1034 |
|
---|
1035 | movdqa xmm9,xmm0
|
---|
1036 | psrlq xmm0,1
|
---|
1037 | DB 102,15,58,68,238,17
|
---|
1038 | xorps xmm3,xmm11
|
---|
1039 | movdqu xmm11,XMMWORD[16+r8]
|
---|
1040 | DB 102,69,15,56,0,218
|
---|
1041 | DB 102,15,58,68,231,16
|
---|
1042 | xorps xmm5,xmm13
|
---|
1043 | movups xmm7,XMMWORD[80+rdx]
|
---|
1044 | DB 102,69,15,56,0,194
|
---|
1045 | pxor xmm1,xmm9
|
---|
1046 | pxor xmm9,xmm0
|
---|
1047 | psrlq xmm0,5
|
---|
1048 |
|
---|
1049 | movdqa xmm13,xmm11
|
---|
1050 | pxor xmm4,xmm12
|
---|
1051 | pshufd xmm12,xmm11,78
|
---|
1052 | pxor xmm0,xmm9
|
---|
1053 | pxor xmm1,xmm8
|
---|
1054 | pxor xmm12,xmm11
|
---|
1055 | DB 102,69,15,58,68,222,0
|
---|
1056 | psrlq xmm0,1
|
---|
1057 | pxor xmm0,xmm1
|
---|
1058 | movdqa xmm1,xmm0
|
---|
1059 | DB 102,69,15,58,68,238,17
|
---|
1060 | xorps xmm3,xmm11
|
---|
1061 | pshufd xmm8,xmm0,78
|
---|
1062 | pxor xmm8,xmm0
|
---|
1063 |
|
---|
1064 | DB 102,68,15,58,68,231,0
|
---|
1065 | xorps xmm5,xmm13
|
---|
1066 |
|
---|
1067 | lea r8,[64+r8]
|
---|
1068 | sub r9,0x40
|
---|
1069 | jnc NEAR $L$mod4_loop
|
---|
1070 |
|
---|
1071 | $L$tail4x:
|
---|
1072 | DB 102,65,15,58,68,199,0
|
---|
1073 | DB 102,65,15,58,68,207,17
|
---|
1074 | DB 102,68,15,58,68,199,16
|
---|
1075 | xorps xmm4,xmm12
|
---|
1076 | xorps xmm0,xmm3
|
---|
1077 | xorps xmm1,xmm5
|
---|
1078 | pxor xmm1,xmm0
|
---|
1079 | pxor xmm8,xmm4
|
---|
1080 |
|
---|
1081 | pxor xmm8,xmm1
|
---|
1082 | pxor xmm1,xmm0
|
---|
1083 |
|
---|
1084 | movdqa xmm9,xmm8
|
---|
1085 | psrldq xmm8,8
|
---|
1086 | pslldq xmm9,8
|
---|
1087 | pxor xmm1,xmm8
|
---|
1088 | pxor xmm0,xmm9
|
---|
1089 |
|
---|
1090 | movdqa xmm4,xmm0
|
---|
1091 | movdqa xmm3,xmm0
|
---|
1092 | psllq xmm0,5
|
---|
1093 | pxor xmm3,xmm0
|
---|
1094 | psllq xmm0,1
|
---|
1095 | pxor xmm0,xmm3
|
---|
1096 | psllq xmm0,57
|
---|
1097 | movdqa xmm3,xmm0
|
---|
1098 | pslldq xmm0,8
|
---|
1099 | psrldq xmm3,8
|
---|
1100 | pxor xmm0,xmm4
|
---|
1101 | pxor xmm1,xmm3
|
---|
1102 |
|
---|
1103 |
|
---|
1104 | movdqa xmm4,xmm0
|
---|
1105 | psrlq xmm0,1
|
---|
1106 | pxor xmm1,xmm4
|
---|
1107 | pxor xmm4,xmm0
|
---|
1108 | psrlq xmm0,5
|
---|
1109 | pxor xmm0,xmm4
|
---|
1110 | psrlq xmm0,1
|
---|
1111 | pxor xmm0,xmm1
|
---|
1112 | add r9,0x40
|
---|
1113 | jz NEAR $L$done
|
---|
1114 | movdqu xmm7,XMMWORD[32+rdx]
|
---|
1115 | sub r9,0x10
|
---|
1116 | jz NEAR $L$odd_tail
|
---|
1117 | $L$skip4x:
|
---|
1118 |
|
---|
1119 |
|
---|
1120 |
|
---|
1121 |
|
---|
1122 |
|
---|
1123 | movdqu xmm8,XMMWORD[r8]
|
---|
1124 | movdqu xmm3,XMMWORD[16+r8]
|
---|
1125 | DB 102,69,15,56,0,194
|
---|
1126 | DB 102,65,15,56,0,218
|
---|
1127 | pxor xmm0,xmm8
|
---|
1128 |
|
---|
1129 | movdqa xmm5,xmm3
|
---|
1130 | pshufd xmm4,xmm3,78
|
---|
1131 | pxor xmm4,xmm3
|
---|
1132 | DB 102,15,58,68,218,0
|
---|
1133 | DB 102,15,58,68,234,17
|
---|
1134 | DB 102,15,58,68,231,0
|
---|
1135 |
|
---|
1136 | lea r8,[32+r8]
|
---|
1137 | nop
|
---|
1138 | sub r9,0x20
|
---|
1139 | jbe NEAR $L$even_tail
|
---|
1140 | nop
|
---|
1141 | jmp NEAR $L$mod_loop
|
---|
1142 |
|
---|
1143 | ALIGN 32
|
---|
1144 | $L$mod_loop:
|
---|
1145 | movdqa xmm1,xmm0
|
---|
1146 | movdqa xmm8,xmm4
|
---|
1147 | pshufd xmm4,xmm0,78
|
---|
1148 | pxor xmm4,xmm0
|
---|
1149 |
|
---|
1150 | DB 102,15,58,68,198,0
|
---|
1151 | DB 102,15,58,68,206,17
|
---|
1152 | DB 102,15,58,68,231,16
|
---|
1153 |
|
---|
1154 | pxor xmm0,xmm3
|
---|
1155 | pxor xmm1,xmm5
|
---|
1156 | movdqu xmm9,XMMWORD[r8]
|
---|
1157 | pxor xmm8,xmm0
|
---|
1158 | DB 102,69,15,56,0,202
|
---|
1159 | movdqu xmm3,XMMWORD[16+r8]
|
---|
1160 |
|
---|
1161 | pxor xmm8,xmm1
|
---|
1162 | pxor xmm1,xmm9
|
---|
1163 | pxor xmm4,xmm8
|
---|
1164 | DB 102,65,15,56,0,218
|
---|
1165 | movdqa xmm8,xmm4
|
---|
1166 | psrldq xmm8,8
|
---|
1167 | pslldq xmm4,8
|
---|
1168 | pxor xmm1,xmm8
|
---|
1169 | pxor xmm0,xmm4
|
---|
1170 |
|
---|
1171 | movdqa xmm5,xmm3
|
---|
1172 |
|
---|
1173 | movdqa xmm9,xmm0
|
---|
1174 | movdqa xmm8,xmm0
|
---|
1175 | psllq xmm0,5
|
---|
1176 | pxor xmm8,xmm0
|
---|
1177 | DB 102,15,58,68,218,0
|
---|
1178 | psllq xmm0,1
|
---|
1179 | pxor xmm0,xmm8
|
---|
1180 | psllq xmm0,57
|
---|
1181 | movdqa xmm8,xmm0
|
---|
1182 | pslldq xmm0,8
|
---|
1183 | psrldq xmm8,8
|
---|
1184 | pxor xmm0,xmm9
|
---|
1185 | pshufd xmm4,xmm5,78
|
---|
1186 | pxor xmm1,xmm8
|
---|
1187 | pxor xmm4,xmm5
|
---|
1188 |
|
---|
1189 | movdqa xmm9,xmm0
|
---|
1190 | psrlq xmm0,1
|
---|
1191 | DB 102,15,58,68,234,17
|
---|
1192 | pxor xmm1,xmm9
|
---|
1193 | pxor xmm9,xmm0
|
---|
1194 | psrlq xmm0,5
|
---|
1195 | pxor xmm0,xmm9
|
---|
1196 | lea r8,[32+r8]
|
---|
1197 | psrlq xmm0,1
|
---|
1198 | DB 102,15,58,68,231,0
|
---|
1199 | pxor xmm0,xmm1
|
---|
1200 |
|
---|
1201 | sub r9,0x20
|
---|
1202 | ja NEAR $L$mod_loop
|
---|
1203 |
|
---|
1204 | $L$even_tail:
|
---|
1205 | movdqa xmm1,xmm0
|
---|
1206 | movdqa xmm8,xmm4
|
---|
1207 | pshufd xmm4,xmm0,78
|
---|
1208 | pxor xmm4,xmm0
|
---|
1209 |
|
---|
1210 | DB 102,15,58,68,198,0
|
---|
1211 | DB 102,15,58,68,206,17
|
---|
1212 | DB 102,15,58,68,231,16
|
---|
1213 |
|
---|
1214 | pxor xmm0,xmm3
|
---|
1215 | pxor xmm1,xmm5
|
---|
1216 | pxor xmm8,xmm0
|
---|
1217 | pxor xmm8,xmm1
|
---|
1218 | pxor xmm4,xmm8
|
---|
1219 | movdqa xmm8,xmm4
|
---|
1220 | psrldq xmm8,8
|
---|
1221 | pslldq xmm4,8
|
---|
1222 | pxor xmm1,xmm8
|
---|
1223 | pxor xmm0,xmm4
|
---|
1224 |
|
---|
1225 | movdqa xmm4,xmm0
|
---|
1226 | movdqa xmm3,xmm0
|
---|
1227 | psllq xmm0,5
|
---|
1228 | pxor xmm3,xmm0
|
---|
1229 | psllq xmm0,1
|
---|
1230 | pxor xmm0,xmm3
|
---|
1231 | psllq xmm0,57
|
---|
1232 | movdqa xmm3,xmm0
|
---|
1233 | pslldq xmm0,8
|
---|
1234 | psrldq xmm3,8
|
---|
1235 | pxor xmm0,xmm4
|
---|
1236 | pxor xmm1,xmm3
|
---|
1237 |
|
---|
1238 |
|
---|
1239 | movdqa xmm4,xmm0
|
---|
1240 | psrlq xmm0,1
|
---|
1241 | pxor xmm1,xmm4
|
---|
1242 | pxor xmm4,xmm0
|
---|
1243 | psrlq xmm0,5
|
---|
1244 | pxor xmm0,xmm4
|
---|
1245 | psrlq xmm0,1
|
---|
1246 | pxor xmm0,xmm1
|
---|
1247 | test r9,r9
|
---|
1248 | jnz NEAR $L$done
|
---|
1249 |
|
---|
1250 | $L$odd_tail:
|
---|
1251 | movdqu xmm8,XMMWORD[r8]
|
---|
1252 | DB 102,69,15,56,0,194
|
---|
1253 | pxor xmm0,xmm8
|
---|
1254 | movdqa xmm1,xmm0
|
---|
1255 | pshufd xmm3,xmm0,78
|
---|
1256 | pxor xmm3,xmm0
|
---|
1257 | DB 102,15,58,68,194,0
|
---|
1258 | DB 102,15,58,68,202,17
|
---|
1259 | DB 102,15,58,68,223,0
|
---|
1260 | pxor xmm3,xmm0
|
---|
1261 | pxor xmm3,xmm1
|
---|
1262 |
|
---|
1263 | movdqa xmm4,xmm3
|
---|
1264 | psrldq xmm3,8
|
---|
1265 | pslldq xmm4,8
|
---|
1266 | pxor xmm1,xmm3
|
---|
1267 | pxor xmm0,xmm4
|
---|
1268 |
|
---|
1269 | movdqa xmm4,xmm0
|
---|
1270 | movdqa xmm3,xmm0
|
---|
1271 | psllq xmm0,5
|
---|
1272 | pxor xmm3,xmm0
|
---|
1273 | psllq xmm0,1
|
---|
1274 | pxor xmm0,xmm3
|
---|
1275 | psllq xmm0,57
|
---|
1276 | movdqa xmm3,xmm0
|
---|
1277 | pslldq xmm0,8
|
---|
1278 | psrldq xmm3,8
|
---|
1279 | pxor xmm0,xmm4
|
---|
1280 | pxor xmm1,xmm3
|
---|
1281 |
|
---|
1282 |
|
---|
1283 | movdqa xmm4,xmm0
|
---|
1284 | psrlq xmm0,1
|
---|
1285 | pxor xmm1,xmm4
|
---|
1286 | pxor xmm4,xmm0
|
---|
1287 | psrlq xmm0,5
|
---|
1288 | pxor xmm0,xmm4
|
---|
1289 | psrlq xmm0,1
|
---|
1290 | pxor xmm0,xmm1
|
---|
1291 | $L$done:
|
---|
1292 | DB 102,65,15,56,0,194
|
---|
1293 | movdqu XMMWORD[rcx],xmm0
|
---|
1294 | movaps xmm6,XMMWORD[rsp]
|
---|
1295 | movaps xmm7,XMMWORD[16+rsp]
|
---|
1296 | movaps xmm8,XMMWORD[32+rsp]
|
---|
1297 | movaps xmm9,XMMWORD[48+rsp]
|
---|
1298 | movaps xmm10,XMMWORD[64+rsp]
|
---|
1299 | movaps xmm11,XMMWORD[80+rsp]
|
---|
1300 | movaps xmm12,XMMWORD[96+rsp]
|
---|
1301 | movaps xmm13,XMMWORD[112+rsp]
|
---|
1302 | movaps xmm14,XMMWORD[128+rsp]
|
---|
1303 | movaps xmm15,XMMWORD[144+rsp]
|
---|
1304 | lea rsp,[168+rsp]
|
---|
1305 | $L$SEH_end_gcm_ghash_clmul:
|
---|
1306 | DB 0F3h,0C3h ;repret
|
---|
1307 |
|
---|
1308 | global gcm_init_avx
|
---|
1309 | ; gcm_init_avx -- Win64 args: rcx = table to fill (4 groups of 48 bytes are written), rdx = 16-byte input value (read only). Clobbers r10, xmm0-xmm5; xmm6 is saved/restored.
|
---|
1310 | ALIGN 32
|
---|
1311 | gcm_init_avx:
|
---|
1312 | $L$SEH_begin_gcm_init_avx:
|
---|
1313 |
|
---|
1314 | DB 0x48,0x83,0xec,0x18 ; hand-encoded: sub rsp,0x18
|
---|
1315 | DB 0x0f,0x29,0x34,0x24 ; hand-encoded: movaps [rsp],xmm6 (restored before return)
|
---|
1316 | vzeroupper
|
---|
1317 |
|
---|
1318 | vmovdqu xmm2,XMMWORD[rdx] ; xmm2 = the 16-byte input
|
---|
1319 | vpshufd xmm2,xmm2,78 ; swap its two 64-bit halves
|
---|
1320 | ; Preprocess: shift xmm2 left by one bit as a 128-bit value, then xor in the 0x1c2 constant if the bit shifted out was set.
|
---|
1321 |
|
---|
1322 | vpshufd xmm4,xmm2,255 ; broadcast the top dword (its sign bit is the top bit of xmm2)
|
---|
1323 | vpsrlq xmm3,xmm2,63 ; bit 63 of each qword
|
---|
1324 | vpsllq xmm2,xmm2,1
|
---|
1325 | vpxor xmm5,xmm5,xmm5
|
---|
1326 | vpcmpgtd xmm5,xmm5,xmm4 ; all-ones where 0 > top dword, i.e. the top bit was set
|
---|
1327 | vpslldq xmm3,xmm3,8 ; move the low qword's carry into the high qword
|
---|
1328 | vpor xmm2,xmm2,xmm3
|
---|
1329 |
|
---|
1330 |
|
---|
1331 | vpand xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial]
|
---|
1332 | vpxor xmm2,xmm2,xmm5 ; xmm2 = preprocessed multiplier, constant for the rest of the routine
|
---|
1333 |
|
---|
1334 | vpunpckhqdq xmm6,xmm2,xmm2
|
---|
1335 | vmovdqa xmm0,xmm2 ; xmm0 = running product, starts equal to the multiplier
|
---|
1336 | vpxor xmm6,xmm6,xmm2 ; low qword of xmm6 = hi64 ^ lo64 of xmm2 (operand for every middle product)
|
---|
1337 | mov r10,4 ; four passes, each filling one 48-byte group
|
---|
1338 | jmp NEAR $L$init_start_avx ; the first pass skips the extra multiply at the loop head
|
---|
1339 | ALIGN 32
|
---|
1340 | $L$init_loop_avx:
|
---|
1341 | vpalignr xmm5,xmm4,xmm3,8 ; pack the two hi^lo values left in xmm3/xmm4 by the previous pass
|
---|
1342 | vmovdqu XMMWORD[(-16)+rcx],xmm5 ; third 16-byte slot of the previous group (rcx has already advanced by 48)
|
---|
1343 | vpunpckhqdq xmm3,xmm0,xmm0
|
---|
1344 | vpxor xmm3,xmm3,xmm0 ; low qword = hi64 ^ lo64 of xmm0
|
---|
1345 | vpclmulqdq xmm1,xmm0,xmm2,0x11 ; xmm1 = hi64 * hi64 (carry-less)
|
---|
1346 | vpclmulqdq xmm0,xmm0,xmm2,0x00 ; xmm0 = lo64 * lo64
|
---|
1347 | vpclmulqdq xmm3,xmm3,xmm6,0x00 ; xmm3 = (hi^lo) * (hi^lo) of the two operands
|
---|
1348 | vpxor xmm4,xmm1,xmm0
|
---|
1349 | vpxor xmm3,xmm3,xmm4 ; middle term = xmm3 ^ hi product ^ lo product
|
---|
1350 | ; fold the middle term into the 256-bit product held in xmm1:xmm0
|
---|
1351 | vpslldq xmm4,xmm3,8
|
---|
1352 | vpsrldq xmm3,xmm3,8
|
---|
1353 | vpxor xmm0,xmm0,xmm4
|
---|
1354 | vpxor xmm1,xmm1,xmm3
|
---|
1355 | vpsllq xmm3,xmm0,57 ; shift/xor sequence folding xmm1:xmm0 down to 128 bits, first phase
|
---|
1356 | vpsllq xmm4,xmm0,62
|
---|
1357 | vpxor xmm4,xmm4,xmm3
|
---|
1358 | vpsllq xmm3,xmm0,63
|
---|
1359 | vpxor xmm4,xmm4,xmm3
|
---|
1360 | vpslldq xmm3,xmm4,8
|
---|
1361 | vpsrldq xmm4,xmm4,8
|
---|
1362 | vpxor xmm0,xmm0,xmm3
|
---|
1363 | vpxor xmm1,xmm1,xmm4
|
---|
1364 | ; second phase of the fold (right shifts by 1, 5, 1)
|
---|
1365 | vpsrlq xmm4,xmm0,1
|
---|
1366 | vpxor xmm1,xmm1,xmm0
|
---|
1367 | vpxor xmm0,xmm0,xmm4
|
---|
1368 | vpsrlq xmm4,xmm4,5
|
---|
1369 | vpxor xmm0,xmm0,xmm4
|
---|
1370 | vpsrlq xmm0,xmm0,1
|
---|
1371 | vpxor xmm0,xmm0,xmm1 ; xmm0 = reduced 128-bit product
|
---|
1372 | $L$init_start_avx:
|
---|
1373 | vmovdqa xmm5,xmm0 ; keep the value before this pass's multiply; it is stored in slot 0 below
|
---|
1374 | vpunpckhqdq xmm3,xmm0,xmm0
|
---|
1375 | vpxor xmm3,xmm3,xmm0
|
---|
1376 | vpclmulqdq xmm1,xmm0,xmm2,0x11 ; same three-multiply product as at the loop head: xmm0 *= xmm2
|
---|
1377 | vpclmulqdq xmm0,xmm0,xmm2,0x00
|
---|
1378 | vpclmulqdq xmm3,xmm3,xmm6,0x00
|
---|
1379 | vpxor xmm4,xmm1,xmm0
|
---|
1380 | vpxor xmm3,xmm3,xmm4
|
---|
1381 | ; fold the middle term into xmm1:xmm0
|
---|
1382 | vpslldq xmm4,xmm3,8
|
---|
1383 | vpsrldq xmm3,xmm3,8
|
---|
1384 | vpxor xmm0,xmm0,xmm4
|
---|
1385 | vpxor xmm1,xmm1,xmm3
|
---|
1386 | vpsllq xmm3,xmm0,57 ; first phase of the 256 -> 128 bit fold
|
---|
1387 | vpsllq xmm4,xmm0,62
|
---|
1388 | vpxor xmm4,xmm4,xmm3
|
---|
1389 | vpsllq xmm3,xmm0,63
|
---|
1390 | vpxor xmm4,xmm4,xmm3
|
---|
1391 | vpslldq xmm3,xmm4,8
|
---|
1392 | vpsrldq xmm4,xmm4,8
|
---|
1393 | vpxor xmm0,xmm0,xmm3
|
---|
1394 | vpxor xmm1,xmm1,xmm4
|
---|
1395 | ; second phase of the fold
|
---|
1396 | vpsrlq xmm4,xmm0,1
|
---|
1397 | vpxor xmm1,xmm1,xmm0
|
---|
1398 | vpxor xmm0,xmm0,xmm4
|
---|
1399 | vpsrlq xmm4,xmm4,5
|
---|
1400 | vpxor xmm0,xmm0,xmm4
|
---|
1401 | vpsrlq xmm0,xmm0,1
|
---|
1402 | vpxor xmm0,xmm0,xmm1
|
---|
1403 | vpshufd xmm3,xmm5,78
|
---|
1404 | vpshufd xmm4,xmm0,78
|
---|
1405 | vpxor xmm3,xmm3,xmm5 ; xmm3 = hi^lo of the slot-0 value, in both qwords
|
---|
1406 | vmovdqu XMMWORD[rcx],xmm5 ; slot 0: value before this pass's multiply
|
---|
1407 | vpxor xmm4,xmm4,xmm0 ; xmm4 = hi^lo of the slot-1 value, in both qwords
|
---|
1408 | vmovdqu XMMWORD[16+rcx],xmm0 ; slot 1: value after the multiply
|
---|
1409 | lea rcx,[48+rcx] ; next 48-byte group; slot 2 of this group is written later via [-16+rcx]
|
---|
1410 | sub r10,1
|
---|
1411 | jnz NEAR $L$init_loop_avx
|
---|
1412 | ; NOTE: source operands are swapped relative to the in-loop vpalignr, so the last group's slot 2 holds its two hi^lo qwords in the opposite order.
|
---|
1413 | vpalignr xmm5,xmm3,xmm4,8
|
---|
1414 | vmovdqu XMMWORD[(-16)+rcx],xmm5
|
---|
1415 |
|
---|
1416 | vzeroupper
|
---|
1417 | movaps xmm6,XMMWORD[rsp] ; restore xmm6 saved by the prologue
|
---|
1418 | lea rsp,[24+rsp] ; release the 0x18-byte frame
|
---|
1419 | $L$SEH_end_gcm_init_avx:
|
---|
1420 | DB 0F3h,0C3h ;repret
|
---|
1421 |
|
---|
1422 | global gcm_gmult_avx
|
---|
1423 | ; gcm_gmult_avx has no body of its own: it jumps straight to $L$_gmult_clmul, a label defined outside this part of the file.
|
---|
1424 | ALIGN 32
|
---|
1425 | gcm_gmult_avx:
|
---|
1426 | jmp NEAR $L$_gmult_clmul ; tail jump; no register or stack state is changed first
|
---|
1427 |
|
---|
1428 | global gcm_ghash_avx
|
---|
1429 | ; gcm_ghash_avx -- Win64 args: rcx = 16-byte state (read, updated, written back), rdx = precomputed table (48-byte groups as written by gcm_init_avx), r8 = input, r9 = input length in bytes. NOTE(review): the code appears to assume r9 is a nonzero multiple of 16 -- confirm with callers.
|
---|
1430 | ALIGN 32
|
---|
1431 | gcm_ghash_avx:
|
---|
1432 | lea rax,[((-136))+rsp] ; rax = base used by the hand-encoded xmm saves below
|
---|
1433 | $L$SEH_begin_gcm_ghash_avx:
|
---|
1434 | ; Hand-encoded prologue: allocate 168 bytes and save xmm6-xmm15 (restored from [rsp+0..144] before return).
|
---|
1435 | DB 0x48,0x8d,0x60,0xe0 ; lea rsp,[rax-0x20]  -> rsp = entry rsp - 168
|
---|
1436 | DB 0x0f,0x29,0x70,0xe0 ; movaps [rax-0x20],xmm6
|
---|
1437 | DB 0x0f,0x29,0x78,0xf0 ; movaps [rax-0x10],xmm7
|
---|
1438 | DB 0x44,0x0f,0x29,0x00 ; movaps [rax],xmm8
|
---|
1439 | DB 0x44,0x0f,0x29,0x48,0x10 ; movaps [rax+0x10],xmm9
|
---|
1440 | DB 0x44,0x0f,0x29,0x50,0x20 ; movaps [rax+0x20],xmm10
|
---|
1441 | DB 0x44,0x0f,0x29,0x58,0x30 ; movaps [rax+0x30],xmm11
|
---|
1442 | DB 0x44,0x0f,0x29,0x60,0x40 ; movaps [rax+0x40],xmm12
|
---|
1443 | DB 0x44,0x0f,0x29,0x68,0x50 ; movaps [rax+0x50],xmm13
|
---|
1444 | DB 0x44,0x0f,0x29,0x70,0x60 ; movaps [rax+0x60],xmm14
|
---|
1445 | DB 0x44,0x0f,0x29,0x78,0x70 ; movaps [rax+0x70],xmm15
|
---|
1446 | vzeroupper
|
---|
1447 |
|
---|
1448 | vmovdqu xmm10,XMMWORD[rcx] ; xmm10 = running state
|
---|
1449 | lea r10,[$L$0x1c2_polynomial] ; r10 -> constant used by the reduction steps
|
---|
1450 | lea rdx,[64+rdx] ; bias the table pointer; table offsets below are written as (offset-64)
|
---|
1451 | vmovdqu xmm13,XMMWORD[$L$bswap_mask] ; xmm13 = byte-reversal shuffle control
|
---|
1452 | vpshufb xmm10,xmm10,xmm13 ; byte-reverse the state (undone before the final store)
|
---|
1453 | cmp r9,0x80
|
---|
1454 | jb NEAR $L$short_avx ; fewer than 8 blocks (128 bytes)
|
---|
1455 | sub r9,0x80
|
---|
1456 | ; First 8-block group. Blocks are taken from offset 112 down to 0 and paired with table entries from offset 0 upward. Per block three products are formed: lo*lo (xmm0/xmm3), hi*hi (xmm1/xmm4), (hi^lo)*(table hi^lo) (xmm2/xmm5).
|
---|
1457 | vmovdqu xmm14,XMMWORD[112+r8]
|
---|
1458 | vmovdqu xmm6,XMMWORD[((0-64))+rdx]
|
---|
1459 | vpshufb xmm14,xmm14,xmm13
|
---|
1460 | vmovdqu xmm7,XMMWORD[((32-64))+rdx] ; third slot of a 48-byte group: low qword used with imm 0x00, high qword with imm 0x10
|
---|
1461 |
|
---|
1462 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1463 | vmovdqu xmm15,XMMWORD[96+r8]
|
---|
1464 | vpclmulqdq xmm0,xmm14,xmm6,0x00
|
---|
1465 | vpxor xmm9,xmm9,xmm14 ; low qword = hi64 ^ lo64 of the block
|
---|
1466 | vpshufb xmm15,xmm15,xmm13
|
---|
1467 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1468 | vmovdqu xmm6,XMMWORD[((16-64))+rdx]
|
---|
1469 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1470 | vmovdqu xmm14,XMMWORD[80+r8]
|
---|
1471 | vpclmulqdq xmm2,xmm9,xmm7,0x00
|
---|
1472 | vpxor xmm8,xmm8,xmm15
|
---|
1473 |
|
---|
1474 | vpshufb xmm14,xmm14,xmm13
|
---|
1475 | vpclmulqdq xmm3,xmm15,xmm6,0x00
|
---|
1476 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1477 | vpclmulqdq xmm4,xmm15,xmm6,0x11
|
---|
1478 | vmovdqu xmm6,XMMWORD[((48-64))+rdx]
|
---|
1479 | vpxor xmm9,xmm9,xmm14
|
---|
1480 | vmovdqu xmm15,XMMWORD[64+r8]
|
---|
1481 | vpclmulqdq xmm5,xmm8,xmm7,0x10
|
---|
1482 | vmovdqu xmm7,XMMWORD[((80-64))+rdx]
|
---|
1483 |
|
---|
1484 | vpshufb xmm15,xmm15,xmm13
|
---|
1485 | vpxor xmm3,xmm3,xmm0 ; accumulate: the two register sets take turns holding the running sums
|
---|
1486 | vpclmulqdq xmm0,xmm14,xmm6,0x00
|
---|
1487 | vpxor xmm4,xmm4,xmm1
|
---|
1488 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1489 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1490 | vmovdqu xmm6,XMMWORD[((64-64))+rdx]
|
---|
1491 | vpxor xmm5,xmm5,xmm2
|
---|
1492 | vpclmulqdq xmm2,xmm9,xmm7,0x00
|
---|
1493 | vpxor xmm8,xmm8,xmm15
|
---|
1494 |
|
---|
1495 | vmovdqu xmm14,XMMWORD[48+r8]
|
---|
1496 | vpxor xmm0,xmm0,xmm3
|
---|
1497 | vpclmulqdq xmm3,xmm15,xmm6,0x00
|
---|
1498 | vpxor xmm1,xmm1,xmm4
|
---|
1499 | vpshufb xmm14,xmm14,xmm13
|
---|
1500 | vpclmulqdq xmm4,xmm15,xmm6,0x11
|
---|
1501 | vmovdqu xmm6,XMMWORD[((96-64))+rdx]
|
---|
1502 | vpxor xmm2,xmm2,xmm5
|
---|
1503 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1504 | vpclmulqdq xmm5,xmm8,xmm7,0x10
|
---|
1505 | vmovdqu xmm7,XMMWORD[((128-64))+rdx]
|
---|
1506 | vpxor xmm9,xmm9,xmm14
|
---|
1507 |
|
---|
1508 | vmovdqu xmm15,XMMWORD[32+r8]
|
---|
1509 | vpxor xmm3,xmm3,xmm0
|
---|
1510 | vpclmulqdq xmm0,xmm14,xmm6,0x00
|
---|
1511 | vpxor xmm4,xmm4,xmm1
|
---|
1512 | vpshufb xmm15,xmm15,xmm13
|
---|
1513 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1514 | vmovdqu xmm6,XMMWORD[((112-64))+rdx]
|
---|
1515 | vpxor xmm5,xmm5,xmm2
|
---|
1516 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1517 | vpclmulqdq xmm2,xmm9,xmm7,0x00
|
---|
1518 | vpxor xmm8,xmm8,xmm15
|
---|
1519 |
|
---|
1520 | vmovdqu xmm14,XMMWORD[16+r8]
|
---|
1521 | vpxor xmm0,xmm0,xmm3
|
---|
1522 | vpclmulqdq xmm3,xmm15,xmm6,0x00
|
---|
1523 | vpxor xmm1,xmm1,xmm4
|
---|
1524 | vpshufb xmm14,xmm14,xmm13
|
---|
1525 | vpclmulqdq xmm4,xmm15,xmm6,0x11
|
---|
1526 | vmovdqu xmm6,XMMWORD[((144-64))+rdx]
|
---|
1527 | vpxor xmm2,xmm2,xmm5
|
---|
1528 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1529 | vpclmulqdq xmm5,xmm8,xmm7,0x10
|
---|
1530 | vmovdqu xmm7,XMMWORD[((176-64))+rdx]
|
---|
1531 | vpxor xmm9,xmm9,xmm14
|
---|
1532 |
|
---|
1533 | vmovdqu xmm15,XMMWORD[r8] ; first block of the group; its multiply is deferred until the state has been xored in
|
---|
1534 | vpxor xmm3,xmm3,xmm0
|
---|
1535 | vpclmulqdq xmm0,xmm14,xmm6,0x00
|
---|
1536 | vpxor xmm4,xmm4,xmm1
|
---|
1537 | vpshufb xmm15,xmm15,xmm13
|
---|
1538 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1539 | vmovdqu xmm6,XMMWORD[((160-64))+rdx]
|
---|
1540 | vpxor xmm5,xmm5,xmm2
|
---|
1541 | vpclmulqdq xmm2,xmm9,xmm7,0x10
|
---|
1542 |
|
---|
1543 | lea r8,[128+r8] ; input pointer past this group
|
---|
1544 | cmp r9,0x80
|
---|
1545 | jb NEAR $L$tail_avx ; no further full 8-block group: finish in the tail
|
---|
1546 |
|
---|
1547 | vpxor xmm15,xmm15,xmm10 ; fold the running state into the first block
|
---|
1548 | sub r9,0x80 ; pre-decrement; the loop exits when a later subtraction borrows
|
---|
1549 | jmp NEAR $L$oop8x_avx
|
---|
1550 | ; Main loop: completes the previous group (last block's products, then the reduction into xmm10) interleaved with the multiplies for the next 8 blocks.
|
---|
1551 | ALIGN 32
|
---|
1552 | $L$oop8x_avx:
|
---|
1553 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1554 | vmovdqu xmm14,XMMWORD[112+r8]
|
---|
1555 | vpxor xmm3,xmm3,xmm0
|
---|
1556 | vpxor xmm8,xmm8,xmm15
|
---|
1557 | vpclmulqdq xmm10,xmm15,xmm6,0x00 ; previous group: lo product of (first block ^ state)
|
---|
1558 | vpshufb xmm14,xmm14,xmm13
|
---|
1559 | vpxor xmm4,xmm4,xmm1
|
---|
1560 | vpclmulqdq xmm11,xmm15,xmm6,0x11 ; previous group: hi product
|
---|
1561 | vmovdqu xmm6,XMMWORD[((0-64))+rdx]
|
---|
1562 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1563 | vpxor xmm5,xmm5,xmm2
|
---|
1564 | vpclmulqdq xmm12,xmm8,xmm7,0x00 ; previous group: middle product
|
---|
1565 | vmovdqu xmm7,XMMWORD[((32-64))+rdx]
|
---|
1566 | vpxor xmm9,xmm9,xmm14
|
---|
1567 |
|
---|
1568 | vmovdqu xmm15,XMMWORD[96+r8]
|
---|
1569 | vpclmulqdq xmm0,xmm14,xmm6,0x00
|
---|
1570 | vpxor xmm10,xmm10,xmm3 ; xmm10 = total lo sum of the previous group
|
---|
1571 | vpshufb xmm15,xmm15,xmm13
|
---|
1572 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1573 | vxorps xmm11,xmm11,xmm4 ; xmm11 = total hi sum
|
---|
1574 | vmovdqu xmm6,XMMWORD[((16-64))+rdx]
|
---|
1575 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1576 | vpclmulqdq xmm2,xmm9,xmm7,0x00
|
---|
1577 | vpxor xmm12,xmm12,xmm5 ; xmm12 = total middle sum
|
---|
1578 | vxorps xmm8,xmm8,xmm15
|
---|
1579 |
|
---|
1580 | vmovdqu xmm14,XMMWORD[80+r8]
|
---|
1581 | vpxor xmm12,xmm12,xmm10
|
---|
1582 | vpclmulqdq xmm3,xmm15,xmm6,0x00
|
---|
1583 | vpxor xmm12,xmm12,xmm11 ; middle ^= lo ^ hi
|
---|
1584 | vpslldq xmm9,xmm12,8
|
---|
1585 | vpxor xmm3,xmm3,xmm0
|
---|
1586 | vpclmulqdq xmm4,xmm15,xmm6,0x11
|
---|
1587 | vpsrldq xmm12,xmm12,8
|
---|
1588 | vpxor xmm10,xmm10,xmm9 ; low half of the middle term goes into the lo sum
|
---|
1589 | vmovdqu xmm6,XMMWORD[((48-64))+rdx]
|
---|
1590 | vpshufb xmm14,xmm14,xmm13
|
---|
1591 | vxorps xmm11,xmm11,xmm12 ; high half of the middle term goes into the hi sum
|
---|
1592 | vpxor xmm4,xmm4,xmm1
|
---|
1593 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1594 | vpclmulqdq xmm5,xmm8,xmm7,0x10
|
---|
1595 | vmovdqu xmm7,XMMWORD[((80-64))+rdx]
|
---|
1596 | vpxor xmm9,xmm9,xmm14
|
---|
1597 | vpxor xmm5,xmm5,xmm2
|
---|
1598 |
|
---|
1599 | vmovdqu xmm15,XMMWORD[64+r8]
|
---|
1600 | vpalignr xmm12,xmm10,xmm10,8 ; reduction step 1: xmm12 = xmm10 with its halves swapped
|
---|
1601 | vpclmulqdq xmm0,xmm14,xmm6,0x00
|
---|
1602 | vpshufb xmm15,xmm15,xmm13
|
---|
1603 | vpxor xmm0,xmm0,xmm3
|
---|
1604 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1605 | vmovdqu xmm6,XMMWORD[((64-64))+rdx]
|
---|
1606 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1607 | vpxor xmm1,xmm1,xmm4
|
---|
1608 | vpclmulqdq xmm2,xmm9,xmm7,0x00
|
---|
1609 | vxorps xmm8,xmm8,xmm15
|
---|
1610 | vpxor xmm2,xmm2,xmm5
|
---|
1611 |
|
---|
1612 | vmovdqu xmm14,XMMWORD[48+r8]
|
---|
1613 | vpclmulqdq xmm10,xmm10,XMMWORD[r10],0x10 ; low qword of xmm10 times the high qword of the constant at [r10]
|
---|
1614 | vpclmulqdq xmm3,xmm15,xmm6,0x00
|
---|
1615 | vpshufb xmm14,xmm14,xmm13
|
---|
1616 | vpxor xmm3,xmm3,xmm0
|
---|
1617 | vpclmulqdq xmm4,xmm15,xmm6,0x11
|
---|
1618 | vmovdqu xmm6,XMMWORD[((96-64))+rdx]
|
---|
1619 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1620 | vpxor xmm4,xmm4,xmm1
|
---|
1621 | vpclmulqdq xmm5,xmm8,xmm7,0x10
|
---|
1622 | vmovdqu xmm7,XMMWORD[((128-64))+rdx]
|
---|
1623 | vpxor xmm9,xmm9,xmm14
|
---|
1624 | vpxor xmm5,xmm5,xmm2
|
---|
1625 |
|
---|
1626 | vmovdqu xmm15,XMMWORD[32+r8]
|
---|
1627 | vpclmulqdq xmm0,xmm14,xmm6,0x00
|
---|
1628 | vpshufb xmm15,xmm15,xmm13
|
---|
1629 | vpxor xmm0,xmm0,xmm3
|
---|
1630 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1631 | vmovdqu xmm6,XMMWORD[((112-64))+rdx]
|
---|
1632 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1633 | vpxor xmm1,xmm1,xmm4
|
---|
1634 | vpclmulqdq xmm2,xmm9,xmm7,0x00
|
---|
1635 | vpxor xmm8,xmm8,xmm15
|
---|
1636 | vpxor xmm2,xmm2,xmm5
|
---|
1637 | vxorps xmm10,xmm10,xmm12 ; end of reduction step 1
|
---|
1638 |
|
---|
1639 | vmovdqu xmm14,XMMWORD[16+r8]
|
---|
1640 | vpalignr xmm12,xmm10,xmm10,8 ; reduction step 2: same swap-and-multiply pattern
|
---|
1641 | vpclmulqdq xmm3,xmm15,xmm6,0x00
|
---|
1642 | vpshufb xmm14,xmm14,xmm13
|
---|
1643 | vpxor xmm3,xmm3,xmm0
|
---|
1644 | vpclmulqdq xmm4,xmm15,xmm6,0x11
|
---|
1645 | vmovdqu xmm6,XMMWORD[((144-64))+rdx]
|
---|
1646 | vpclmulqdq xmm10,xmm10,XMMWORD[r10],0x10
|
---|
1647 | vxorps xmm12,xmm12,xmm11 ; bring in the hi sum
|
---|
1648 | vpunpckhqdq xmm9,xmm14,xmm14
|
---|
1649 | vpxor xmm4,xmm4,xmm1
|
---|
1650 | vpclmulqdq xmm5,xmm8,xmm7,0x10
|
---|
1651 | vmovdqu xmm7,XMMWORD[((176-64))+rdx]
|
---|
1652 | vpxor xmm9,xmm9,xmm14
|
---|
1653 | vpxor xmm5,xmm5,xmm2
|
---|
1654 |
|
---|
1655 | vmovdqu xmm15,XMMWORD[r8]
|
---|
1656 | vpclmulqdq xmm0,xmm14,xmm6,0x00
|
---|
1657 | vpshufb xmm15,xmm15,xmm13
|
---|
1658 | vpclmulqdq xmm1,xmm14,xmm6,0x11
|
---|
1659 | vmovdqu xmm6,XMMWORD[((160-64))+rdx]
|
---|
1660 | vpxor xmm15,xmm15,xmm12 ; first block of this group ^= reduced state (xmm12 ^ xmm10)
|
---|
1661 | vpclmulqdq xmm2,xmm9,xmm7,0x10
|
---|
1662 | vpxor xmm15,xmm15,xmm10
|
---|
1663 |
|
---|
1664 | lea r8,[128+r8]
|
---|
1665 | sub r9,0x80
|
---|
1666 | jnc NEAR $L$oop8x_avx ; no borrow: at least one more full group is pending
|
---|
1667 |
|
---|
1668 | add r9,0x80 ; undo the final subtraction: r9 = bytes still unread (below 128)
|
---|
1669 | jmp NEAR $L$tail_no_xor_avx ; the state is already folded into xmm15
|
---|
1670 | ; Short path: at most 7 blocks, read from the end of the input backwards; the seventh step jumps to the tail unconditionally.
|
---|
1671 | ALIGN 32
|
---|
1672 | $L$short_avx:
|
---|
1673 | vmovdqu xmm14,XMMWORD[((-16))+r9*1+r8] ; last 16-byte block of the remaining input
|
---|
1674 | lea r8,[r9*1+r8] ; r8 = end of input; earlier blocks use negative offsets
|
---|
1675 | vmovdqu xmm6,XMMWORD[((0-64))+rdx]
|
---|
1676 | vmovdqu xmm7,XMMWORD[((32-64))+rdx]
|
---|
1677 | vpshufb xmm15,xmm14,xmm13
|
---|
1678 | ; Copy xmm0-2 into xmm3-5 so the next "vpxor xmm3,xmm3,xmm0" style xors produce zero, clearing the sums whatever xmm0-xmm2 held.
|
---|
1679 | vmovdqa xmm3,xmm0
|
---|
1680 | vmovdqa xmm4,xmm1
|
---|
1681 | vmovdqa xmm5,xmm2
|
---|
1682 | sub r9,0x10
|
---|
1683 | jz NEAR $L$tail_avx ; exactly one block
|
---|
1684 |
|
---|
1685 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1686 | vpxor xmm3,xmm3,xmm0
|
---|
1687 | vpclmulqdq xmm0,xmm15,xmm6,0x00
|
---|
1688 | vpxor xmm8,xmm8,xmm15
|
---|
1689 | vmovdqu xmm14,XMMWORD[((-32))+r8]
|
---|
1690 | vpxor xmm4,xmm4,xmm1
|
---|
1691 | vpclmulqdq xmm1,xmm15,xmm6,0x11
|
---|
1692 | vmovdqu xmm6,XMMWORD[((16-64))+rdx]
|
---|
1693 | vpshufb xmm15,xmm14,xmm13
|
---|
1694 | vpxor xmm5,xmm5,xmm2
|
---|
1695 | vpclmulqdq xmm2,xmm8,xmm7,0x00
|
---|
1696 | vpsrldq xmm7,xmm7,8 ; bring the high qword down so the next middle multiply (imm 0x00) uses it
|
---|
1697 | sub r9,0x10
|
---|
1698 | jz NEAR $L$tail_avx
|
---|
1699 |
|
---|
1700 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1701 | vpxor xmm3,xmm3,xmm0
|
---|
1702 | vpclmulqdq xmm0,xmm15,xmm6,0x00
|
---|
1703 | vpxor xmm8,xmm8,xmm15
|
---|
1704 | vmovdqu xmm14,XMMWORD[((-48))+r8]
|
---|
1705 | vpxor xmm4,xmm4,xmm1
|
---|
1706 | vpclmulqdq xmm1,xmm15,xmm6,0x11
|
---|
1707 | vmovdqu xmm6,XMMWORD[((48-64))+rdx]
|
---|
1708 | vpshufb xmm15,xmm14,xmm13
|
---|
1709 | vpxor xmm5,xmm5,xmm2
|
---|
1710 | vpclmulqdq xmm2,xmm8,xmm7,0x00
|
---|
1711 | vmovdqu xmm7,XMMWORD[((80-64))+rdx]
|
---|
1712 | sub r9,0x10
|
---|
1713 | jz NEAR $L$tail_avx
|
---|
1714 |
|
---|
1715 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1716 | vpxor xmm3,xmm3,xmm0
|
---|
1717 | vpclmulqdq xmm0,xmm15,xmm6,0x00
|
---|
1718 | vpxor xmm8,xmm8,xmm15
|
---|
1719 | vmovdqu xmm14,XMMWORD[((-64))+r8]
|
---|
1720 | vpxor xmm4,xmm4,xmm1
|
---|
1721 | vpclmulqdq xmm1,xmm15,xmm6,0x11
|
---|
1722 | vmovdqu xmm6,XMMWORD[((64-64))+rdx]
|
---|
1723 | vpshufb xmm15,xmm14,xmm13
|
---|
1724 | vpxor xmm5,xmm5,xmm2
|
---|
1725 | vpclmulqdq xmm2,xmm8,xmm7,0x00
|
---|
1726 | vpsrldq xmm7,xmm7,8
|
---|
1727 | sub r9,0x10
|
---|
1728 | jz NEAR $L$tail_avx
|
---|
1729 |
|
---|
1730 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1731 | vpxor xmm3,xmm3,xmm0
|
---|
1732 | vpclmulqdq xmm0,xmm15,xmm6,0x00
|
---|
1733 | vpxor xmm8,xmm8,xmm15
|
---|
1734 | vmovdqu xmm14,XMMWORD[((-80))+r8]
|
---|
1735 | vpxor xmm4,xmm4,xmm1
|
---|
1736 | vpclmulqdq xmm1,xmm15,xmm6,0x11
|
---|
1737 | vmovdqu xmm6,XMMWORD[((96-64))+rdx]
|
---|
1738 | vpshufb xmm15,xmm14,xmm13
|
---|
1739 | vpxor xmm5,xmm5,xmm2
|
---|
1740 | vpclmulqdq xmm2,xmm8,xmm7,0x00
|
---|
1741 | vmovdqu xmm7,XMMWORD[((128-64))+rdx]
|
---|
1742 | sub r9,0x10
|
---|
1743 | jz NEAR $L$tail_avx
|
---|
1744 |
|
---|
1745 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1746 | vpxor xmm3,xmm3,xmm0
|
---|
1747 | vpclmulqdq xmm0,xmm15,xmm6,0x00
|
---|
1748 | vpxor xmm8,xmm8,xmm15
|
---|
1749 | vmovdqu xmm14,XMMWORD[((-96))+r8]
|
---|
1750 | vpxor xmm4,xmm4,xmm1
|
---|
1751 | vpclmulqdq xmm1,xmm15,xmm6,0x11
|
---|
1752 | vmovdqu xmm6,XMMWORD[((112-64))+rdx]
|
---|
1753 | vpshufb xmm15,xmm14,xmm13
|
---|
1754 | vpxor xmm5,xmm5,xmm2
|
---|
1755 | vpclmulqdq xmm2,xmm8,xmm7,0x00
|
---|
1756 | vpsrldq xmm7,xmm7,8
|
---|
1757 | sub r9,0x10
|
---|
1758 | jz NEAR $L$tail_avx
|
---|
1759 |
|
---|
1760 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1761 | vpxor xmm3,xmm3,xmm0
|
---|
1762 | vpclmulqdq xmm0,xmm15,xmm6,0x00
|
---|
1763 | vpxor xmm8,xmm8,xmm15
|
---|
1764 | vmovdqu xmm14,XMMWORD[((-112))+r8]
|
---|
1765 | vpxor xmm4,xmm4,xmm1
|
---|
1766 | vpclmulqdq xmm1,xmm15,xmm6,0x11
|
---|
1767 | vmovdqu xmm6,XMMWORD[((144-64))+rdx]
|
---|
1768 | vpshufb xmm15,xmm14,xmm13
|
---|
1769 | vpxor xmm5,xmm5,xmm2
|
---|
1770 | vpclmulqdq xmm2,xmm8,xmm7,0x00
|
---|
1771 | vmovq xmm7,QWORD[((184-64))+rdx] ; load only the upper 8 bytes of the 16-byte entry at offset 176 into the low qword
|
---|
1772 | sub r9,0x10
|
---|
1773 | jmp NEAR $L$tail_avx
|
---|
1774 | ; Tail: multiply the pending block (xmm15 with xmm6/xmm7), add it to the sums, and reduce to the new 128-bit state in xmm10.
|
---|
1775 | ALIGN 32
|
---|
1776 | $L$tail_avx:
|
---|
1777 | vpxor xmm15,xmm15,xmm10 ; fold the running state into the pending block
|
---|
1778 | $L$tail_no_xor_avx:
|
---|
1779 | vpunpckhqdq xmm8,xmm15,xmm15
|
---|
1780 | vpxor xmm3,xmm3,xmm0
|
---|
1781 | vpclmulqdq xmm0,xmm15,xmm6,0x00
|
---|
1782 | vpxor xmm8,xmm8,xmm15
|
---|
1783 | vpxor xmm4,xmm4,xmm1
|
---|
1784 | vpclmulqdq xmm1,xmm15,xmm6,0x11
|
---|
1785 | vpxor xmm5,xmm5,xmm2
|
---|
1786 | vpclmulqdq xmm2,xmm8,xmm7,0x00
|
---|
1787 |
|
---|
1788 | vmovdqu xmm12,XMMWORD[r10] ; xmm12 = constant at $L$0x1c2_polynomial
|
---|
1789 |
|
---|
1790 | vpxor xmm10,xmm3,xmm0 ; total lo sum
|
---|
1791 | vpxor xmm11,xmm4,xmm1 ; total hi sum
|
---|
1792 | vpxor xmm5,xmm5,xmm2 ; total middle sum
|
---|
1793 | ; middle ^= lo ^ hi, then split it across the lo and hi sums
|
---|
1794 | vpxor xmm5,xmm5,xmm10
|
---|
1795 | vpxor xmm5,xmm5,xmm11
|
---|
1796 | vpslldq xmm9,xmm5,8
|
---|
1797 | vpsrldq xmm5,xmm5,8
|
---|
1798 | vpxor xmm10,xmm10,xmm9
|
---|
1799 | vpxor xmm11,xmm11,xmm5
|
---|
1800 | ; reduction step 1
|
---|
1801 | vpclmulqdq xmm9,xmm10,xmm12,0x10
|
---|
1802 | vpalignr xmm10,xmm10,xmm10,8
|
---|
1803 | vpxor xmm10,xmm10,xmm9
|
---|
1804 | ; reduction step 2, then xor in the hi sum
|
---|
1805 | vpclmulqdq xmm9,xmm10,xmm12,0x10
|
---|
1806 | vpalignr xmm10,xmm10,xmm10,8
|
---|
1807 | vpxor xmm10,xmm10,xmm11
|
---|
1808 | vpxor xmm10,xmm10,xmm9
|
---|
1809 |
|
---|
1810 | cmp r9,0
|
---|
1811 | jne NEAR $L$short_avx ; bytes remain after the 8-block groups: process them on the short path
|
---|
1812 |
|
---|
1813 | vpshufb xmm10,xmm10,xmm13 ; reverse the bytes back before storing
|
---|
1814 | vmovdqu XMMWORD[rcx],xmm10 ; write the updated state
|
---|
1815 | vzeroupper
|
---|
1816 | movaps xmm6,XMMWORD[rsp] ; restore xmm6-xmm15 saved by the prologue
|
---|
1817 | movaps xmm7,XMMWORD[16+rsp]
|
---|
1818 | movaps xmm8,XMMWORD[32+rsp]
|
---|
1819 | movaps xmm9,XMMWORD[48+rsp]
|
---|
1820 | movaps xmm10,XMMWORD[64+rsp]
|
---|
1821 | movaps xmm11,XMMWORD[80+rsp]
|
---|
1822 | movaps xmm12,XMMWORD[96+rsp]
|
---|
1823 | movaps xmm13,XMMWORD[112+rsp]
|
---|
1824 | movaps xmm14,XMMWORD[128+rsp]
|
---|
1825 | movaps xmm15,XMMWORD[144+rsp]
|
---|
1826 | lea rsp,[168+rsp] ; release the 168-byte frame
|
---|
1827 | $L$SEH_end_gcm_ghash_avx:
|
---|
1828 | DB 0F3h,0C3h ;repret
|
---|
1829 |
|
---|
1830 | ALIGN 64 ; read-only constants shared by the routines in this file
|
---|
1831 | $L$bswap_mask: ; byte-shuffle control 15..0: reverses the 16 bytes of a register
|
---|
1832 | DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
|
---|
1833 | $L$0x1c2_polynomial: ; 16-byte constant: lowest byte 1, highest byte 0xc2; used by the init and reduction code
|
---|
1834 | DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
|
---|
1835 | $L$7_mask: ; not referenced in this part of the file
|
---|
1836 | DD 7,0,7,0
|
---|
1837 | $L$7_mask_poly: ; not referenced in this part of the file (450 = 0x1c2)
|
---|
1838 | DD 7,0,450,0
|
---|
1839 | ALIGN 64
|
---|
1840 | ; $L$rem_4bit: 16 qword entries written as DD low,high pairs (low dword always 0); gcm_gmult_4bit indexes it as [index*8 + table] with a 4-bit index.
|
---|
1841 | $L$rem_4bit:
|
---|
1842 | DD 0,0,0,471859200,0,943718400,0,610271232
|
---|
1843 | DD 0,1887436800,0,1822425088,0,1220542464,0,1423966208
|
---|
1844 | DD 0,3774873600,0,4246732800,0,3644850176,0,3311403008
|
---|
1845 | DD 0,2441084928,0,2376073216,0,2847932416,0,3051356160
|
---|
1846 | ; $L$rem_8bit: 256 16-bit entries; the code that reads it is outside this part of the file.
|
---|
1847 | $L$rem_8bit:
|
---|
1848 | DW 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
|
---|
1849 | DW 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
|
---|
1850 | DW 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
|
---|
1851 | DW 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
|
---|
1852 | DW 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
|
---|
1853 | DW 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
|
---|
1854 | DW 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
|
---|
1855 | DW 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
|
---|
1856 | DW 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
|
---|
1857 | DW 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
|
---|
1858 | DW 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
|
---|
1859 | DW 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
|
---|
1860 | DW 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
|
---|
1861 | DW 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
|
---|
1862 | DW 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
|
---|
1863 | DW 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
|
---|
1864 | DW 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
|
---|
1865 | DW 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
|
---|
1866 | DW 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
|
---|
1867 | DW 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
|
---|
1868 | DW 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
|
---|
1869 | DW 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
|
---|
1870 | DW 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
|
---|
1871 | DW 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
|
---|
1872 | DW 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
|
---|
1873 | DW 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
|
---|
1874 | DW 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
|
---|
1875 | DW 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
|
---|
1876 | DW 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
|
---|
1877 | DW 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
|
---|
1878 | DW 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
|
---|
1879 | DW 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
|
---|
1880 | ; NUL-terminated ASCII banner; the bytes spell "GHASH for x86_64, CRYPTOGAMS by" followed by an e-mail address in angle brackets.
|
---|
1881 | DB 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
|
---|
1882 | DB 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
|
---|
1883 | DB 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
|
---|
1884 | DB 114,103,62,0
|
---|
1885 | ALIGN 64
|
---|
1886 | EXTERN __imp_RtlVirtualUnwind
|
---|
1887 |
|
---|
1888 | ALIGN 16
|
---|
1889 | se_handler: ; Win64 exception handler named by the .xdata records of the two *_4bit routines. In: r8 = context record, r9 = dispatcher context. Field names in the comments below follow the Windows x64 CONTEXT / DISPATCHER_CONTEXT layouts; the offsets are as coded. Returns eax = 1.
|
---|
1890 | push rsi
|
---|
1891 | push rdi
|
---|
1892 | push rbx
|
---|
1893 | push rbp
|
---|
1894 | push r12
|
---|
1895 | push r13
|
---|
1896 | push r14
|
---|
1897 | push r15
|
---|
1898 | pushfq ; ninth 8-byte push: rsp is 16-byte aligned again after this
|
---|
1899 | sub rsp,64 ; 32 bytes of home space plus four stack arguments for the call below
|
---|
1900 |
|
---|
1901 | mov rax,QWORD[120+r8] ; context->Rax (gcm_gmult_4bit copies rsp into rax before its pushes)
|
---|
1902 | mov rbx,QWORD[248+r8] ; context->Rip
|
---|
1903 |
|
---|
1904 | mov rsi,QWORD[8+r9] ; disp->ImageBase
|
---|
1905 | mov r11,QWORD[56+r9] ; disp->HandlerData: two image-relative label offsets (prologue, epilogue) from .xdata
|
---|
1906 |
|
---|
1907 | mov r10d,DWORD[r11] ; HandlerData[0]
|
---|
1908 | lea r10,[r10*1+rsi] ; absolute address of the prologue label
|
---|
1909 | cmp rbx,r10
|
---|
1910 | jb NEAR $L$in_prologue ; Rip below the prologue label: keep rax = context->Rax
|
---|
1911 |
|
---|
1912 | mov rax,QWORD[152+r8] ; context->Rsp
|
---|
1913 |
|
---|
1914 | mov r10d,DWORD[4+r11] ; HandlerData[1]
|
---|
1915 | lea r10,[r10*1+rsi] ; absolute address of the epilogue label
|
---|
1916 | cmp rbx,r10
|
---|
1917 | jae NEAR $L$in_prologue ; Rip at or past the epilogue label: skip the register recovery
|
---|
1918 |
|
---|
1919 | lea rax,[24+rax] ; step over the three registers pushed by the prologue
|
---|
1920 |
|
---|
1921 | mov rbx,QWORD[((-8))+rax] ; saved rbx (pushed first)
|
---|
1922 | mov rbp,QWORD[((-16))+rax] ; saved rbp
|
---|
1923 | mov r12,QWORD[((-24))+rax] ; saved r12 (pushed last)
|
---|
1924 | mov QWORD[144+r8],rbx ; context->Rbx
|
---|
1925 | mov QWORD[160+r8],rbp ; context->Rbp
|
---|
1926 | mov QWORD[216+r8],r12 ; context->R12
|
---|
1927 |
|
---|
1928 | $L$in_prologue:
|
---|
1929 | mov rdi,QWORD[8+rax] ; rdi and rsi were spilled to [8+rsp] / [16+rsp] by the WIN64 prologue
|
---|
1930 | mov rsi,QWORD[16+rax]
|
---|
1931 | mov QWORD[152+r8],rax ; context->Rsp
|
---|
1932 | mov QWORD[168+r8],rsi ; context->Rsi
|
---|
1933 | mov QWORD[176+r8],rdi ; context->Rdi
|
---|
1934 |
|
---|
1935 | mov rdi,QWORD[40+r9] ; destination: disp->ContextRecord
|
---|
1936 | mov rsi,r8 ; source: the context patched above
|
---|
1937 | mov ecx,154 ; 154 qwords = 1232 bytes
|
---|
1938 | DD 0xa548f3fc ; bytes fc f3 48 a5 = cld ; rep movsq
|
---|
1939 |
|
---|
1940 | mov rsi,r9 ; rsi = dispatcher context; the lines below build the RtlVirtualUnwind arguments
|
---|
1941 | xor rcx,rcx ; arg 1 = 0
|
---|
1942 | mov rdx,QWORD[8+rsi] ; arg 2 = disp->ImageBase
|
---|
1943 | mov r8,QWORD[rsi] ; arg 3 = disp->ControlPc
|
---|
1944 | mov r9,QWORD[16+rsi] ; arg 4 = disp->FunctionEntry
|
---|
1945 | mov r10,QWORD[40+rsi] ; disp->ContextRecord
|
---|
1946 | lea r11,[56+rsi] ; &disp->HandlerData
|
---|
1947 | lea r12,[24+rsi] ; &disp->EstablisherFrame
|
---|
1948 | mov QWORD[32+rsp],r10 ; arg 5
|
---|
1949 | mov QWORD[40+rsp],r11 ; arg 6
|
---|
1950 | mov QWORD[48+rsp],r12 ; arg 7
|
---|
1951 | mov QWORD[56+rsp],rcx ; arg 8 = 0
|
---|
1952 | call QWORD[__imp_RtlVirtualUnwind]
|
---|
1953 |
|
---|
1954 | mov eax,1 ; return value 1 (ExceptionContinueSearch in the Windows headers)
|
---|
1955 | add rsp,64
|
---|
1956 | popfq
|
---|
1957 | pop r15
|
---|
1958 | pop r14
|
---|
1959 | pop r13
|
---|
1960 | pop r12
|
---|
1961 | pop rbp
|
---|
1962 | pop rbx
|
---|
1963 | pop rdi
|
---|
1964 | pop rsi
|
---|
1965 | DB 0F3h,0C3h ;repret
|
---|
1966 |
|
---|
1967 |
|
---|
1968 | section .pdata rdata align=4 ; function table: one (begin, end, unwind-info) triple of image-relative addresses per routine
|
---|
1969 | ALIGN 4
|
---|
1970 | DD $L$SEH_begin_gcm_gmult_4bit wrt ..imagebase
|
---|
1971 | DD $L$SEH_end_gcm_gmult_4bit wrt ..imagebase
|
---|
1972 | DD $L$SEH_info_gcm_gmult_4bit wrt ..imagebase
|
---|
1973 |
|
---|
1974 | DD $L$SEH_begin_gcm_ghash_4bit wrt ..imagebase
|
---|
1975 | DD $L$SEH_end_gcm_ghash_4bit wrt ..imagebase
|
---|
1976 | DD $L$SEH_info_gcm_ghash_4bit wrt ..imagebase
|
---|
1977 |
|
---|
1978 | DD $L$SEH_begin_gcm_init_clmul wrt ..imagebase
|
---|
1979 | DD $L$SEH_end_gcm_init_clmul wrt ..imagebase
|
---|
1980 | DD $L$SEH_info_gcm_init_clmul wrt ..imagebase
|
---|
1981 |
|
---|
1982 | DD $L$SEH_begin_gcm_ghash_clmul wrt ..imagebase
|
---|
1983 | DD $L$SEH_end_gcm_ghash_clmul wrt ..imagebase
|
---|
1984 | DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase
|
---|
1985 | DD $L$SEH_begin_gcm_init_avx wrt ..imagebase
|
---|
1986 | DD $L$SEH_end_gcm_init_avx wrt ..imagebase
|
---|
1987 | DD $L$SEH_info_gcm_init_clmul wrt ..imagebase ; gcm_init_avx reuses the gcm_init_clmul unwind record
|
---|
1988 |
|
---|
1989 | DD $L$SEH_begin_gcm_ghash_avx wrt ..imagebase
|
---|
1990 | DD $L$SEH_end_gcm_ghash_avx wrt ..imagebase
|
---|
1991 | DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase ; gcm_ghash_avx reuses the gcm_ghash_clmul unwind record
|
---|
1992 | section .xdata rdata align=8 ; unwind records referenced from .pdata
|
---|
1993 | ALIGN 8
|
---|
1994 | $L$SEH_info_gcm_gmult_4bit:
|
---|
1995 | DB 9,0,0,0 ; version 1 with the exception-handler flag; prologue size 0, no unwind codes
|
---|
1996 | DD se_handler wrt ..imagebase ; handler address
|
---|
1997 | DD $L$gmult_prologue wrt ..imagebase,$L$gmult_epilogue wrt ..imagebase ; handler data: the two label offsets se_handler compares Rip against
|
---|
1998 | $L$SEH_info_gcm_ghash_4bit:
|
---|
1999 | DB 9,0,0,0 ; same header as above
|
---|
2000 | DD se_handler wrt ..imagebase
|
---|
2001 | DD $L$ghash_prologue wrt ..imagebase,$L$ghash_epilogue wrt ..imagebase
|
---|
2002 | $L$SEH_info_gcm_init_clmul:
|
---|
2003 | DB 0x01,0x08,0x03,0x00 ; version 1, no handler; prologue is 8 bytes, 3 unwind-code slots
|
---|
2004 | DB 0x08,0x68,0x00,0x00 ; prologue offset 8: xmm6 saved at [rsp+0]
|
---|
2005 | DB 0x04,0x22,0x00,0x00 ; prologue offset 4: small allocation of 24 bytes; last two bytes pad the slot count to even
|
---|
2006 | $L$SEH_info_gcm_ghash_clmul:
|
---|
2007 | DB 0x01,0x33,0x16,0x00 ; version 1, no handler; prologue is 0x33 bytes, 0x16 unwind-code slots
|
---|
2008 | DB 0x33,0xf8,0x09,0x00 ; offset 0x33: xmm15 saved at [rsp+0x90]
|
---|
2009 | DB 0x2e,0xe8,0x08,0x00 ; offset 0x2e: xmm14 saved at [rsp+0x80]
|
---|
2010 | DB 0x29,0xd8,0x07,0x00 ; offset 0x29: xmm13 saved at [rsp+0x70]
|
---|
2011 | DB 0x24,0xc8,0x06,0x00 ; offset 0x24: xmm12 saved at [rsp+0x60]
|
---|
2012 | DB 0x1f,0xb8,0x05,0x00 ; offset 0x1f: xmm11 saved at [rsp+0x50]
|
---|
2013 | DB 0x1a,0xa8,0x04,0x00 ; offset 0x1a: xmm10 saved at [rsp+0x40]
|
---|
2014 | DB 0x15,0x98,0x03,0x00 ; offset 0x15: xmm9 saved at [rsp+0x30]
|
---|
2015 | DB 0x10,0x88,0x02,0x00 ; offset 0x10: xmm8 saved at [rsp+0x20]
|
---|
2016 | DB 0x0c,0x78,0x01,0x00 ; offset 0x0c: xmm7 saved at [rsp+0x10]
|
---|
2017 | DB 0x08,0x68,0x00,0x00 ; offset 0x08: xmm6 saved at [rsp+0]
|
---|
2018 | DB 0x04,0x01,0x15,0x00 ; offset 0x04: large allocation of 0x15*8 = 168 bytes
|
---|