; Choose the code section declaration that matches the NASM output format:
;   obj    -> 32-bit segment named "code", class "code", 64-byte aligned
;   win32  -> .text, 64-byte aligned
;   other  -> .text with the default alignment
%ifidn __OUTPUT_FORMAT__,obj
section code    use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
section .text   code align=64
%else
section .text   code
%endif
;-----------------------------------------------------------------------
; _gcm_gmult_4bit_x86 -- integer-only (no MMX) variant.
;
; Stack arguments (offsets relative to esp on entry):
;   [esp+4]  arg0 -> 16-byte block; read on entry, overwritten on exit
;   [esp+8]  arg1 -> table read in 16-byte entries at offset nibble*16
;
; Saves/restores ebp, ebx, esi, edi.  Clobbers eax, ecx, edx, flags.
; Returns with a plain `ret` (the caller removes the arguments).
;
; Local frame (84 bytes):
;   [esp+ 0 .. esp+15]  copy of the 16-byte input block
;   [esp+16 .. esp+79]  16 dword constants, indexed by the nibble that is
;                       shifted out of the 128-bit accumulator
;   [esp+80 .. esp+83]  not accessed
; After the four pushes and the 84-byte frame, arg0 is at [esp+104]
; and arg1 at [esp+108].
;
; The 128-bit accumulator lives in ebp:edx:ecx:ebx (ebp is the most
; significant dword for the 4-bit right shifts).
;-----------------------------------------------------------------------
global  _gcm_gmult_4bit_x86
align   16
_gcm_gmult_4bit_x86:
L$_gcm_gmult_4bit_x86_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi
        sub     esp, 84

        mov     edi, DWORD [esp+104]            ; edi = arg0 (16-byte block)
        mov     esi, DWORD [esp+108]            ; esi = arg1 (table base)
        mov     ebp, DWORD [edi]
        mov     edx, DWORD [edi+4]
        mov     ecx, DWORD [edi+8]
        mov     ebx, DWORD [edi+12]

        ; Build the 16-entry reduction table on the stack.
        mov     DWORD [esp+16], 0
        mov     DWORD [esp+20], 0x1C200000
        mov     DWORD [esp+24], 0x38400000
        mov     DWORD [esp+28], 0x24600000
        mov     DWORD [esp+32], 0x70800000
        mov     DWORD [esp+36], 0x6CA00000
        mov     DWORD [esp+40], 0x48C00000
        mov     DWORD [esp+44], 0x54E00000
        mov     DWORD [esp+48], 0xE1000000
        mov     DWORD [esp+52], 0xFD200000
        mov     DWORD [esp+56], 0xD9400000
        mov     DWORD [esp+60], 0xC5600000
        mov     DWORD [esp+64], 0x91800000
        mov     DWORD [esp+68], 0x8DA00000
        mov     DWORD [esp+72], 0xA9C00000
        mov     DWORD [esp+76], 0xB5E00000

        ; Keep a byte-addressable copy of the input block at [esp+0..15].
        mov     DWORD [esp], ebp
        mov     DWORD [esp+4], edx
        mov     DWORD [esp+8], ecx
        mov     DWORD [esp+12], ebx

        ; ebx = (low nibble of input byte 15) * 16 = offset of the first
        ; table entry; that entry seeds the accumulator.
        shr     ebx, 20
        and     ebx, 0xF0
        mov     ebp, DWORD [esi+ebx*1+4]
        mov     edx, DWORD [esi+ebx*1]
        mov     ecx, DWORD [esi+ebx*1+12]
        mov     ebx, DWORD [esi+ebx*1+8]

        xor     eax, eax                        ; only al is written below, so
                                                ; eax stays a valid zero-extended index
        mov     edi, 15                         ; edi = index of the byte being consumed
        jmp     NEAR L$000x86_loop

align   16
L$000x86_loop:
        ; --- first half: consume the high nibble of byte [esp+edi] ---
        mov     al, bl
        shrd    ebx, ecx, 4                     ; accumulator >>= 4 ...
        and     al, 0x0F                        ; al = the 4 bits shifted out
        shrd    ecx, edx, 4
        shrd    edx, ebp, 4
        shr     ebp, 4
        xor     ebp, DWORD [esp+eax*4+16]       ; fold the constant for those bits
        mov     al, BYTE [esp+edi*1]
        and     al, 0xF0                        ; al = high nibble * 16
        xor     ebx, DWORD [esi+eax*1+8]
        xor     ecx, DWORD [esi+eax*1+12]
        xor     edx, DWORD [esi+eax*1]
        xor     ebp, DWORD [esi+eax*1+4]
        dec     edi
        js      NEAR L$001x86_break             ; byte 0 done -> finished

        ; --- second half: consume the low nibble of the next lower byte ---
        mov     al, bl
        shrd    ebx, ecx, 4
        and     al, 0x0F
        shrd    ecx, edx, 4
        shrd    edx, ebp, 4
        shr     ebp, 4
        xor     ebp, DWORD [esp+eax*4+16]
        mov     al, BYTE [esp+edi*1]
        shl     al, 4                           ; al = low nibble * 16
        xor     ebx, DWORD [esi+eax*1+8]
        xor     ecx, DWORD [esi+eax*1+12]
        xor     edx, DWORD [esi+eax*1]
        xor     ebp, DWORD [esi+eax*1+4]
        jmp     NEAR L$000x86_loop

align   16
L$001x86_break:
        ; Byte-swap each dword and write the result back over arg0's block.
        bswap   ebx
        bswap   ecx
        bswap   edx
        bswap   ebp
        mov     edi, DWORD [esp+104]
        mov     DWORD [edi+12], ebx
        mov     DWORD [edi+8], ecx
        mov     DWORD [edi+4], edx
        mov     DWORD [edi], ebp

        add     esp, 84
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret
;-----------------------------------------------------------------------
; _gcm_ghash_4bit_x86 -- integer-only (no MMX) variant, multi-block.
;
; Stack arguments (offsets relative to esp on entry):
;   [esp+4]   arg0 -> 16-byte state block; read on entry, overwritten on exit
;   [esp+8]   arg1 -> table read in 16-byte entries at offset nibble*16
;   [esp+12]  arg2 -> input data, consumed 16 bytes per outer iteration
;   [esp+16]  arg3 =  input length in bytes
;
; The arg2 and arg3 stack slots are reused as scratch: arg3 is replaced
; by the end pointer (arg2 + arg3) and arg2 is advanced by 16 per block.
;
; NOTE: the outer loop is bottom-tested, so one 16-byte block is always
; processed before the end pointer is checked.
;
; Saves/restores ebp, ebx, esi, edi.  Clobbers eax, ecx, edx, flags.
;
; Local frame (84 bytes):
;   [esp+ 0 .. esp+15]  state XOR current input block
;   [esp+16 .. esp+79]  16 dword constants, indexed by the nibble that is
;                       shifted out of the 128-bit accumulator
;   [esp+80 .. esp+83]  not accessed
; With the frame in place the arguments are at [esp+104] .. [esp+116].
;-----------------------------------------------------------------------
global  _gcm_ghash_4bit_x86
align   16
_gcm_ghash_4bit_x86:
L$_gcm_ghash_4bit_x86_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi
        sub     esp, 84

        mov     ebx, DWORD [esp+104]            ; ebx = arg0 (state block)
        mov     esi, DWORD [esp+108]            ; esi = arg1 (table base)
        mov     edi, DWORD [esp+112]            ; edi = arg2 (input pointer)
        mov     ecx, DWORD [esp+116]            ; ecx = arg3 (length)
        add     ecx, edi
        mov     DWORD [esp+116], ecx            ; arg3 slot now holds the end pointer

        ; Load the state into ebp:edx:ecx:ebx.
        mov     ebp, DWORD [ebx]
        mov     edx, DWORD [ebx+4]
        mov     ecx, DWORD [ebx+8]
        mov     ebx, DWORD [ebx+12]

        ; Build the 16-entry reduction table on the stack.
        mov     DWORD [esp+16], 0
        mov     DWORD [esp+20], 0x1C200000
        mov     DWORD [esp+24], 0x38400000
        mov     DWORD [esp+28], 0x24600000
        mov     DWORD [esp+32], 0x70800000
        mov     DWORD [esp+36], 0x6CA00000
        mov     DWORD [esp+40], 0x48C00000
        mov     DWORD [esp+44], 0x54E00000
        mov     DWORD [esp+48], 0xE1000000
        mov     DWORD [esp+52], 0xFD200000
        mov     DWORD [esp+56], 0xD9400000
        mov     DWORD [esp+60], 0xC5600000
        mov     DWORD [esp+64], 0x91800000
        mov     DWORD [esp+68], 0x8DA00000
        mov     DWORD [esp+72], 0xA9C00000
        mov     DWORD [esp+76], 0xB5E00000

align   16
L$002x86_outer_loop:
        ; state ^= next 16 input bytes; keep a byte-addressable copy.
        xor     ebx, DWORD [edi+12]
        xor     ecx, DWORD [edi+8]
        xor     edx, DWORD [edi+4]
        xor     ebp, DWORD [edi]
        mov     DWORD [esp+12], ebx
        mov     DWORD [esp+8], ecx
        mov     DWORD [esp+4], edx
        mov     DWORD [esp], ebp

        ; ebx = (low nibble of byte 15) * 16 = offset of the seeding entry.
        shr     ebx, 20
        and     ebx, 0xF0
        mov     ebp, DWORD [esi+ebx*1+4]
        mov     edx, DWORD [esi+ebx*1]
        mov     ecx, DWORD [esi+ebx*1+12]
        mov     ebx, DWORD [esi+ebx*1+8]

        xor     eax, eax                        ; only al is written below
        mov     edi, 15                         ; edi is reused as the byte index
        jmp     NEAR L$003x86_loop

align   16
L$003x86_loop:
        ; --- first half: consume the high nibble of byte [esp+edi] ---
        mov     al, bl
        shrd    ebx, ecx, 4                     ; accumulator >>= 4 ...
        and     al, 0x0F                        ; al = the 4 bits shifted out
        shrd    ecx, edx, 4
        shrd    edx, ebp, 4
        shr     ebp, 4
        xor     ebp, DWORD [esp+eax*4+16]       ; fold the constant for those bits
        mov     al, BYTE [esp+edi*1]
        and     al, 0xF0                        ; al = high nibble * 16
        xor     ebx, DWORD [esi+eax*1+8]
        xor     ecx, DWORD [esi+eax*1+12]
        xor     edx, DWORD [esi+eax*1]
        xor     ebp, DWORD [esi+eax*1+4]
        dec     edi
        js      NEAR L$004x86_break             ; byte 0 done -> block finished

        ; --- second half: consume the low nibble of the next lower byte ---
        mov     al, bl
        shrd    ebx, ecx, 4
        and     al, 0x0F
        shrd    ecx, edx, 4
        shrd    edx, ebp, 4
        shr     ebp, 4
        xor     ebp, DWORD [esp+eax*4+16]
        mov     al, BYTE [esp+edi*1]
        shl     al, 4                           ; al = low nibble * 16
        xor     ebx, DWORD [esi+eax*1+8]
        xor     ecx, DWORD [esi+eax*1+12]
        xor     edx, DWORD [esi+eax*1]
        xor     ebp, DWORD [esi+eax*1+4]
        jmp     NEAR L$003x86_loop

align   16
L$004x86_break:
        bswap   ebx
        bswap   ecx
        bswap   edx
        bswap   ebp

        ; Advance the saved input pointer and loop while it is below the end.
        mov     edi, DWORD [esp+112]
        lea     edi, [edi+16]                   ; lea/mov leave the flags for jb intact
        cmp     edi, DWORD [esp+116]
        mov     DWORD [esp+112], edi
        jb      NEAR L$002x86_outer_loop

        ; Write the final state back over arg0's block.
        mov     edi, DWORD [esp+104]
        mov     DWORD [edi+12], ebx
        mov     DWORD [edi+8], ecx
        mov     DWORD [edi+4], edx
        mov     DWORD [edi], ebp

        add     esp, 84
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret
;-----------------------------------------------------------------------
; __mmx_gmult_4bit_inner -- fully unrolled MMX core (file-internal, not
; exported; register-based interface).
;
; In:   eax -> table of 8-byte entries, indexed by a 4-bit value
;       esi -> table read in 16-byte entries at offset nibble*16
;       edi -> 16-byte block; bytes 14 down to 0 are read here
;       ebx =  byte 15 of that block, zero-extended (the callers in this
;              file load it with movzx or `shr ebx,24`)
; Out:  ebx, ecx, edx, ebp = result dwords, each already byte-swapped;
;       the callers store them at offsets 12, 8, 4 and 0 respectively
; Preserved: eax, esi (never written)
; Clobbers:  edi, mm0, mm1, mm2, flags.  No emms is issued here; the
;            callers execute it.
;
; Working state:
;   mm0 / mm1   low / high 64 bits of the 128-bit accumulator
;   mm2         bits carried from mm1 into mm0 on each 4-bit shift
;   ecx         next low-nibble offset  (byte << 4, via cl)
;   edx         next high-nibble offset (byte & 0xF0)
;   ebx / ebp   alternate as "4 bits just shifted out of mm0"; the fold
;               of the matching [eax+idx*8] entry is deferred by one step
;
; The 31 steps after the seed are: one high-nibble step without a fold,
; 14 identical (low, high) pairs for bytes 13..0, one more low-nibble
; step, and a final high-nibble step that loads no further byte.  The
; repeated pairs are produced by the two macros below; the emitted
; instruction order is the same as the straight-line form.
;-----------------------------------------------------------------------

; Low-nibble step: table offset in ecx, deferred fold index in ebp,
; newly shifted-out bits captured in ebp, ebx masked for the next step.
%macro MMX_GMULT_LO_NIBBLE 0
        shl     cl, 4                           ; ecx = low nibble * 16
        psrlq   mm0, 4
        movq    mm2, mm1
        psrlq   mm1, 4
        pxor    mm0, [esi+ecx*1+8]
        psllq   mm2, 60                         ; low 4 bits of mm1 -> top of mm0
        and     edx, 0xF0                       ; edx = high nibble * 16
        pxor    mm1, [eax+ebp*8]
        and     ebx, 0x0F
        pxor    mm1, [esi+ecx*1]
        movd    ebp, mm0
        pxor    mm0, mm2
%endmacro

; High-nibble step: table offset in edx, deferred fold index in ebx,
; newly shifted-out bits captured in ebx, ebp masked for the next step.
; %1 = offset (from edi) of the next input byte to fetch.
%macro MMX_GMULT_HI_NIBBLE 1
        psrlq   mm0, 4
        movq    mm2, mm1
        psrlq   mm1, 4
        pxor    mm0, [esi+edx*1+8]
        mov     cl, BYTE [edi+%1]               ; fetch the next byte
        psllq   mm2, 60
        pxor    mm1, [eax+ebx*8]
        and     ebp, 0x0F
        pxor    mm1, [esi+edx*1]
        mov     edx, ecx                        ; masked to the high nibble in the next step
        movd    ebx, mm0
        pxor    mm0, mm2
%endmacro

align   16
__mmx_gmult_4bit_inner:
        ; Seed the accumulator from the entry for the low nibble of byte 15.
        xor     ecx, ecx                        ; upper 24 bits of ecx stay zero throughout
        mov     edx, ebx
        mov     cl, dl
        shl     cl, 4                           ; ecx = low nibble * 16
        and     edx, 0xF0                       ; edx = high nibble * 16
        movq    mm0, [esi+ecx*1+8]
        movq    mm1, [esi+ecx*1]
        movd    ebp, mm0

        ; High nibble of byte 15: nothing to fold yet; fetch byte 14.
        psrlq   mm0, 4
        movq    mm2, mm1
        psrlq   mm1, 4
        pxor    mm0, [esi+edx*1+8]
        mov     cl, BYTE [edi+14]
        psllq   mm2, 60
        and     ebp, 0x0F
        pxor    mm1, [esi+edx*1]
        mov     edx, ecx
        movd    ebx, mm0
        pxor    mm0, mm2

        ; Each pair consumes both nibbles of the byte fetched by the
        ; previous step and fetches the next lower byte (13 down to 0).
%assign MMX_GMULT_NEXT_BYTE 13
%rep 14
        MMX_GMULT_LO_NIBBLE
        MMX_GMULT_HI_NIBBLE MMX_GMULT_NEXT_BYTE
%assign MMX_GMULT_NEXT_BYTE MMX_GMULT_NEXT_BYTE-1
%endrep

        ; Low nibble of byte 0.
        MMX_GMULT_LO_NIBBLE

        ; High nibble of byte 0: same as the macro step minus the fetch.
        psrlq   mm0, 4
        movq    mm2, mm1
        psrlq   mm1, 4
        pxor    mm0, [esi+edx*1+8]
        psllq   mm2, 60
        pxor    mm1, [eax+ebx*8]
        and     ebp, 0x0F
        pxor    mm1, [esi+edx*1]
        movd    ebx, mm0                        ; ebx = low dword of mm0, taken before the carry merge
        pxor    mm0, mm2

        ; Last deferred fold is done in integer registers: take the upper
        ; dword of the 8-byte entry, shift it left by 4 and XOR it into
        ; the top result dword.
        mov     edi, DWORD [eax+ebp*8+4]
        psrlq   mm0, 32
        movd    edx, mm1
        psrlq   mm1, 32
        movd    ecx, mm0
        movd    ebp, mm1
        shl     edi, 4
        bswap   ebx
        bswap   edx
        bswap   ecx
        xor     ebp, edi
        bswap   ebp
        ret
;-----------------------------------------------------------------------
; _gcm_gmult_4bit_mmx -- MMX variant; thin wrapper around
; __mmx_gmult_4bit_inner.
;
; Stack arguments (offsets relative to esp on entry):
;   [esp+4]  arg0 -> 16-byte block; read on entry, overwritten on exit
;   [esp+8]  arg1 -> table passed to the inner routine in esi
;
; Saves/restores ebp, ebx, esi, edi.  Clobbers eax, ecx, edx, flags and
; the MMX registers used by the inner routine; emms is executed before
; returning.
;-----------------------------------------------------------------------
global  _gcm_gmult_4bit_mmx
align   16
_gcm_gmult_4bit_mmx:
L$_gcm_gmult_4bit_mmx_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi

        mov     edi, DWORD [esp+20]             ; edi = arg0 (16-byte block)
        mov     esi, DWORD [esp+24]             ; esi = arg1 (table base)

        ; Position-independent address of L$rem_4bit: call/pop yields the
        ; run-time address of the label, then the link-time distance is added.
        call    L$005pic_point
L$005pic_point:
        pop     eax
        lea     eax, [(L$rem_4bit-L$005pic_point)+eax]

        movzx   ebx, BYTE [edi+15]              ; inner routine takes byte 15 in ebx
        call    __mmx_gmult_4bit_inner

        mov     edi, DWORD [esp+20]             ; edi was clobbered by the inner routine
        emms
        mov     DWORD [edi+12], ebx
        mov     DWORD [edi+4], edx
        mov     DWORD [edi+8], ecx
        mov     DWORD [edi], ebp

        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret
;-----------------------------------------------------------------------
; _gcm_ghash_4bit_mmx -- MMX variant, multi-block; calls
; __mmx_gmult_4bit_inner once per 16 input bytes.
;
; Stack arguments (offsets relative to esp on entry):
;   [esp+4]   arg0 -> 16-byte state block; read on entry, overwritten on exit
;   [esp+8]   arg1 -> table passed to the inner routine in esi
;   [esp+12]  arg2 -> input data, consumed 16 bytes per iteration
;   [esp+16]  arg3 =  input length in bytes
;
; The arg2 and arg3 stack slots are reused as scratch: arg3 is replaced
; by the end pointer (arg2 + arg3) and arg2 holds the current input
; pointer across the inner call.
;
; NOTE: the loop is bottom-tested, so one 16-byte block is always
; processed before the end pointer is checked.
;
; Saves/restores ebp, ebx, esi, edi.  Clobbers eax, ecx, edx, flags and
; the MMX registers used by the inner routine; emms is executed before
; returning.
;
; Local frame (20 bytes): [esp+0..15] = state XOR current input block,
; [esp+16..19] not accessed.  With the frame in place the arguments are
; at [esp+40] .. [esp+52].
;-----------------------------------------------------------------------
global  _gcm_ghash_4bit_mmx
align   16
_gcm_ghash_4bit_mmx:
L$_gcm_ghash_4bit_mmx_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi

        mov     ebp, DWORD [esp+20]             ; ebp = arg0 (state block)
        mov     esi, DWORD [esp+24]             ; esi = arg1 (table base)
        mov     edi, DWORD [esp+28]             ; edi = arg2 (input pointer)
        mov     ecx, DWORD [esp+32]             ; ecx = arg3 (length)

        ; Position-independent address of L$rem_4bit (call/pop + offset).
        call    L$006pic_point
L$006pic_point:
        pop     eax
        lea     eax, [(L$rem_4bit-L$006pic_point)+eax]

        add     ecx, edi
        mov     DWORD [esp+32], ecx             ; arg3 slot now holds the end pointer
        sub     esp, 20

        ; Load the state; ebp is read last because it is also the pointer.
        mov     ebx, DWORD [ebp+12]
        mov     edx, DWORD [ebp+4]
        mov     ecx, DWORD [ebp+8]
        mov     ebp, DWORD [ebp]
        jmp     NEAR L$007mmx_outer_loop

align   16
L$007mmx_outer_loop:
        ; state ^= next 16 input bytes
        xor     ebx, DWORD [edi+12]
        xor     edx, DWORD [edi+4]
        xor     ecx, DWORD [edi+8]
        xor     ebp, DWORD [edi]

        mov     DWORD [esp+48], edi             ; save the input pointer in the arg2 slot
        mov     DWORD [esp+12], ebx
        mov     DWORD [esp+4], edx
        mov     DWORD [esp+8], ecx
        mov     DWORD [esp], ebp

        mov     edi, esp                        ; inner routine reads bytes 14..0 via edi
        shr     ebx, 24                         ; ... and takes byte 15 in ebx
        call    __mmx_gmult_4bit_inner

        mov     edi, DWORD [esp+48]
        lea     edi, [edi+16]
        cmp     edi, DWORD [esp+52]
        jb      NEAR L$007mmx_outer_loop

        ; Write the final state back over arg0's block.
        mov     edi, DWORD [esp+40]
        emms
        mov     DWORD [edi+12], ebx
        mov     DWORD [edi+4], edx
        mov     DWORD [edi+8], ecx
        mov     DWORD [edi], ebp

        add     esp, 20
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret
;-----------------------------------------------------------------------
; L$rem_4bit -- 16 entries of 8 bytes each, addressed by the MMX code as
; [base + index*8] with a 4-bit index.  Every entry has a zero low dword;
; the high dword carries a 16-bit pattern placed at bits 12..27.
; The table is followed by a NUL-terminated ASCII identification string.
;-----------------------------------------------------------------------
align   64
L$rem_4bit:
        ;       low, high
        dd      0, 0x00000000,  0, 0x01C20000,  0, 0x03840000,  0, 0x02460000
        dd      0, 0x07080000,  0, 0x06CA0000,  0, 0x048C0000,  0, 0x054E0000
        dd      0, 0x0E100000,  0, 0x0FD20000,  0, 0x0D940000,  0, 0x0C560000
        dd      0, 0x09180000,  0, 0x08DA0000,  0, 0x0A9C0000,  0, 0x0B5E0000
        db      'GHASH for x86, CRYPTOGAMS by <appro@openssl.org>', 0