VirtualBox

source: vbox/trunk/src/libs/zlib-1.2.1/contrib/masmx86/inffas32.asm@ 16236

Last change on this file since 16236 was 6392, checked in by vboxsync, 17 years ago

export libpng and zlib so Windows and OS/2 builds cleanly.

  • Property svn:eol-style set to native
File size: 12.9 KB
Line 
1; 75 "inffast.S"
2;FILE "inffast.S"
3
4;;;GLOBAL _inflate_fast
5
6;;;SECTION .text
7
8
9
10 .586p
11 .mmx
12
13 name inflate_fast_x86
14 .MODEL FLAT
15
16_DATA segment
; Run-time selector for the decode loop, tested by L_check_mmx and L_break_loop:
;   value below 2 (initial value 1) -> CPU detection has not been done yet
;   2 -> use the MMX loop, 3 -> use the plain integer loop
17inflate_fast_use_mmx:
18 dd 1
19
20
21_TEXT segment
22PUBLIC _inflate_fast
23
24ALIGN 4
; Exported entry point. It only jumps over the strings, mask table and
; equates that follow, to the real body at inflate_fast_entry.
25_inflate_fast:
26 jmp inflate_fast_entry
27
28
29
30ALIGN 4
; Zero-terminated strings placed in the code segment.
; The first one is an unlabelled credit string, so it cannot be referenced by name.
31 db 'Fast decoding Code from Chris Anderson'
32 db 0
33
34ALIGN 4
; Loaded into ecx by L_invalid_literal_length_code.
35invalid_literal_length_code_msg:
36 db 'invalid literal/length code'
37 db 0
38
39ALIGN 4
; Loaded into ecx by L_invalid_distance_code.
40invalid_distance_code_msg:
41 db 'invalid distance code'
42 db 0
43
44ALIGN 4
; Loaded into ecx by L_invalid_distance_too_far.
45invalid_distance_too_far_msg:
46 db 'invalid distance too far back'
47 db 0
48
49
50ALIGN 4
; Table of 33 dwords: entry n holds (2^n)-1, i.e. a mask of the n low bits,
; for n = 0..32. The MMX path indexes it as [inflate_fast_mask+eax*4] to
; extract eax extra bits from the bit accumulator.
51inflate_fast_mask:
52dd 0
53dd 1
54dd 3
55dd 7
56dd 15
57dd 31
58dd 63
59dd 127
60dd 255
61dd 511
62dd 1023
63dd 2047
64dd 4095
65dd 8191
66dd 16383
67dd 32767
68dd 65535
69dd 131071
70dd 262143
71dd 524287
72dd 1048575
73dd 2097151
74dd 4194303
75dd 8388607
76dd 16777215
77dd 33554431
78dd 67108863
79dd 134217727
80dd 268435455
81dd 536870911
82dd 1073741823
83dd 2147483647
84dd 4294967295
85
86
87
; Byte offsets of the fields this routine uses inside the state structure
; whose pointer is loaded from offset 28 of the first argument.
; NOTE(review): these are hard-coded and must match the structure layout of
; the zlib version being built -- verify against its inflate.h.
88mode_state equ 0 ;/* state->mode */
89wsize_state equ 32 ;/* state->wsize */
90write_state equ (36+4) ;/* state->write */
91window_state equ (40+4) ;/* state->window */
92hold_state equ (44+4) ;/* state->hold */
93bits_state equ (48+4) ;/* state->bits */
94lencode_state equ (64+4) ;/* state->lencode */
95distcode_state equ (68+4) ;/* state->distcode */
96lenbits_state equ (72+4) ;/* state->lenbits */
97distbits_state equ (76+4) ;/* state->distbits */
98
99
100;;SECTION .text
101; 205 "inffast.S"
102;GLOBAL inflate_fast_use_mmx
103
104;SECTION .data
105
106
107; GLOBAL inflate_fast_use_mmx:object
108;.size inflate_fast_use_mmx, 4
109; 226 "inffast.S"
110;SECTION .text
111
112ALIGN 4
; Real body of _inflate_fast: builds the stack frame and caches everything
; the decode loops need.
;
; Frame after the prologue (64 bytes of locals, then EFLAGS, ebx, ebp, esi,
; edi, return address):
;   [esp+88] first stack argument  (presumably the z_stream pointer -- confirm)
;   [esp+92] second stack argument (presumably "start" -- confirm)
; Locals:
;   [esp+0]  (1 << lenbits) - 1        [esp+4]  (1 << distbits) - 1
;   [esp+8]  state->lencode            [esp+12] state->distcode
;   [esp+16] output limit              [esp+20] input limit
;   [esp+24] current match length      [esp+28] 12-byte input staging buffer
;   [esp+40] lowest output address a match may reference directly
;   [esp+44] saved input pointer       [esp+48] state->write
;   [esp+52] state->wsize              [esp+56] state->window
;   [esp+60] initial output pointer
; Registers on leaving this block: ebp = state->hold, ebx = state->bits,
; esi = input pointer.
113inflate_fast_entry:
114 push edi
115 push esi
116 push ebp
117 push ebx
118 pushfd
119 sub esp,64
; Clear the direction flag so lods/stos/movs advance upwards; the caller's
; flags were saved by pushfd and are restored in L_done.
120 cld
121
122
123
124
; esi = first argument, edi = state pointer stored at offset 28 of it.
125 mov esi, [esp+88]
126 mov edi, [esi+28]
127
128
129
130
131
132
133
; Input limit = [arg+0] + [arg+4] - 11
; (presumably next_in + avail_in - 11 -- confirm against z_stream).
134 mov edx, [esi+4]
135 mov eax, [esi+0]
136
137 add edx,eax
138 sub edx,11
139
140 mov [esp+44],eax
141 mov [esp+20],edx
142
; ebx = [arg+12], ecx = [arg+16] (presumably next_out / avail_out -- confirm).
143 mov ebp, [esp+92]
144 mov ecx, [esi+16]
145 mov ebx, [esi+12]
146
; ebp = [arg+12] + [arg+16] - second argument.
147 sub ebp,ecx
148 neg ebp
149 add ebp,ebx
150
; ecx = [arg+12] + [arg+16] - 257 = output limit.
151 sub ecx,257
152 add ecx,ebx
153
154 mov [esp+60],ebx
155 mov [esp+40],ebp
156 mov [esp+16],ecx
157; 285 "inffast.S"
; Cache the two decoding table pointers.
158 mov eax, [edi+lencode_state]
159 mov ecx, [edi+distcode_state]
160
161 mov [esp+8],eax
162 mov [esp+12],ecx
163
; Index mask for the length table: (1 << lenbits) - 1.
164 mov eax,1
165 mov ecx, [edi+lenbits_state]
166 shl eax,cl
167 dec eax
168 mov [esp+0],eax
169
; Index mask for the distance table: (1 << distbits) - 1.
170 mov eax,1
171 mov ecx, [edi+distbits_state]
172 shl eax,cl
173 dec eax
174 mov [esp+4],eax
175
; Cache the sliding-window description.
176 mov eax, [edi+wsize_state]
177 mov ecx, [edi+write_state]
178 mov edx, [edi+window_state]
179
180 mov [esp+52],eax
181 mov [esp+48],ecx
182 mov [esp+56],edx
183
; Bit accumulator and its current bit count.
184 mov ebp, [edi+hold_state]
185 mov ebx, [edi+bits_state]
186; 321 "inffast.S"
; If the input limit is above the input pointer, read the caller's buffer
; directly (after aligning in L_align_long).
187 mov esi, [esp+44]
188 mov ecx, [esp+20]
189 cmp ecx,esi
190 ja L_align_long
191
; Otherwise stage the input: ecx = limit + 11 - pointer = [arg+4] bytes are
; copied into the 12-byte buffer at [esp+28] and the remaining 12 - ecx bytes
; are zero-filled. The buffer address then becomes both the input pointer and
; the input limit, which is how L_update_next_in later detects this case.
192 add ecx,11
193 sub ecx,esi
194 mov eax,12
195 sub eax,ecx
196 lea edi, [esp+28]
197 rep movsb
198 mov ecx,eax
199 xor eax,eax
200 rep stosb
201 lea esi, [esp+28]
202 mov [esp+20],esi
203 jmp L_is_aligned
204
205
206L_align_long:
; While the input pointer is not a multiple of 4, take one byte from it and
; OR it into the accumulator (ebp) above the ebx bits already held, adding 8
; to the bit count.
207 test esi,3
208 jz L_is_aligned
209 xor eax,eax
210 mov al, [esi]
211 inc esi
212 mov ecx,ebx
213 add ebx,8
214 shl eax,cl
215 or ebp,eax
216 jmp L_align_long
217
218L_is_aligned:
; edi was used as scratch above; reload the output pointer saved at [esp+60].
219 mov edi, [esp+60]
220; 366 "inffast.S"
221L_check_mmx:
; Dispatch on inflate_fast_use_mmx: 2 -> MMX loop, above 2 -> integer loop.
; Any smaller value runs the detection below once, stores 2 or 3 and then
; jumps back here.
222 cmp dword ptr [inflate_fast_use_mmx],2
223 je L_init_mmx
224 ja L_do_loop
225
; cpuid overwrites eax, ebx, ecx and edx, so keep the decoder's values.
226 push eax
227 push ebx
228 push ecx
229 push edx
; Try to flip bit 21 of EFLAGS (the ID flag). If the bit reads back
; unchanged, the cpuid instruction is not available.
230 pushfd
231 mov eax, [esp]
232 xor dword ptr [esp],0200000h
233
234
235
236
237 popfd
238 pushfd
239 pop edx
240 xor edx,eax
241 jz L_dont_use_mmx
; cpuid leaf 0: require the vendor string "GenuineIntel"
; (ebx = "Genu", edx = "ineI", ecx = "ntel").
242 xor eax,eax
243 cpuid
244 cmp ebx,0756e6547h
245 jne L_dont_use_mmx
246 cmp ecx,06c65746eh
247 jne L_dont_use_mmx
248 cmp edx,049656e69h
249 jne L_dont_use_mmx
; cpuid leaf 1: bits 8..11 of eax (the family field) must equal 6, and
; bit 23 of edx (the MMX feature flag) must be set.
250 mov eax,1
251 cpuid
252 shr eax,8
253 and eax,15
254 cmp eax,6
255 jne L_dont_use_mmx
256 test edx,0800000h
257 jnz L_use_mmx
258 jmp L_dont_use_mmx
259L_use_mmx:
260 mov dword ptr [inflate_fast_use_mmx],2
261 jmp L_check_mmx_pop
262L_dont_use_mmx:
263 mov dword ptr [inflate_fast_use_mmx],3
264L_check_mmx_pop:
; Restore the decoder registers and re-run the dispatch with the new value.
265 pop edx
266 pop ecx
267 pop ebx
268 pop eax
269 jmp L_check_mmx
270; 426 "inffast.S"
271ALIGN 4
; Integer decode loop.
; Registers: ebp = bit accumulator, bl = number of valid bits in it,
; esi = input pointer, edi = output pointer.
; A table entry is one dword: al = operation byte, ah = number of bits the
; code occupies, upper 16 bits = value.
272L_do_loop:
273; 437 "inffast.S"
; With 15 or fewer bits left, append the next 16 input bits above the bits
; already held.
274 cmp bl,15
275 ja L_get_length_code
276
277 xor eax,eax
278 lodsw
279 mov cl,bl
280 add bl,16
281 shl eax,cl
282 or ebp,eax
283
284L_get_length_code:
; eax = lencode[accumulator & length mask]
285 mov edx, [esp+0]
286 mov ecx, [esp+8]
287 and edx,ebp
288 mov eax, [ecx+edx*4]
289
290L_dolen:
291
292
293
294
295
296
; Drop the ah bits of the code from the accumulator.
297 mov cl,ah
298 sub bl,ah
299 shr ebp,cl
300
301
302
303
304
305
; Operation byte 0: the value is a literal; store its low byte to the output.
306 test al,al
307 jnz L_test_for_length_base
308
309 shr eax,16
310 stosb
311
312L_while_test:
313
314
; Continue only while the output pointer is below the output limit
; ([esp+16]) and the input pointer is below the input limit ([esp+20]).
315 cmp [esp+16],edi
316 jbe L_break_loop
317
318 cmp [esp+20],esi
319 ja L_do_loop
320 jmp L_break_loop
321
322L_test_for_length_base:
323; 502 "inffast.S"
; Non-literal entry. edx = value field (base length), cl = operation byte.
324 mov edx,eax
325 shr edx,16
326 mov cl,al
327
; Bit 4 of the operation byte clear: not a length base, see the second-level
; handler. Otherwise the low 4 bits give the number of extra bits to read.
328 test al,16
329 jz L_test_for_second_level_length
330 and cl,15
331 jz L_save_len
332 cmp bl,cl
333 jae L_add_bits_to_len
334
; Not enough bits held: append 16 more. ch keeps the extra-bit count while
; cl is borrowed as the shift count.
335 mov ch,cl
336 xor eax,eax
337 lodsw
338 mov cl,bl
339 add bl,16
340 shl eax,cl
341 or ebp,eax
342 mov cl,ch
343
344L_add_bits_to_len:
; length = base + (accumulator & ((1 << cl) - 1)); consume those cl bits.
345 mov eax,1
346 shl eax,cl
347 dec eax
348 sub bl,cl
349 and eax,ebp
350 shr ebp,cl
351 add edx,eax
352
353L_save_len:
; Park the length in [esp+24]; edx is reused for the distance next.
354 mov [esp+24],edx
355
356
357L_decode_distance:
358; 549 "inffast.S"
; Same refill rule as L_do_loop: append 16 bits when 15 or fewer are held.
359 cmp bl,15
360 ja L_get_distance_code
361
362 xor eax,eax
363 lodsw
364 mov cl,bl
365 add bl,16
366 shl eax,cl
367 or ebp,eax
368
369L_get_distance_code:
; eax = distcode[accumulator & distance mask]
370 mov edx, [esp+4]
371 mov ecx, [esp+12]
372 and edx,ebp
373 mov eax, [ecx+edx*4]
374
375
376L_dodist:
; edx = value field (base distance); drop the ah code bits.
377 mov edx,eax
378 shr edx,16
379 mov cl,ah
380 sub bl,ah
381 shr ebp,cl
382; 584 "inffast.S"
383 mov cl,al
384
; Bit 4 of the operation byte clear: not a distance base. Otherwise the low
; 4 bits are the extra-bit count; zero extra bits goes to L_check_dist_one.
385 test al,16
386 jz L_test_for_second_level_dist
387 and cl,15
388 jz L_check_dist_one
389 cmp bl,cl
390 jae L_add_bits_to_dist
391
; Not enough bits held: append 16 more, keeping the extra-bit count in ch.
392 mov ch,cl
393 xor eax,eax
394 lodsw
395 mov cl,bl
396 add bl,16
397 shl eax,cl
398 or ebp,eax
399 mov cl,ch
400
401L_add_bits_to_dist:
; distance = base + (accumulator & ((1 << cl) - 1)); consume those cl bits.
402 mov eax,1
403 shl eax,cl
404 dec eax
405 sub bl,cl
406 and eax,ebp
407 shr ebp,cl
408 add edx,eax
; NOTE(review): L_check_window is the next label in the file with no
; instruction in between, so this jump only transfers to where execution
; would fall through anyway.
409 jmp L_check_window
410
411L_check_window:
412; 625 "inffast.S"
; In: edx = distance, [esp+24] = length, edi = output pointer.
; esi is about to be used as the copy source, so save the input pointer.
413 mov [esp+44],esi
; eax = edi - [esp+40]: how far back a match can reach inside the output
; buffer. A longer distance has to be served from the window.
414 mov eax,edi
415 sub eax, [esp+40]
416
417 cmp eax,edx
418 jb L_clip_window
419
420 mov ecx, [esp+24]
421 mov esi,edi
422 sub esi,edx
423
; Copy the first three bytes one at a time, then the remaining length - 3
; with rep movsb.
; NOTE(review): assumes length >= 3 here -- confirm against the table contents.
424 sub ecx,3
425 mov al, [esi]
426 mov [edi],al
427 mov al, [esi+1]
428 mov dl, [esi+2]
429 add esi,3
430 mov [edi+1],al
431 mov [edi+2],dl
432 add edi,3
433 rep movsb
434
435 mov esi, [esp+44]
436 jmp L_while_test
437
438ALIGN 4
; Reached from L_dodist when the distance code has no extra bits.
; Special case: distance 1 with at least one byte already in the output
; buffer, which means the match is a run of the previous output byte.
439L_check_dist_one:
440 cmp edx,1
441 jne L_check_window
; edi equal to [esp+40] means there is no earlier byte to repeat from the
; output buffer; use the general path instead.
442 cmp [esp+40],edi
443 je L_check_window
444
; al = previous output byte. Store it three times explicitly, then
; length - 3 more times with rep stosb.
; NOTE(review): assumes length >= 3 here -- confirm against the table contents.
445 dec edi
446 mov ecx, [esp+24]
447 mov al, [edi]
448 sub ecx,3
449
450 mov [edi+1],al
451 mov [edi+2],al
452 mov [edi+3],al
453 add edi,4
454 rep stosb
455
456 jmp L_while_test
457
458ALIGN 4
; Length entry whose operation byte has bit 4 clear.
; In: al = cl = operation byte, edx = value field.
459L_test_for_second_level_length:
460
461
462
463
; Bit 6 set: end-of-block or invalid code, decided in L_test_for_end_of_block.
464 test al,64
465 jnz L_test_for_end_of_block
466
; Otherwise the entry links to another table entry:
; index = value + (accumulator & ((1 << cl) - 1)).
; Load that entry and handle it like a first-level one.
467 mov eax,1
468 shl eax,cl
469 dec eax
470 and eax,ebp
471 add eax,edx
472 mov edx, [esp+8]
473 mov eax, [edx+eax*4]
474 jmp L_dolen
475
476ALIGN 4
; Distance entry whose operation byte has bit 4 clear.
; In: al = cl = operation byte, edx = value field.
477L_test_for_second_level_dist:
478
479
480
481
; Bit 6 set: invalid distance code.
482 test al,64
483 jnz L_invalid_distance_code
484
; Otherwise the entry links to another table entry:
; index = value + (accumulator & ((1 << cl) - 1)).
; Load that entry and handle it like a first-level one.
485 mov eax,1
486 shl eax,cl
487 dec eax
488 and eax,ebp
489 add eax,edx
490 mov edx, [esp+12]
491 mov eax, [edx+eax*4]
492 jmp L_dodist
493
ALIGN 4
L_clip_window:
; 721 "inffast.S"
; Reached from L_check_window when the match distance reaches further back
; than the bytes available in the output buffer, so the start of the copy
; has to come from the sliding window.
;   In:   eax = edi - [esp+40] (bytes reachable in the output buffer)
;         edx = distance, [esp+24] = length, edi = output pointer
;         [esp+52] = window size, [esp+56] = window base, [esp+48] = write index
;   Out:  jumps to L_do_copy1 with esi = copy source and eax = bytes still
;         to copy, or to L_wrap_around_window / L_invalid_distance_too_far.
        mov     ecx,eax
        mov     eax,[esp+52]                    ; eax = window size
        neg     ecx
        mov     esi,[esp+56]                    ; esi = window base

        cmp     eax,edx
        jb      L_invalid_distance_too_far      ; distance exceeds the window size

        add     ecx,edx                         ; ecx = distance - reachable output
                                                ;     = bytes to take from the window
        cmp     dword ptr [esp+48],0
        jne     L_wrap_around_window            ; non-zero write index is handled there

        sub     eax,ecx
        add     esi,eax                         ; esi = window + wsize - ecx
; 749 "inffast.S"
        mov     eax,[esp+24]                    ; eax = match length
        cmp     eax,ecx
        jbe     L_do_copy1                      ; whole match comes from the window

        sub     eax,ecx                         ; eax = bytes left after the window part
        rep movsb                               ; copy the ecx window bytes
        mov     esi,edi
        sub     esi,edx                         ; rest comes from the output at edi - distance
        jmp     L_do_copy1
; The original listing repeated the eight instructions from "cmp eax,ecx"
; through this jump a second time at this point. No label preceded the copy
; and it followed an unconditional jmp, so it could never execute; it has
; been removed.
530
531L_wrap_around_window:
532; 793 "inffast.S"
; Window copy with a non-zero write index.
; In: ecx = bytes needed from the window, esi = window base,
;     edx = distance, edi = output pointer.
533 mov eax, [esp+48]
; If the needed bytes all lie below the write index they are contiguous.
534 cmp ecx,eax
535 jbe L_contiguous_in_window
536
; Otherwise start near the end of the window:
; esi = window + wsize + write - ecx, and ecx = ecx - write bytes are
; available from there.
537 add esi, [esp+52]
538 add esi,eax
539 sub esi,ecx
540 sub ecx,eax
541
542
543 mov eax, [esp+24]
544 cmp eax,ecx
545 jbe L_do_copy1
546
; Copy that piece, then continue from the window base for up to
; "write" bytes.
547 sub eax,ecx
548 rep movsb
549 mov esi, [esp+56]
550 mov ecx, [esp+48]
551 cmp eax,ecx
552 jbe L_do_copy1
553
; Still more to copy: the rest comes from the output buffer at
; edi - distance.
554 sub eax,ecx
555 rep movsb
556 mov esi,edi
557 sub esi,edx
558 jmp L_do_copy1
559
560L_contiguous_in_window:
561; 836 "inffast.S"
; In: eax = write index, ecx = bytes needed from the window,
;     esi = window base. Source = window + write - ecx.
562 add esi,eax
563 sub esi,ecx
564
565
566 mov eax, [esp+24]
567 cmp eax,ecx
568 jbe L_do_copy1
569
; Match is longer than the window part: copy that part, then continue from
; the output buffer at edi - distance.
570 sub eax,ecx
571 rep movsb
572 mov esi,edi
573 sub esi,edx
574
575L_do_copy1:
576; 862 "inffast.S"
; Common tail of all window paths: copy the remaining eax bytes from esi,
; then restore the input pointer saved at [esp+44].
577 mov ecx,eax
578 rep movsb
579
580 mov esi, [esp+44]
581 jmp L_while_test
582; 878 "inffast.S"
583ALIGN 4
; Set up the register assignment used by the MMX loop:
;   mm0 = bit accumulator, ebp = number of valid bits in it
;   mm4 = mm3 = length-table mask, mm5 = mm2 = distance-table mask
;   mm1 = number of bits still to be shifted out of mm0 (initially 0)
;   ebx = length-table pointer
584L_init_mmx:
585 emms
586
587
588
589
590
591 movd mm0,ebp
592 mov ebp,ebx
593; 896 "inffast.S"
594 movd mm4,[esp+0]
595 movq mm3,mm4
596 movd mm5,[esp+4]
597 movq mm2,mm5
598 pxor mm1,mm1
599 mov ebx, [esp+8]
600 jmp L_do_loop_mmx
601
602ALIGN 4
; MMX decode loop. The bits of the previous code are removed lazily: each
; step records the count in mm1 and the shift is applied at the start of the
; next step.
603L_do_loop_mmx:
604 psrlq mm0,mm1
605
; With 32 or fewer bits held, append the next 32 input bits above them.
606 cmp ebp,32
607 ja L_get_length_code_mmx
608
609 movd mm6,ebp
610 movd mm7,[esi]
611 add esi,4
612 psllq mm7,mm6
613 add ebp,32
614 por mm0,mm7
615
616L_get_length_code_mmx:
; eax = lencode[accumulator & length mask]; mm4 is then restored from mm3.
617 pand mm4,mm0
618 movd eax,mm4
619 movq mm4,mm3
620 mov eax, [ebx+eax*4]
621
622L_dolen_mmx:
; Record the code's bit count (ah) as the pending shift and deduct it from
; the bit count.
623 movzx ecx,ah
624 movd mm1,ecx
625 sub ebp,ecx
626
; Operation byte 0: the value is a literal; store its low byte to the output.
627 test al,al
628 jnz L_test_for_length_base_mmx
629
630 shr eax,16
631 stosb
632
633L_while_test_mmx:
634
635
; Continue only while the output pointer is below the output limit
; ([esp+16]) and the input pointer is below the input limit ([esp+20]).
636 cmp [esp+16],edi
637 jbe L_break_loop
638
639 cmp [esp+20],esi
640 ja L_do_loop_mmx
641 jmp L_break_loop
642
643L_test_for_length_base_mmx:
644
; Non-literal entry. edx = value field (base length).
645 mov edx,eax
646 shr edx,16
647
; Bit 4 of the operation byte clear: not a length base. Otherwise
; eax = number of extra bits; with none the length is already complete.
648 test al,16
649 jz L_test_for_second_level_length_mmx
650 and eax,15
651 jz L_decode_distance_mmx
652
; Apply the pending shift, then
; length = base + (accumulator & inflate_fast_mask[eax]).
; The eax extra bits become the new pending shift.
653 psrlq mm0,mm1
654 movd mm1,eax
655 movd ecx,mm0
656 sub ebp,eax
657 and ecx, [inflate_fast_mask+eax*4]
658 add edx,ecx
659
660L_decode_distance_mmx:
; In: edx = match length. Apply the pending shift first.
661 psrlq mm0,mm1
662
; With 32 or fewer bits held, append the next 32 input bits above them.
663 cmp ebp,32
664 ja L_get_dist_code_mmx
665
666 movd mm6,ebp
667 movd mm7,[esi]
668 add esi,4
669 psllq mm7,mm6
670 add ebp,32
671 por mm0,mm7
672
673L_get_dist_code_mmx:
; eax = distcode[accumulator & distance mask]; mm5 is then restored from mm2.
; ebx temporarily holds the distance table instead of the length table.
674 mov ebx, [esp+12]
675 pand mm5,mm0
676 movd eax,mm5
677 movq mm5,mm2
678 mov eax, [ebx+eax*4]
679
680L_dodist_mmx:
681
; ebx = value field (base distance); the code's bit count (ah) becomes the
; pending shift.
682 movzx ecx,ah
683 mov ebx,eax
684 shr ebx,16
685 sub ebp,ecx
686 movd mm1,ecx
687
; Bit 4 of the operation byte clear: not a distance base. Otherwise
; eax = number of extra bits; zero extra bits goes to L_check_dist_one_mmx.
688 test al,16
689 jz L_test_for_second_level_dist_mmx
690 and eax,15
691 jz L_check_dist_one_mmx
692
693L_add_bits_to_dist_mmx:
; Apply the pending shift, then
; distance = base + (accumulator & inflate_fast_mask[eax]).
; The eax extra bits become the new pending shift.
694 psrlq mm0,mm1
695 movd mm1,eax
696 movd ecx,mm0
697 sub ebp,eax
698 and ecx, [inflate_fast_mask+eax*4]
699 add ebx,ecx
700
701L_check_window_mmx:
; In: ebx = distance, edx = length, edi = output pointer.
; esi is about to be used as the copy source, so save the input pointer.
702 mov [esp+44],esi
; eax = edi - [esp+40]: how far back a match can reach inside the output
; buffer. A longer distance has to be served from the window.
703 mov eax,edi
704 sub eax, [esp+40]
705
706 cmp eax,ebx
707 jb L_clip_window_mmx
708
709 mov ecx,edx
710 mov esi,edi
711 sub esi,ebx
712
; Copy the first three bytes one at a time, then the remaining length - 3
; with rep movsb.
; NOTE(review): assumes length >= 3 here -- confirm against the table contents.
713 sub ecx,3
714 mov al, [esi]
715 mov [edi],al
716 mov al, [esi+1]
717 mov dl, [esi+2]
718 add esi,3
719 mov [edi+1],al
720 mov [edi+2],dl
721 add edi,3
722 rep movsb
723
; Restore the input pointer and the length-table pointer for the next step.
724 mov esi, [esp+44]
725 mov ebx, [esp+8]
726 jmp L_while_test_mmx
727
728ALIGN 4
; Reached from L_dodist_mmx when the distance code has no extra bits.
; Special case: distance 1 with at least one byte already in the output
; buffer, which means the match is a run of the previous output byte.
729L_check_dist_one_mmx:
730 cmp ebx,1
731 jne L_check_window_mmx
; edi equal to [esp+40] means there is no earlier byte to repeat from the
; output buffer; use the general path instead.
732 cmp [esp+40],edi
733 je L_check_window_mmx
734
; al = previous output byte. Store it three times explicitly, then
; length - 3 more times with rep stosb.
; NOTE(review): assumes length >= 3 here -- confirm against the table contents.
735 dec edi
736 mov ecx,edx
737 mov al, [edi]
738 sub ecx,3
739
740 mov [edi+1],al
741 mov [edi+2],al
742 mov [edi+3],al
743 add edi,4
744 rep stosb
745
; ebx held the distance; reload the length-table pointer.
746 mov ebx, [esp+8]
747 jmp L_while_test_mmx
748
749ALIGN 4
; Length entry whose operation byte has bit 4 clear.
; In: al = operation byte, edx = value field, ebx = length table.
750L_test_for_second_level_length_mmx:
; Bit 6 set: end-of-block or invalid code, decided in L_test_for_end_of_block.
751 test al,64
752 jnz L_test_for_end_of_block
753
; Otherwise apply the pending shift and follow the link:
; index = value + (accumulator & inflate_fast_mask[op & 15]).
754 and eax,15
755 psrlq mm0,mm1
756 movd ecx,mm0
757 and ecx, [inflate_fast_mask+eax*4]
758 add ecx,edx
759 mov eax, [ebx+ecx*4]
760 jmp L_dolen_mmx
761
762ALIGN 4
; Distance entry whose operation byte has bit 4 clear.
; In: al = operation byte, ebx = value field.
763L_test_for_second_level_dist_mmx:
; Bit 6 set: invalid distance code.
764 test al,64
765 jnz L_invalid_distance_code
766
; Otherwise apply the pending shift and follow the link:
; index = value + (accumulator & inflate_fast_mask[op & 15]),
; looked up in the distance table at [esp+12].
767 and eax,15
768 psrlq mm0,mm1
769 movd ecx,mm0
770 and ecx, [inflate_fast_mask+eax*4]
771 mov eax, [esp+12]
772 add ecx,ebx
773 mov eax, [eax+ecx*4]
774 jmp L_dodist_mmx
775
ALIGN 4
L_clip_window_mmx:
; MMX-path counterpart of L_clip_window. Reached from L_check_window_mmx
; when the match distance reaches further back than the bytes available in
; the output buffer, so the start of the copy has to come from the sliding
; window.
;   In:   eax = edi - [esp+40] (bytes reachable in the output buffer)
;         ebx = distance, edx = length, edi = output pointer
;         [esp+52] = window size, [esp+56] = window base, [esp+48] = write index
;   Out:  jumps to L_do_copy1_mmx with esi = copy source and edx = bytes
;         still to copy, or to L_wrap_around_window_mmx /
;         L_invalid_distance_too_far.
        mov     ecx,eax
        mov     eax,[esp+52]                    ; eax = window size
        neg     ecx
        mov     esi,[esp+56]                    ; esi = window base

        cmp     eax,ebx
        jb      L_invalid_distance_too_far      ; distance exceeds the window size

        add     ecx,ebx                         ; ecx = distance - reachable output
                                                ;     = bytes to take from the window
        cmp     dword ptr [esp+48],0
        jne     L_wrap_around_window_mmx        ; non-zero write index is handled there

        sub     eax,ecx
        add     esi,eax                         ; esi = window + wsize - ecx

        cmp     edx,ecx
        jbe     L_do_copy1_mmx                  ; whole match comes from the window

        sub     edx,ecx                         ; edx = bytes left after the window part
        rep movsb                               ; copy the ecx window bytes
        mov     esi,edi
        sub     esi,ebx                         ; rest comes from the output at edi - distance
        jmp     L_do_copy1_mmx
; The original listing repeated the eight instructions from "cmp edx,ecx"
; through this jump a second time at this point. No label preceded the copy
; and it followed an unconditional jmp, so it could never execute; it has
; been removed.
811
812L_wrap_around_window_mmx:
813
; Window copy with a non-zero write index.
; In: ecx = bytes needed from the window, esi = window base,
;     ebx = distance, edx = length, edi = output pointer.
814 mov eax, [esp+48]
; If the needed bytes all lie below the write index they are contiguous.
815 cmp ecx,eax
816 jbe L_contiguous_in_window_mmx
817
; Otherwise start near the end of the window:
; esi = window + wsize + write - ecx, and ecx = ecx - write bytes are
; available from there.
818 add esi, [esp+52]
819 add esi,eax
820 sub esi,ecx
821 sub ecx,eax
822
823
824 cmp edx,ecx
825 jbe L_do_copy1_mmx
826
; Copy that piece, then continue from the window base for up to
; "write" bytes.
827 sub edx,ecx
828 rep movsb
829 mov esi, [esp+56]
830 mov ecx, [esp+48]
831 cmp edx,ecx
832 jbe L_do_copy1_mmx
833
; Still more to copy: the rest comes from the output buffer at
; edi - distance.
834 sub edx,ecx
835 rep movsb
836 mov esi,edi
837 sub esi,ebx
838 jmp L_do_copy1_mmx
839
840L_contiguous_in_window_mmx:
841
; In: eax = write index, ecx = bytes needed from the window,
;     esi = window base. Source = window + write - ecx.
842 add esi,eax
843 sub esi,ecx
844
845
846 cmp edx,ecx
847 jbe L_do_copy1_mmx
848
; Match is longer than the window part: copy that part, then continue from
; the output buffer at edi - distance.
849 sub edx,ecx
850 rep movsb
851 mov esi,edi
852 sub esi,ebx
853
854L_do_copy1_mmx:
855
856
; Common tail of all MMX window paths: copy the remaining edx bytes from
; esi, then restore the input pointer and the length-table pointer.
857 mov ecx,edx
858 rep movsb
859
860 mov esi, [esp+44]
861 mov ebx, [esp+8]
862 jmp L_while_test_mmx
863; 1174 "inffast.S"
; Exit reasons shared by the integer and MMX loops. Each handler sets
; ecx = message (0 for none) and edx = new value for state->mode, then
; jumps to L_update_stream_state.
; NOTE(review): 26 and 11 are presumably the "bad data" and "next block"
; mode constants of this zlib version -- confirm against inflate.h.
864L_invalid_distance_code:
865
866
867
868
869
870 mov ecx, invalid_distance_code_msg
871 mov edx,26
872 jmp L_update_stream_state
873
874L_test_for_end_of_block:
875
876
877
878
879
; Reached for a length entry with bit 6 of the operation byte set.
; Bit 5 set as well means end of block (no message, mode 11); otherwise
; the code is invalid.
880 test al,32
881 jz L_invalid_literal_length_code
882
883 mov ecx,0
884 mov edx,11
885 jmp L_update_stream_state
886
887L_invalid_literal_length_code:
888
889
890
891
892
893 mov ecx, invalid_literal_length_code_msg
894 mov edx,26
895 jmp L_update_stream_state
896
897L_invalid_distance_too_far:
898
899
900
; Reached from the clip-window code, where esi had been replaced by the
; window base; restore the input pointer saved at [esp+44].
901 mov esi, [esp+44]
902 mov ecx, invalid_distance_too_far_msg
903 mov edx,26
904 jmp L_update_stream_state
905
906L_update_stream_state:
907
; eax = first argument. A non-zero message is stored at offset 24 of it;
; edx is stored into state->mode.
908 mov eax, [esp+88]
909 test ecx,ecx
910 jz L_skip_msg
911 mov [eax+24],ecx
912L_skip_msg:
913 mov eax, [eax+28]
914 mov [eax+mode_state],edx
915 jmp L_break_loop
916
917ALIGN 4
; Common exit: hand unused input back, write the updated pointers, counts
; and bit state to the caller's structures, then restore registers and
; return.
; In: esi = input pointer, edi = output pointer; the bit count is in ebx
; (integer loop) or ebp (MMX loop).
918L_break_loop:
919; 1243 "inffast.S"
; The MMX loop keeps the bit count in ebp; move it to ebx so the code below
; serves both loops.
920 cmp dword ptr [inflate_fast_use_mmx],2
921 jne L_update_next_in
922
923
924
925 mov ebx,ebp
926
927L_update_next_in:
928; 1266 "inffast.S"
; eax = first argument, edx = state pointer.
; Give back the whole unused bytes held in the accumulator:
; esi -= bits >> 3, and bits is reduced to bits & 7.
929 mov eax, [esp+88]
930 mov ecx,ebx
931 mov edx, [eax+28]
932 shr ecx,3
933 sub esi,ecx
934 shl ecx,3
935 sub ebx,ecx
; Store the output pointer at offset 12 and the remaining bit count in
; state->bits.
936 mov [eax+12],edi
937 mov [edx+bits_state],ebx
938 mov ecx,ebx
939
; An input limit equal to the address of the staging buffer means input
; was read from that buffer (set up in inflate_fast_entry).
940 lea ebx, [esp+28]
941 cmp [esp+20],ebx
942 jne L_buf_not_used
943
; Translate esi from an address inside the staging buffer back to the
; caller's buffer, and recompute the input limit as [arg+0] + [arg+4] - 11.
944 sub esi,ebx
945 mov ebx, [eax+0]
946 mov [esp+20],ebx
947 add esi,ebx
948 mov ebx, [eax+4]
949 sub ebx,11
950 add [esp+20],ebx
951
952L_buf_not_used:
; Store the input pointer at offset 0.
953 mov [eax+0],esi
954
; ebx = (1 << remaining bits) - 1, the mask for the accumulator.
955 mov ebx,1
956 shl ebx,cl
957 dec ebx
958
959
960
961
962
963 cmp dword ptr [inflate_fast_use_mmx],2
964 jne L_update_hold
965
966
967
; MMX loop: apply the pending shift, move the low 32 bits of the
; accumulator to ebp and leave MMX state with emms.
968 psrlq mm0,mm1
969 movd ebp,mm0
970
971 emms
972
973L_update_hold:
974
975
976
; Keep only the remaining valid bits and store them in state->hold.
977 and ebp,ebx
978 mov [edx+hold_state],ebp
979
980
981
982
; Value stored at offset 4 = input limit - input pointer + 11. The two
; branches compute the same quantity for either ordering of the operands.
983 mov ebx, [esp+20]
984 cmp ebx,esi
985 jbe L_last_is_smaller
986
987 sub ebx,esi
988 add ebx,11
989 mov [eax+4],ebx
990 jmp L_fixup_out
991L_last_is_smaller:
992 sub esi,ebx
993 neg esi
994 add esi,11
995 mov [eax+4],esi
996
997
998
999
1000L_fixup_out:
1001
; Value stored at offset 16 = output limit - output pointer + 257, again
; computed for either ordering of the operands.
1002 mov ebx, [esp+16]
1003 cmp ebx,edi
1004 jbe L_end_is_smaller
1005
1006 sub ebx,edi
1007 add ebx,257
1008 mov [eax+16],ebx
1009 jmp L_done
1010L_end_is_smaller:
1011 sub edi,ebx
1012 neg edi
1013 add edi,257
1014 mov [eax+16],edi
1015
1016
1017
1018
1019
1020L_done:
; Release the 64 bytes of locals and restore EFLAGS and the four registers
; in the reverse of the order they were pushed in inflate_fast_entry.
1021 add esp,64
1022 popfd
1023 pop ebx
1024 pop ebp
1025 pop esi
1026 pop edi
1027 ret
1028
1029
1030
1031
1032_TEXT ends
1033end
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette