VirtualBox

source: vbox/trunk/src/libs/zlib-1.2.1/contrib/masmx86/gvmat32.asm@ 16236

Last change on this file since 16236 was 6392, checked in by vboxsync, 17 years ago

export libpng and zlib so Windows and OS/2 builds cleanly.

  • Property svn:eol-style set to native
File size: 21.1 KB
Line 
1;
2; gvmat32.asm -- Asm portion of the optimized longest_match for 32 bits x86
3; Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant.
4; File written by Gilles Vollant, by modifying the longest_match
5; from Jean-loup Gailly in deflate.c
6; It needs wmask == 0x7fff
7; (assembly code is faster with a fixed wmask)
8;
9; For Visual C++ 4.2 and ML 6.11c (version in directory \MASM611C of Win95 DDK)
10; I compile with : "ml /coff /Zi /c gvmat32.asm"
11;
12
13;uInt longest_match_7fff(s, cur_match)
14; deflate_state *s;
15; IPos cur_match; /* current match */
16
17 NbStack equ 76 ; esp-relative offset of the last stack argument once longest_match_7fff has built its frame
; Text equates naming the stack slots of longest_match_7fff. They are valid
; only after its four register pushes and its 'sub esp,NbStackAdd'.
; Note: 'window' and 'scan' are defined again further down with different
; meanings for longest_match_686.
18 cur_match equ dword ptr[esp+NbStack-0] ; second argument
19 str_s equ dword ptr[esp+NbStack-4] ; first argument (deflate_state *s)
20; 5 dwords on top (ret,ebp,edi,esi,ebx) -- return address, then registers in push order
21 adrret equ dword ptr[esp+NbStack-8]
22 pushebp equ dword ptr[esp+NbStack-12]
23 pushedi equ dword ptr[esp+NbStack-16]
24 pushesi equ dword ptr[esp+NbStack-20]
25 pushebx equ dword ptr[esp+NbStack-24]
26
; locals living in the NbStackAdd bytes reserved below the saved registers
27 chain_length equ dword ptr [esp+NbStack-28] ; remaining chain steps (kept biased inside the unrolled loop)
28 limit equ dword ptr [esp+NbStack-32] ; lowest cur_match value still followed
29 best_len equ dword ptr [esp+NbStack-36] ; length of the best match so far
30 window equ dword ptr [esp+NbStack-40] ; copy of s->window
31 prev equ dword ptr [esp+NbStack-44] ; copy of s->prev
32 scan_start equ word ptr [esp+NbStack-48] ; first two bytes at scan (16-bit slot)
33 wmask equ dword ptr [esp+NbStack-52] ; slot reserved; no instruction in this file references it
34 match_start_ptr equ dword ptr [esp+NbStack-56] ; slot reserved; no instruction in this file references it
35 nice_match equ dword ptr [esp+NbStack-60] ; min(s->nice_match, s->lookahead)
36 scan equ dword ptr [esp+NbStack-64] ; window + strstart
37
38 windowlen equ dword ptr [esp+NbStack-68] ; window + best_len - 1
39 match_start equ dword ptr [esp+NbStack-72] ; local copy of s->match_start, written back at exit
40 strend equ dword ptr [esp+NbStack-76] ; scan + MAX_MATCH - 1 (stored, never read back in this file)
41 NbStackAdd equ (NbStack-24) ; bytes of locals subtracted from esp after the register pushes
42
43 .386p
44
45 name gvmatch
46 .MODEL FLAT
47
48
49
50; all the +4 offsets are due to the addition of pending_buf_size in the zlib
51; deflate_state structure since the asm code was first written
52; (if you compile with zlib 1.0.4 or older, remove the +4).
53; Note: these values are valid for a structure packed on 8-byte boundaries
; Byte offsets of deflate_state fields, used as [ebp+dep_xxx] by
; longest_match_7fff. They are hard-coded, so they must be kept in step with
; the C structure layout by hand.
54 dep_chain_length equ 70h+4
55 dep_window equ 2ch+4
56 dep_strstart equ 60h+4
57 dep_prev_length equ 6ch+4
58 dep_nice_match equ 84h+4
59 dep_w_size equ 20h+4
60 dep_prev equ 34h+4
61 dep_w_mask equ 28h+4 ; defined but not referenced: the 7fff routine masks with the literal 7fffh
62 dep_good_match equ 80h+4
63 dep_match_start equ 64h+4
64 dep_lookahead equ 68h+4
65
66
67_TEXT segment
68
; Exported names: define NOUNDERLINE to export the symbols without the
; leading underscore.
69IFDEF NOUNDERLINE
70 public longest_match_7fff
71 public longest_match_686
72; public match_init
73ELSE
74 public _longest_match_7fff
75 public _longest_match_686
76; public _match_init
77ENDIF
78
; Match-length limits; MIN_LOOKAHEAD is subtracted from w_size to form MAX_DIST.
79 MAX_MATCH equ 258
80 MIN_MATCH equ 3
81 MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1)
82
83
84
; match_init is kept only as commented-out text; neither branch emits code.
85IFDEF NOUNDERLINE
86;match_init proc near
87; ret
88;match_init endp
89ELSE
90;_match_init proc near
91; ret
92;_match_init endp
93ENDIF
94
95
96IFDEF NOUNDERLINE
97longest_match_7fff proc near
98ELSE
99_longest_match_7fff proc near
100ENDIF
; uInt longest_match_7fff(deflate_state *s, IPos cur_match)
; Arguments are read from the stack: s at [esp+4] on entry, cur_match through
; the cur_match equate once the frame is built. ebp, edi, esi and ebx are
; pushed on entry and popped before ret. The prev[] index is masked with the
; literal 7fffh rather than with s->w_mask. On exit the routine stores the
; local match_start into s->match_start and returns
; min(best_len, s->lookahead) in eax.
101
102 mov edx,[esp+4]
103
104
105
106 push ebp
107 push edi
108 push esi
109 push ebx
110
111 sub esp,NbStackAdd
112
113; initialize or check the variables used in match.asm.
114 mov ebp,edx
115
116; chain_length = s->max_chain_length
117; if (prev_length>=good_match) chain_length >>= 2
118 mov edx,[ebp+dep_chain_length]
119 mov ebx,[ebp+dep_prev_length]
120 cmp [ebp+dep_good_match],ebx
121 ja noshr
122 shr edx,2
123noshr:
124; we increment chain_length because in the asm, the --chain_length is at the beginning of the loop
125 inc edx
126 mov edi,[ebp+dep_nice_match]
127 mov chain_length,edx
128 mov eax,[ebp+dep_lookahead]
129 cmp eax,edi
130; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
131 jae nolookaheadnicematch
132 mov edi,eax
133nolookaheadnicematch:
134; best_len = s->prev_length
135 mov best_len,ebx
136
137; window = s->window
138 mov esi,[ebp+dep_window]
139 mov ecx,[ebp+dep_strstart]
140 mov window,esi
141
142 mov nice_match,edi
143; scan = window + strstart
144 add esi,ecx
145 mov scan,esi
146; dx = *scan (esi = window + strstart here)
147 mov dx,word ptr [esi]
148; bx = *(scan+best_len-1)
149 mov bx,word ptr [esi+ebx-1]
150 add esi,MAX_MATCH-1
151; scan_start = *scan
152 mov scan_start,dx
153; strend = scan + MAX_MATCH-1
154 mov strend,esi
155; bx = scan_end = *(scan+best_len-1)
156
157; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
158; s->strstart - (IPos)MAX_DIST(s) : NIL;
159
160 mov esi,[ebp+dep_w_size]
161 sub esi,MIN_LOOKAHEAD
162; here esi = MAX_DIST(s)
163 sub ecx,esi
164 ja nodist
165 xor ecx,ecx
166nodist:
167 mov limit,ecx
168
169; prev = s->prev
170 mov edx,[ebp+dep_prev]
171 mov prev,edx
172
173; match_start = s->match_start; from here on bp holds scan_start, so s is
; no longer reachable through ebp (it is reloaded from str_s at exitloop)
174 mov edx,dword ptr [ebp+dep_match_start]
175 mov bp,scan_start
176 mov eax,cur_match
177 mov match_start,edx
178
179 mov edx,window
180 mov edi,edx
181 add edi,best_len
182 mov esi,prev
183 dec edi
184; windowlen = window + best_len -1
185 mov windowlen,edi
186
187 jmp beginloop2
188 align 4
189
190; here, in the loop
191; eax = ax = cur_match
192; ecx = limit
193; bx = scan_end
194; bp = scan_start
195; edi = windowlen (window + best_len -1)
196; esi = prev
; edx = window
197
198
199;// here; chain_length <=16
; the 16 subtracted for the unrolled loop is added back, so chain_length is
; again the plain number of remaining steps
200normalbeg0add16:
201 add chain_length,16
202 jz exitloop
203normalbeg0:
204 cmp word ptr[edi+eax],bx
205 je normalbeg2noroll
206rcontlabnoroll:
207; cur_match = prev[cur_match & wmask]
208 and eax,7fffh
209 mov ax,word ptr[esi+eax*2]
210; if cur_match <= limit, go to exitloop
211 cmp ecx,eax
212 jnb exitloop
213; if --chain_length != 0, loop again; otherwise exit
214 dec chain_length
215 jnz normalbeg0
216 jmp exitloop
217
218normalbeg2noroll:
219; if (scan_start==*(cur_match+window)) goto normalbeg2
220 cmp bp,word ptr[edx+eax]
221 jne rcontlabnoroll
222 jmp normalbeg2
223
224contloop3:
225 mov edi,windowlen
226
227; cur_match = prev[cur_match & wmask]
228 and eax,7fffh
229 mov ax,word ptr[esi+eax*2]
230; if cur_match <= limit, go to exitloop
231 cmp ecx,eax
232jnbexitloopshort1:
233 jnb exitloop
234; the --chain_length for this step is folded into the 'sub chain_length,16+1' below
235
236
237; begin the main loop
238beginloop2:
239 sub chain_length,16+1
240; if chain_length <=16, don't use the unrolled loop
241 jna normalbeg0add16
242
243do16:
244 cmp word ptr[edi+eax],bx
245 je normalbeg2dc0
246
247maccn MACRO lab
248 and eax,7fffh
249 mov ax,word ptr[esi+eax*2]
250 cmp ecx,eax
251 jnb exitloop
252 cmp word ptr[edi+eax],bx
253 je lab
254 ENDM
255
256rcontloop0:
257 maccn normalbeg2dc1
258
259rcontloop1:
260 maccn normalbeg2dc2
261
262rcontloop2:
263 maccn normalbeg2dc3
264
265rcontloop3:
266 maccn normalbeg2dc4
267
268rcontloop4:
269 maccn normalbeg2dc5
270
271rcontloop5:
272 maccn normalbeg2dc6
273
274rcontloop6:
275 maccn normalbeg2dc7
276
277rcontloop7:
278 maccn normalbeg2dc8
279
280rcontloop8:
281 maccn normalbeg2dc9
282
283rcontloop9:
284 maccn normalbeg2dc10
285
286rcontloop10:
287 maccn short normalbeg2dc11
288
289rcontloop11:
290 maccn short normalbeg2dc12
291
292rcontloop12:
293 maccn short normalbeg2dc13
294
295rcontloop13:
296 maccn short normalbeg2dc14
297
298rcontloop14:
299 maccn short normalbeg2dc15
300
301rcontloop15:
302 and eax,7fffh
303 mov ax,word ptr[esi+eax*2]
304 cmp ecx,eax
305 jnb exitloop
306
307 sub chain_length,16
308 ja do16
309 jmp normalbeg0add16
310
311;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
312
313normbeg MACRO rcontlab,valsub
314; if we are here, we know that *(match+best_len-1) == scan_end
315 cmp bp,word ptr[edx+eax]
316; if (*match != scan_start) goto rcontlab
317 jne rcontlab
318; calculate the good chain_length, and we'll compare scan and match string
319 add chain_length,16-valsub
320 jmp iseq
321 ENDM
322
323
324normalbeg2dc11:
325 normbeg rcontloop11,11
326
327normalbeg2dc12:
328 normbeg short rcontloop12,12
329
330normalbeg2dc13:
331 normbeg short rcontloop13,13
332
333normalbeg2dc14:
334 normbeg short rcontloop14,14
335
336normalbeg2dc15:
337 normbeg short rcontloop15,15
338
339normalbeg2dc10:
340 normbeg rcontloop10,10
341
342normalbeg2dc9:
343 normbeg rcontloop9,9
344
345normalbeg2dc8:
346 normbeg rcontloop8,8
347
348normalbeg2dc7:
349 normbeg rcontloop7,7
350
351normalbeg2dc6:
352 normbeg rcontloop6,6
353
354normalbeg2dc5:
355 normbeg rcontloop5,5
356
357normalbeg2dc4:
358 normbeg rcontloop4,4
359
360normalbeg2dc3:
361 normbeg rcontloop3,3
362
363normalbeg2dc2:
364 normbeg rcontloop2,2
365
366normalbeg2dc1:
367 normbeg rcontloop1,1
368
369normalbeg2dc0:
370 normbeg rcontloop0,0
371
372
373; we go in normalbeg2 because *(ushf*)(match+best_len-1) == scan_end
374
375normalbeg2:
376 mov edi,window
377
378 cmp bp,word ptr[edi+eax]
379 jne contloop3 ; if *(ushf*)match != scan_start, continue
380
381iseq:
382; if we are here, we know that *(match+best_len-1) == scan_end
383; and (*match == scan_start)
384
385 mov edi,edx
386 mov esi,scan ; esi = scan
387 add edi,eax ; edi = window + cur_match = match
388
; the byte at offset 2 is not compared by this routine
389 mov edx,[esi+3] ; compare manually dword at match+3
390 xor edx,[edi+3] ; and scan +3
391
392 jz begincompare ; if equal, go to long compare
393
394; determine the first mismatching byte and calculate len (in esi)
395 or dl,dl
396 je eq1rr
397 mov esi,3
398 jmp trfinval
399eq1rr:
400 or dx,dx
401 je eq1
402
403 mov esi,4
404 jmp trfinval
405eq1:
406 and edx,0ffffffh
407 jz eq11
408 mov esi,5
409 jmp trfinval
410eq11:
411 mov esi,6
412 jmp trfinval
413
414begincompare:
415 ; here we know scan and match begin the same
416 add edi,6
417 add esi,6
418 mov ecx,(MAX_MATCH-(2+4))/4 ; scan for at most MAX_MATCH bytes
419 repe cmpsd ; loop until mismatch
420
421 je trfin ; go to trfin if every dword matched
422; locate the first mismatching byte inside the last compared dword
423 sub esi,4
424 mov edx,[edi-4]
425 xor edx,[esi]
426
427 or dl,dl
428 jnz trfin
429 inc esi
430
431 or dx,dx
432 jnz trfin
433 inc esi
434
435 and edx,0ffffffh
436 jnz trfin
437 inc esi
438
439trfin:
440 sub esi,scan ; esi = len
441trfinval:
442; here we have finished the compare, and esi contains the length of the equal string
443 cmp esi,best_len ; if len > best_len, go newbestlen
444 ja short newbestlen
445; now we restore edx, ecx and esi, for the big loop
446 mov esi,prev
447 mov ecx,limit
448 mov edx,window
449 jmp contloop3
450
451newbestlen:
452 mov best_len,esi ; len become best_len
453
454 mov match_start,eax ; save new position as match_start
455 cmp esi,nice_match ; if best_len >= nice_match, exit
456 jae exitloop
457 mov ecx,scan
458 mov edx,window ; restore edx=window
459 add ecx,esi
460 add esi,edx
461
462 dec esi
463 mov windowlen,esi ; windowlen = window + best_len-1
464 mov bx,[ecx-1] ; bx = *(scan+best_len-1) = scan_end
465
466; now we restore ecx and esi, for the big loop :
467 mov esi,prev
468 mov ecx,limit
469 jmp contloop3
470
471exitloop:
472; exit : s->match_start=match_start
473 mov ebx,match_start
474 mov ebp,str_s
475 mov ecx,best_len
476 mov dword ptr [ebp+dep_match_start],ebx
477 mov eax,dword ptr [ebp+dep_lookahead]
478 cmp ecx,eax
479 ja minexlo
480 mov eax,ecx
481minexlo:
482; return min(best_len,s->lookahead)
483
484; restore stack and register ebx,esi,edi,ebp
485 add esp,NbStackAdd
486
487 pop ebx
488 pop esi
489 pop edi
490 pop ebp
491 ret
492InfoAuthor:
493; please don't remove this string !
494; Your are free use gvmat32 in any fre or commercial apps if you don't remove the string in the binary!
495 db 0dh,0ah,"GVMat32 optimised assembly code written 1996-98 by Gilles Vollant",0dh,0ah
496
497
498
499IFDEF NOUNDERLINE
500longest_match_7fff endp
501ELSE
502_longest_match_7fff endp
503ENDIF
504
505
506IFDEF NOUNDERLINE
507cpudetect32 proc near
508ELSE
509_cpudetect32 proc near
510ENDIF
; cpudetect32 -- takes no arguments; result in eax:
;   0300h  when the AC bit (40000h) of EFLAGS cannot be toggled
;   0400h  when AC toggles but the ID bit (200000h) cannot
;   otherwise the value CPUID leaves in eax for input eax=1
; ebx is saved and restored around the body; ecx is used as scratch, and
; CPUID, when executed, also writes ecx and edx.
; NOTE(review): no 'public' directive names this procedure in this file;
; presumably it relies on the assembler's default PROC visibility -- confirm.
511
512 push ebx
513
514 pushfd ; push original EFLAGS
515 pop eax ; get original EFLAGS
516 mov ecx, eax ; save original EFLAGS
517 xor eax, 40000h ; flip AC bit in EFLAGS
518 push eax ; save new EFLAGS value on stack
519 popfd ; replace current EFLAGS value
520 pushfd ; get new EFLAGS
521 pop eax ; store new EFLAGS in EAX
522 xor eax, ecx ; can't toggle AC bit, processor=80386
523 jz end_cpu_is_386 ; jump if 80386 processor
524 push ecx
525 popfd ; restore AC bit in EFLAGS first
526
; EFLAGS is pushed twice: one copy goes to ecx, the other stays on the stack
; until the 'popfd ; restore original EFLAGS' below
527 pushfd
528 pushfd
529 pop ecx
530
531 mov eax, ecx ; get original EFLAGS
532 xor eax, 200000h ; flip ID bit in EFLAGS
533 push eax ; save new EFLAGS value on stack
534 popfd ; replace current EFLAGS value
535 pushfd ; get new EFLAGS
536 pop eax ; store new EFLAGS in EAX
537 popfd ; restore original EFLAGS
538 xor eax, ecx ; can't toggle ID bit,
539 je is_old_486 ; processor=old
540
541 mov eax,1
542 db 0fh,0a2h ;CPUID
543
544exitcpudetect:
545 pop ebx
546 ret
547
548end_cpu_is_386:
549 mov eax,0300h
550 jmp exitcpudetect
551
552is_old_486:
553 mov eax,0400h
554 jmp exitcpudetect
555
556IFDEF NOUNDERLINE
557cpudetect32 endp
558ELSE
559_cpudetect32 endp
560ENDIF
561
562
563
564
565MAX_MATCH equ 258
566MIN_MATCH equ 3
567MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1)
; MAX_MATCH rounded up to a multiple of 8 (264 = 0108h). The mask must clear
; only the low three bits: the former 0FFF0h mask rounded to a multiple of 16
; and produced 256, which disagrees with the 0108h / 0fffffef8h literals that
; longest_match_686 uses for MAX_MATCH_8.
568MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF8h)
569
570
571;;; stack frame offsets
;;; (valid inside longest_match_686 after its four pushes and
;;; 'sub esp, LocalVarsSize'; 'window' and 'scan' replace the equates of the
;;; same name defined at the top of the file)
572
573chainlenwmask equ esp + 0 ; high word: current chain len
574 ; low word: s->wmask
575window equ esp + 4 ; local copy of s->window
576windowbestlen equ esp + 8 ; s->window + bestlen
577scanstart equ esp + 16 ; first two bytes of string
578scanend equ esp + 12 ; last two bytes of string
579scanalign equ esp + 20 ; dword-misalignment of string
580nicematch equ esp + 24 ; a good enough match size
581bestlen equ esp + 28 ; size of best match so far
582scan equ esp + 32 ; ptr to string wanting match
583
584LocalVarsSize equ 36
585; saved ebx at esp + 36
586; saved esi at esp + 40
587; saved edi at esp + 44
588; saved ebp at esp + 48
589; return address at esp + 52
590deflatestate equ esp + 56 ; the function arguments
591curmatch equ esp + 60
592
593;;; Offsets for fields in the deflate_state structure. These numbers
594;;; are calculated from the definition of deflate_state, with the
595;;; assumption that the compiler will dword-align the fields. (Thus,
596;;; changing the definition of deflate_state could easily cause this
597;;; program to crash horribly, without so much as a warning at
598;;; compile time. Sigh.)
599
600dsWSize equ 36
601dsWMask equ 44
602dsWindow equ 48
603dsPrev equ 56
604dsMatchLen equ 88
605dsPrevMatch equ 92
606dsStrStart equ 100
607dsMatchStart equ 104
608dsLookahead equ 108
609dsPrevLen equ 112
610dsMaxChainLen equ 116
611dsGoodMatch equ 132
612dsNiceMatch equ 136
613
614
615;;; match.asm -- Pentium-Pro-optimized version of longest_match()
616;;; Written for zlib 1.1.2
617;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
618;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html
619;;;
620;;; This is free software; you can redistribute it and/or modify it
621;;; under the terms of the GNU General Public License.
622
623;GLOBAL _longest_match, _match_init
624
625
626;SECTION .text
627
628;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
629
630;_longest_match:
631IFDEF NOUNDERLINE
632longest_match_686 proc near
633ELSE
634_longest_match_686 proc near
635ENDIF
;;; uInt longest_match_686(deflate_state *s, IPos cur_match)
;;; Both arguments are read from the stack through the deflatestate and
;;; curmatch equates. ebp, edi, esi and ebx are pushed on entry and popped
;;; before ret. s->match_start is written only when a longer match is
;;; found; the value returned in eax is min(bestlen, s->lookahead).
;;; NOTE(review): several tests below use the signed jumps jl/jg/jge on
;;; values that the C comments treat as unsigned; the results agree only
;;; while the operands stay below 2^31 -- confirm that this always holds.
636
637
638;;; Save registers that the compiler may be using, and adjust esp to
639;;; make room for our stack frame.
640
641 push ebp
642 push edi
643 push esi
644 push ebx
645 sub esp, LocalVarsSize
646
647;;; Retrieve the function arguments. ecx will hold cur_match
648;;; throughout the entire function. edx will hold the pointer to the
649;;; deflate_state structure during the function's setup (before
650;;; entering the main loop).
651
652 mov edx, [deflatestate]
653 mov ecx, [curmatch]
654
655;;; uInt wmask = s->w_mask;
656;;; unsigned chain_length = s->max_chain_length;
657;;; if (s->prev_length >= s->good_match) {
658;;; chain_length >>= 2;
659;;; }
660
661 mov eax, [edx + dsPrevLen]
662 mov ebx, [edx + dsGoodMatch]
663 cmp eax, ebx
664 mov eax, [edx + dsWMask]
665 mov ebx, [edx + dsMaxChainLen]
666 jl LastMatchGood
667 shr ebx, 2
668LastMatchGood:
669
670;;; chainlen is decremented once beforehand so that the function can
671;;; use the sign flag instead of the zero flag for the exit test.
672;;; It is then shifted into the high word, to make room for the wmask
673;;; value, which it will always accompany.
674
675 dec ebx
676 shl ebx, 16
677 or ebx, eax
678 mov [chainlenwmask], ebx
679
680;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
681
682 mov eax, [edx + dsNiceMatch]
683 mov ebx, [edx + dsLookahead]
684 cmp ebx, eax
685 jl LookaheadLess
686 mov ebx, eax
687LookaheadLess: mov [nicematch], ebx
688
689;;; register Bytef *scan = s->window + s->strstart;
690
691 mov esi, [edx + dsWindow]
692 mov [window], esi
693 mov ebp, [edx + dsStrStart]
694 lea edi, [esi + ebp]
695 mov [scan], edi
696
697;;; Determine how many bytes the scan ptr is off from being
698;;; dword-aligned.
699
700 mov eax, edi
701 neg eax
702 and eax, 3
703 mov [scanalign], eax
704
705;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
706;;; s->strstart - (IPos)MAX_DIST(s) : NIL;
707
708 mov eax, [edx + dsWSize]
709 sub eax, MIN_LOOKAHEAD
710 sub ebp, eax
711 jg LimitPositive
712 xor ebp, ebp
713LimitPositive:
714
715;;; int best_len = s->prev_length;
716
717 mov eax, [edx + dsPrevLen]
718 mov [bestlen], eax
719
720;;; Store the sum of s->window + best_len in esi, and locally in
;;; [windowbestlen].
721
722 add esi, eax
723 mov [windowbestlen], esi
724
725;;; register ush scan_start = *(ushf*)scan;
726;;; register ush scan_end = *(ushf*)(scan+best_len-1);
727;;; Posf *prev = s->prev;
728
729 movzx ebx, word ptr [edi]
730 mov [scanstart], ebx
731 movzx ebx, word ptr [edi + eax - 1]
732 mov [scanend], ebx
733 mov edi, [edx + dsPrev]
734
735;;; Jump into the main loop.
736
737 mov edx, [chainlenwmask]
738 jmp short LoopEntry
739
740align 4
741
742;;; do {
743;;; match = s->window + cur_match;
744;;; if (*(ushf*)(match+best_len-1) != scan_end ||
745;;; *(ushf*)match != scan_start) continue;
746;;; [...]
747;;; } while ((cur_match = prev[cur_match & wmask]) > limit
748;;; && --chain_length != 0);
749;;;
750;;; Here is the inner loop of the function. The function will spend the
751;;; majority of its time in this loop, and majority of that time will
752;;; be spent in the first ten instructions.
753;;;
754;;; Within this loop:
755;;; ebx = scanend
756;;; ecx = curmatch
757;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
758;;; esi = windowbestlen - i.e., (window + bestlen)
759;;; edi = prev
760;;; ebp = limit
761
762LookupLoop:
763 and ecx, edx
764 movzx ecx, word ptr [edi + ecx*2]
765 cmp ecx, ebp
766 jbe LeaveNow
767 sub edx, 00010000h
768 js LeaveNow
769LoopEntry: movzx eax, word ptr [esi + ecx - 1]
770 cmp eax, ebx
771 jnz LookupLoop
772 mov eax, [window]
773 movzx eax, word ptr [eax + ecx]
774 cmp eax, [scanstart]
775 jnz LookupLoop
776
777;;; Store the current value of chainlen.
778
779 mov [chainlenwmask], edx
780
781;;; Point edi to the string under scrutiny, and esi to the string we
782;;; are hoping to match it up with. In actuality, esi and edi are
783;;; both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and edx is
784;;; initialized to -(MAX_MATCH_8).
785
786 mov esi, [window]
787 mov edi, [scan]
788 add esi, ecx
789 mov eax, [scanalign]
790 mov edx, 0fffffef8h; -(MAX_MATCH_8)
791 lea edi, [edi + eax + 0108h] ;MAX_MATCH_8]
792 lea esi, [esi + eax + 0108h] ;MAX_MATCH_8]
793
794;;; Test the strings for equality, 8 bytes at a time. At the end,
795;;; adjust edx so that it is offset to the exact byte that mismatched.
796;;;
797;;; We already know at this point that the first three bytes of the
798;;; strings match each other, and they can be safely passed over before
799;;; starting the compare loop. So what this code does is skip over 0-3
800;;; bytes, as much as necessary in order to dword-align the edi
801;;; pointer. (esi will still be misaligned three times out of four.)
802;;;
803;;; It should be confessed that this loop usually does not represent
804;;; much of the total running time. Replacing it with a more
805;;; straightforward "rep cmpsb" would not drastically degrade
806;;; performance.
807
808LoopCmps:
809 mov eax, [esi + edx]
810 xor eax, [edi + edx]
811 jnz LeaveLoopCmps
812 mov eax, [esi + edx + 4]
813 xor eax, [edi + edx + 4]
814 jnz LeaveLoopCmps4
815 add edx, 8
816 jnz LoopCmps
817 jmp short LenMaximum
818LeaveLoopCmps4: add edx, 4
;;; eax holds the XOR of the mismatching dwords; its lowest non-zero byte
;;; marks the first differing position.
819LeaveLoopCmps: test eax, 0000FFFFh
820 jnz LenLower
821 add edx, 2
822 shr eax, 16
823LenLower: sub al, 1
824 adc edx, 0
825
826;;; Calculate the length of the match. If it is MAX_MATCH or longer,
827;;; then automatically accept it as the best possible match and leave.
828
829 lea eax, [edi + edx]
830 mov edi, [scan]
831 sub eax, edi
832 cmp eax, MAX_MATCH
833 jge LenMaximum
834
835;;; If the length of the match is not longer than the best match we
836;;; have so far, then forget it and return to the lookup loop.
837
838 mov edx, [deflatestate]
839 mov ebx, [bestlen]
840 cmp eax, ebx
841 jg LongerMatch
842 mov esi, [windowbestlen]
843 mov edi, [edx + dsPrev]
844 mov ebx, [scanend]
845 mov edx, [chainlenwmask]
846 jmp LookupLoop
847
848;;; s->match_start = cur_match;
849;;; best_len = len;
850;;; if (len >= nice_match) break;
851;;; scan_end = *(ushf*)(scan+best_len-1);
852
853LongerMatch: mov ebx, [nicematch]
854 mov [bestlen], eax
855 mov [edx + dsMatchStart], ecx
856 cmp eax, ebx
857 jge LeaveNow
858 mov esi, [window]
859 add esi, eax
860 mov [windowbestlen], esi
861 movzx ebx, word ptr [edi + eax - 1]
862 mov edi, [edx + dsPrev]
863 mov [scanend], ebx
864 mov edx, [chainlenwmask]
865 jmp LookupLoop
866
867;;; Accept the current string, with the maximum possible length.
868
869LenMaximum: mov edx, [deflatestate]
870 mov dword ptr [bestlen], MAX_MATCH
871 mov [edx + dsMatchStart], ecx
872
873;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
874;;; return s->lookahead;
875
876LeaveNow:
877 mov edx, [deflatestate]
878 mov ebx, [bestlen]
879 mov eax, [edx + dsLookahead]
880 cmp ebx, eax
881 jg LookaheadRet
882 mov eax, ebx
883LookaheadRet:
884
885;;; Restore the stack and return from whence we came.
886
887 add esp, LocalVarsSize
888 pop ebx
889 pop esi
890 pop edi
891 pop ebp
892
893 ret
894; please don't remove this string !
895; Your can freely use gvmat32 in any free or commercial app if you don't remove the string in the binary!
896 db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah
897
898IFDEF NOUNDERLINE
899longest_match_686 endp
900ELSE
901_longest_match_686 endp
902ENDIF
903
904_TEXT ends
905end
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette