VirtualBox

source: vbox/trunk/src/libs/zlib-1.2.1/contrib/inflate86/inffast.S@ 33944

Last change on this file since 33944 was 6392, checked in by vboxsync, 17 years ago

export libpng and zlib so Windows and OS/2 builds cleanly.

  • Property svn:eol-style set to native
File size: 42.0 KB
Line 
1/*
2 * inffast.S is a hand tuned assembler version of:
3 *
4 * inffast.c -- fast decoding
5 * Copyright (C) 1995-2003 Mark Adler
6 * For conditions of distribution and use, see copyright notice in zlib.h
7 *
8 * Copyright (C) 2003 Chris Anderson <[email protected]>
9 * Please use the copyright conditions above.
10 *
11 * This version (Jan-23-2003) of inflate_fast was coded and tested under
12 * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution. On that
13 * machine, I found that gzip style archives decompressed about 20% faster than
14 * the gcc-3.2 -O3 -fomit-frame-pointer compiled version. Your results will
15 * depend on how large of a buffer is used for z_stream.next_in & next_out
16 * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in
17 * stream processing I/O and crc32/adler32. In my case, this routine used
18 * 70% of the cpu time and crc32 used 20%.
19 *
20 * I am confident that this version will work in the general case, but I have
21 * not tested a wide variety of datasets or a wide variety of platforms.
22 *
23 * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating.
24 * It should be a runtime flag instead of compile time flag...
25 *
26 * Jan-26-2003 -- Added runtime check for MMX support with cpuid instruction.
27 * With -DUSE_MMX, only MMX code is compiled. With -DNO_MMX, only non-MMX code
28 * is compiled. Without either option, runtime detection is enabled. Runtime
29 * detection should work on all modern cpus and the recommended algorithm (flip
30 * ID bit on eflags and then use the cpuid instruction) is used in many
31 * multimedia applications. Tested under win2k with gcc-2.95 and gas-2.12
32 * distributed with cygwin3. Compiling with gcc-2.95 -c inffast.S -o
33 * inffast.obj generates a COFF object which can then be linked with MSVC++
34 * compiled code. Tested under FreeBSD 4.7 with gcc-2.95.
35 *
36 * Jan-28-2003 -- Tested Athlon XP... MMX mode is slower than no MMX (and
37 * slower than compiler generated code). Adjusted cpuid check to use the MMX
38 * code only for Pentiums < P4 until I have more data on the P4. Speed
39 * improvement is only about 15% on the Athlon when compared with code generated
40 * with MSVC++. Not sure yet, but I think the P4 will also be slower using the
41 * MMX mode because many of its x86 ALU instructions execute in .5 cycles and
42 * have less latency than MMX ops. Added code to buffer the last 11 bytes of
43 * the input stream since the MMX code grabs bits in chunks of 32, which
44 * differs from the inffast.c algorithm. I don't think there would have been
45 * read overruns where a page boundary was crossed (a segfault), but there
46 * could have been overruns when next_in ends on unaligned memory (uninitialized
47 * memory read).
48 *
49 * Mar-13-2003 -- P4 MMX is slightly slower than P4 NO_MMX. I created a C
50 * version of the non-MMX code so that it doesn't depend on zstrm and zstate
51 * structure offsets which are hard coded in this file. This was last tested
52 * with zlib-1.2.0 which is currently in beta testing, newer versions of this
53 * and inffas86.c can be found at http://www.eetbeetee.com/zlib/ and
54 * http://www.charm.net/~christop/zlib/
55 */
56
57
58/*
59 * if you have underscore linking problems (_inflate_fast undefined), try
60 * using -DGAS_COFF
61 */
62#if ! defined( GAS_COFF ) && ! defined( GAS_ELF ) /* no object format forced by the builder: pick one */
63
64#if defined( WIN32 ) || defined( __CYGWIN__ )
65#define GAS_COFF /* windows object format */
66#else
67#define GAS_ELF /* default for every other target */
68#endif
69
70#endif /* ! GAS_COFF && ! GAS_ELF */
71
72
73#if defined( GAS_COFF )
74
75/* coff externals have underscores: rename both exported symbols via cpp */
76#define inflate_fast _inflate_fast
77#define inflate_fast_use_mmx _inflate_fast_use_mmx
78
79#endif /* GAS_COFF */
80
81
82.file "inffast.S"
83
84.globl inflate_fast /* exported entry point; the label itself is defined further down */
85
86.text /* the error message strings below are emitted into .text */
87.align 4,0
88.L_invalid_literal_length_code_msg: /* stored in strm->msg by .L_invalid_literal_length_code */
89.string "invalid literal/length code"
90
91.align 4,0
92.L_invalid_distance_code_msg: /* stored in strm->msg by .L_invalid_distance_code */
93.string "invalid distance code"
94
95.align 4,0
96.L_invalid_distance_too_far_msg: /* stored in strm->msg by .L_invalid_distance_too_far */
97.string "invalid distance too far back"
98
99#if ! defined( NO_MMX ) /* in the code visible here only the MMX paths index this table */
100.align 4,0
101.L_mask: /* mask[N] = ( 1 << N ) - 1, N = 0..32; read as .L_mask(,%eax,4) with %eax = op */
102.long 0 /* N = 0 */
103.long 1
104.long 3
105.long 7
106.long 15
107.long 31
108.long 63
109.long 127
110.long 255
111.long 511
112.long 1023
113.long 2047
114.long 4095
115.long 8191
116.long 16383
117.long 32767
118.long 65535
119.long 131071
120.long 262143
121.long 524287
122.long 1048575
123.long 2097151
124.long 4194303
125.long 8388607
126.long 16777215
127.long 33554431
128.long 67108863
129.long 134217727
130.long 268435455
131.long 536870911
132.long 1073741823
133.long 2147483647
134.long 4294967295 /* N = 32: all ones */
135#endif /* NO_MMX */
136
137.text
138
139/*
140 * struct z_stream offsets, in zlib.h (byte offsets hard coded here; they must agree with the struct layout in use)
141 */
142#define next_in_strm 0 /* strm->next_in */
143#define avail_in_strm 4 /* strm->avail_in */
144#define next_out_strm 12 /* strm->next_out */
145#define avail_out_strm 16 /* strm->avail_out */
146#define msg_strm 24 /* strm->msg, written by .L_update_stream_state when a message is set */
147#define state_strm 28 /* strm->state */
148
149/*
150 * struct inflate_state offsets, in inflate.h (byte offsets hard coded here as well)
151 */
152#define mode_state 0 /* state->mode, written by .L_update_stream_state */
153#define wsize_state 32 /* state->wsize */
154#define write_state 40 /* state->write */
155#define window_state 44 /* state->window */
156#define hold_state 48 /* state->hold */
157#define bits_state 52 /* state->bits */
158#define lencode_state 68 /* state->lencode */
159#define distcode_state 72 /* state->distcode */
160#define lenbits_state 76 /* state->lenbits */
161#define distbits_state 80 /* state->distbits */
162
163/*
164 * inflate_fast's activation record (offsets from %esp once the prologue has run)
165 */
166#define local_var_size 64 /* how much local space for vars; subtracted from %esp in the prologue */
167#define strm_sp 88 /* first arg: z_stream * (local_var_size + 24); the 24 covers the four pushed registers, the pushed eflags and, presumably, the return address */
168#define start_sp 92 /* second arg: unsigned int (local_var_size + 28) */
169
170/*
171 * offsets for local vars on stack
172 */
173#define out 60 /* unsigned char*: strm->next_out at entry */
174#define window 56 /* unsigned char*: state->window */
175#define wsize 52 /* unsigned int: state->wsize */
176#define write 48 /* unsigned int: state->write */
177#define in 44 /* unsigned char*: strm->next_in at entry; also parks in while %esi is used as from */
178#define beg 40 /* unsigned char*: out - (start - avail_out) */
179#define buf 28 /* char[ 12 ]: zero padded copy of the input, used when in >= last at entry */
180#define len 24 /* unsigned int: match length saved by the non-MMX path */
181#define last 20 /* unsigned char*: in + avail_in - 11 */
182#define end 16 /* unsigned char*: out + (avail_out - 257) */
183#define dcode 12 /* code*: state->distcode */
184#define lcode 8 /* code*: state->lencode */
185#define dmask 4 /* unsigned int: (1 << state->distbits) - 1 */
186#define lmask 0 /* unsigned int: (1 << state->lenbits) - 1 */
187
188/*
189 * typedef enum inflate_mode consts, in inflate.h (numeric values hard coded; NOTE(review): confirm against the inflate.h in this tree)
190 */
191#ifndef NO_GUNZIP /* GUNZIP numbering is the default unless NO_GUNZIP is defined */
192#define GUNZIP
193#endif
194
195#ifdef GUNZIP
196#define INFLATE_MODE_TYPE 11 /* state->mode flags enum-ed in inflate.h; stored on end of block */
197#define INFLATE_MODE_BAD 26 /* stored in state->mode on the invalid code / distance paths */
198#else
199#define INFLATE_MODE_TYPE 3
200#define INFLATE_MODE_BAD 17
201#endif
202
203
204#if ! defined( USE_MMX ) && ! defined( NO_MMX ) /* neither variant forced: decide at run time */
205
206#define RUN_TIME_MMX
207
208#define CHECK_MMX 1 /* not decided yet: .L_check_mmx runs the cpuid probe */
209#define DO_USE_MMX 2 /* .L_check_mmx jumps to .L_init_mmx */
210#define DONT_USE_MMX 3 /* .L_check_mmx jumps to .L_do_loop (non-MMX) */
211
212.globl inflate_fast_use_mmx
213
214.data
215
216.align 4,0
217inflate_fast_use_mmx: /* integer flag for run time control 1=check,2=mmx,3=no */
218.long CHECK_MMX /* initial value; overwritten with DO_USE_MMX or DONT_USE_MMX by the probe */
219
220#if defined( GAS_ELF )
221/* elf info: mark the flag as a 4 byte data object */
222.type inflate_fast_use_mmx,@object
223.size inflate_fast_use_mmx,4
224#endif
225
226#endif /* RUN_TIME_MMX */
227
228#if defined( GAS_COFF )
229/* coff info: scl 2 = extern, type 32 = function */
230.def inflate_fast; .scl 2; .type 32; .endef
231#endif
232
233.text
234
235.align 32,0x90
236inflate_fast:
237 pushl %edi
238 pushl %esi
239 pushl %ebp
240 pushl %ebx
241 pushf /* save eflags (strm_sp, state_sp assumes this is 32 bits) */
242 subl $local_var_size, %esp
243 cld
244
245#define strm_r %esi
246#define state_r %edi
247
248 movl strm_sp(%esp), strm_r
249 movl state_strm(strm_r), state_r
250
251 /* in = strm->next_in;
252 * out = strm->next_out;
253 * last = in + strm->avail_in - 11;
254 * beg = out - (start - strm->avail_out);
255 * end = out + (strm->avail_out - 257);
256 */
257 movl avail_in_strm(strm_r), %edx
258 movl next_in_strm(strm_r), %eax
259
260 addl %eax, %edx /* avail_in += next_in */
261 subl $11, %edx /* avail_in -= 11 */
262
263 movl %eax, in(%esp)
264 movl %edx, last(%esp)
265
266 movl start_sp(%esp), %ebp
267 movl avail_out_strm(strm_r), %ecx
268 movl next_out_strm(strm_r), %ebx
269
270 subl %ecx, %ebp /* start -= avail_out */
271 negl %ebp /* start = -start */
272 addl %ebx, %ebp /* start += next_out */
273
274 subl $257, %ecx /* avail_out -= 257 */
275 addl %ebx, %ecx /* avail_out += out */
276
277 movl %ebx, out(%esp)
278 movl %ebp, beg(%esp)
279 movl %ecx, end(%esp)
280
281 /* wsize = state->wsize;
282 * write = state->write;
283 * window = state->window;
284 * hold = state->hold;
285 * bits = state->bits;
286 * lcode = state->lencode;
287 * dcode = state->distcode;
288 * lmask = ( 1 << state->lenbits ) - 1;
289 * dmask = ( 1 << state->distbits ) - 1;
290 */
291
292 movl lencode_state(state_r), %eax
293 movl distcode_state(state_r), %ecx
294
295 movl %eax, lcode(%esp)
296 movl %ecx, dcode(%esp)
297
298 movl $1, %eax
299 movl lenbits_state(state_r), %ecx
300 shll %cl, %eax
301 decl %eax
302 movl %eax, lmask(%esp)
303
304 movl $1, %eax
305 movl distbits_state(state_r), %ecx
306 shll %cl, %eax
307 decl %eax
308 movl %eax, dmask(%esp)
309
310 movl wsize_state(state_r), %eax
311 movl write_state(state_r), %ecx
312 movl window_state(state_r), %edx
313
314 movl %eax, wsize(%esp)
315 movl %ecx, write(%esp)
316 movl %edx, window(%esp)
317
318 movl hold_state(state_r), %ebp
319 movl bits_state(state_r), %ebx
320
321#undef strm_r
322#undef state_r
323
324#define in_r %esi
325#define from_r %esi
326#define out_r %edi
327
328 movl in(%esp), in_r
329 movl last(%esp), %ecx
330 cmpl in_r, %ecx
331 ja .L_align_long /* if in < last */
332
333 addl $11, %ecx /* ecx = &in[ avail_in ] */
334 subl in_r, %ecx /* ecx = avail_in */
335 movl $12, %eax
336 subl %ecx, %eax /* eax = 12 - avail_in */
337 leal buf(%esp), %edi
338 rep movsb /* memcpy( buf, in, avail_in ) */
339 movl %eax, %ecx
340 xorl %eax, %eax
341 rep stosb /* memset( &buf[ avail_in ], 0, 12 - avail_in ) */
342 leal buf(%esp), in_r /* in = buf */
343 movl in_r, last(%esp) /* last = in, do just one iteration */
344 jmp .L_is_aligned
345
346 /* align in_r on long boundary */
347.L_align_long:
348 testl $3, in_r
349 jz .L_is_aligned
350 xorl %eax, %eax
351 movb (in_r), %al
352 incl in_r
353 movl %ebx, %ecx
354 addl $8, %ebx
355 shll %cl, %eax
356 orl %eax, %ebp
357 jmp .L_align_long
358
359.L_is_aligned:
360 movl out(%esp), out_r
361
362#if defined( NO_MMX )
363 jmp .L_do_loop
364#endif
365
366#if defined( USE_MMX )
367 jmp .L_init_mmx
368#endif
369
370/*** Runtime MMX check ***/
371
372#if defined( RUN_TIME_MMX )
373.L_check_mmx:
374 cmpl $DO_USE_MMX, inflate_fast_use_mmx
375 je .L_init_mmx
376 ja .L_do_loop /* > 2 */
377
378 pushl %eax
379 pushl %ebx
380 pushl %ecx
381 pushl %edx
382 pushf
383 movl (%esp), %eax /* copy eflags to eax */
384 xorl $0x200000, (%esp) /* try toggling ID bit of eflags (bit 21)
385 * to see if cpu supports cpuid...
386 * ID bit method not supported by NexGen but
387 * bios may load a cpuid instruction and
388 * cpuid may be disabled on Cyrix 5-6x86 */
389 popf
390 pushf
391 popl %edx /* copy new eflags to edx */
392 xorl %eax, %edx /* test if ID bit is flipped */
393 jz .L_dont_use_mmx /* not flipped if zero */
394 xorl %eax, %eax
395 cpuid
396 cmpl $0x756e6547, %ebx /* check for GenuineIntel in ebx,ecx,edx */
397 jne .L_dont_use_mmx
398 cmpl $0x6c65746e, %ecx
399 jne .L_dont_use_mmx
400 cmpl $0x49656e69, %edx
401 jne .L_dont_use_mmx
402 movl $1, %eax
403 cpuid /* get cpu features */
404 shrl $8, %eax
405 andl $15, %eax
406 cmpl $6, %eax /* check for Pentium family, is 0xf for P4 */
407 jne .L_dont_use_mmx
408 testl $0x800000, %edx /* test if MMX feature is set (bit 23) */
409 jnz .L_use_mmx
410 jmp .L_dont_use_mmx
411.L_use_mmx:
412 movl $DO_USE_MMX, inflate_fast_use_mmx
413 jmp .L_check_mmx_pop
414.L_dont_use_mmx:
415 movl $DONT_USE_MMX, inflate_fast_use_mmx
416.L_check_mmx_pop:
417 popl %edx
418 popl %ecx
419 popl %ebx
420 popl %eax
421 jmp .L_check_mmx
422#endif
423
424
425/*** Non-MMX code ***/
426
427#if defined ( NO_MMX ) || defined( RUN_TIME_MMX )
428
429#define hold_r %ebp
430#define bits_r %bl
431#define bitslong_r %ebx
432
433.align 32,0x90
434.L_while_test:
435 /* while (in < last && out < end)
436 */
437 cmpl out_r, end(%esp)
438 jbe .L_break_loop /* if (out >= end) */
439
440 cmpl in_r, last(%esp)
441 jbe .L_break_loop
442
443.L_do_loop:
444 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
445 *
446 * do {
447 * if (bits < 15) {
448 * hold |= *((unsigned short *)in)++ << bits;
449 * bits += 16
450 * }
451 * this = lcode[hold & lmask]
452 */
453 cmpb $15, bits_r
454 ja .L_get_length_code /* if (15 < bits) */
455
456 xorl %eax, %eax
457 lodsw /* al = *(ushort *)in++ */
458 movb bits_r, %cl /* cl = bits, needs it for shifting */
459 addb $16, bits_r /* bits += 16 */
460 shll %cl, %eax
461 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
462
463.L_get_length_code:
464 movl lmask(%esp), %edx /* edx = lmask */
465 movl lcode(%esp), %ecx /* ecx = lcode */
466 andl hold_r, %edx /* edx &= hold */
467 movl (%ecx,%edx,4), %eax /* eax = lcode[hold & lmask] */
468
469.L_dolen:
470 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
471 *
472 * dolen:
473 * bits -= this.bits;
474 * hold >>= this.bits
475 */
476 movb %ah, %cl /* cl = this.bits */
477 subb %ah, bits_r /* bits -= this.bits */
478 shrl %cl, hold_r /* hold >>= this.bits */
479
480 /* check if op is a literal
481 * if (op == 0) {
482 * PUP(out) = this.val;
483 * }
484 */
485 testb %al, %al
486 jnz .L_test_for_length_base /* if (op != 0) 45.7% */
487
488 shrl $16, %eax /* output this.val char */
489 stosb
490 jmp .L_while_test
491
492.L_test_for_length_base:
493 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len
494 *
495 * else if (op & 16) {
496 * len = this.val
497 * op &= 15
498 * if (op) {
499 * if (op > bits) {
500 * hold |= *((unsigned short *)in)++ << bits;
501 * bits += 16
502 * }
503 * len += hold & mask[op];
504 * bits -= op;
505 * hold >>= op;
506 * }
507 */
508#define len_r %edx
509 movl %eax, len_r /* len = this */
510 shrl $16, len_r /* len = this.val */
511 movb %al, %cl
512
513 testb $16, %al
514 jz .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
515 andb $15, %cl /* op &= 15 */
516 jz .L_save_len /* if (!op) */
517 cmpb %cl, bits_r
518 jae .L_add_bits_to_len /* if (op <= bits) */
519
520 movb %cl, %ch /* stash op in ch, freeing cl */
521 xorl %eax, %eax
522 lodsw /* al = *(ushort *)in++ */
523 movb bits_r, %cl /* cl = bits, needs it for shifting */
524 addb $16, bits_r /* bits += 16 */
525 shll %cl, %eax
526 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
527 movb %ch, %cl /* move op back to ecx */
528
529.L_add_bits_to_len:
530 movl $1, %eax
531 shll %cl, %eax
532 decl %eax
533 subb %cl, bits_r
534 andl hold_r, %eax /* eax &= hold */
535 shrl %cl, hold_r
536 addl %eax, len_r /* len += hold & mask[op] */
537
538.L_save_len:
539 movl len_r, len(%esp) /* save len */
540#undef len_r
541
542.L_decode_distance:
543 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist
544 *
545 * if (bits < 15) {
546 * hold |= *((unsigned short *)in)++ << bits;
547 * bits += 16
548 * }
549 * this = dcode[hold & dmask];
550 * dodist:
551 * bits -= this.bits;
552 * hold >>= this.bits;
553 * op = this.op;
554 */
555
556 cmpb $15, bits_r
557 ja .L_get_distance_code /* if (15 < bits) */
558
559 xorl %eax, %eax
560 lodsw /* al = *(ushort *)in++ */
561 movb bits_r, %cl /* cl = bits, needs it for shifting */
562 addb $16, bits_r /* bits += 16 */
563 shll %cl, %eax
564 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
565
566.L_get_distance_code:
567 movl dmask(%esp), %edx /* edx = dmask */
568 movl dcode(%esp), %ecx /* ecx = dcode */
569 andl hold_r, %edx /* edx &= hold */
570 movl (%ecx,%edx,4), %eax /* eax = dcode[hold & dmask] */
571
572#define dist_r %edx
573.L_dodist:
574 movl %eax, dist_r /* dist = this */
575 shrl $16, dist_r /* dist = this.val */
576 movb %ah, %cl
577 subb %ah, bits_r /* bits -= this.bits */
578 shrl %cl, hold_r /* hold >>= this.bits */
579
580 /* if (op & 16) {
581 * dist = this.val
582 * op &= 15
583 * if (op > bits) {
584 * hold |= *((unsigned short *)in)++ << bits;
585 * bits += 16
586 * }
587 * dist += hold & mask[op];
588 * bits -= op;
589 * hold >>= op;
590 */
591 movb %al, %cl /* cl = this.op */
592
593 testb $16, %al /* if ((op & 16) == 0) */
594 jz .L_test_for_second_level_dist
595 andb $15, %cl /* op &= 15 */
596 jz .L_check_dist_one
597 cmpb %cl, bits_r
598 jae .L_add_bits_to_dist /* if (op <= bits) 97.6% */
599
600 movb %cl, %ch /* stash op in ch, freeing cl */
601 xorl %eax, %eax
602 lodsw /* al = *(ushort *)in++ */
603 movb bits_r, %cl /* cl = bits, needs it for shifting */
604 addb $16, bits_r /* bits += 16 */
605 shll %cl, %eax
606 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
607 movb %ch, %cl /* move op back to ecx */
608
609.L_add_bits_to_dist:
610 movl $1, %eax
611 shll %cl, %eax
612 decl %eax /* (1 << op) - 1 */
613 subb %cl, bits_r
614 andl hold_r, %eax /* eax &= hold */
615 shrl %cl, hold_r
616 addl %eax, dist_r /* dist += hold & ((1 << op) - 1) */
617 jmp .L_check_window
618
619.L_check_window:
620 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
621 * %ecx = nbytes
622 *
623 * nbytes = out - beg;
624 * if (dist <= nbytes) {
625 * from = out - dist;
626 * do {
627 * PUP(out) = PUP(from);
628 * } while (--len > 0) {
629 * }
630 */
631
632 movl in_r, in(%esp) /* save in so from can use it's reg */
633 movl out_r, %eax
634 subl beg(%esp), %eax /* nbytes = out - beg */
635
636 cmpl dist_r, %eax
637 jb .L_clip_window /* if (dist > nbytes) 4.2% */
638
639 movl len(%esp), %ecx
640 movl out_r, from_r
641 subl dist_r, from_r /* from = out - dist */
642
643 subl $3, %ecx
644 movb (from_r), %al
645 movb %al, (out_r)
646 movb 1(from_r), %al
647 movb 2(from_r), %dl
648 addl $3, from_r
649 movb %al, 1(out_r)
650 movb %dl, 2(out_r)
651 addl $3, out_r
652 rep movsb
653
654 movl in(%esp), in_r /* move in back to %esi, toss from */
655 jmp .L_while_test
656
657.align 16,0x90
658.L_check_dist_one:
659 cmpl $1, dist_r
660 jne .L_check_window
661 cmpl out_r, beg(%esp)
662 je .L_check_window
663
664 decl out_r
665 movl len(%esp), %ecx
666 movb (out_r), %al
667 subl $3, %ecx
668
669 movb %al, 1(out_r)
670 movb %al, 2(out_r)
671 movb %al, 3(out_r)
672 addl $4, out_r
673 rep stosb
674
675 jmp .L_while_test
676
677.align 16,0x90
678.L_test_for_second_level_length:
679 /* else if ((op & 64) == 0) {
680 * this = lcode[this.val + (hold & mask[op])];
681 * }
682 */
683 testb $64, %al
684 jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */
685
686 movl $1, %eax
687 shll %cl, %eax
688 decl %eax
689 andl hold_r, %eax /* eax &= hold */
690 addl %edx, %eax /* eax += this.val */
691 movl lcode(%esp), %edx /* edx = lcode */
692 movl (%edx,%eax,4), %eax /* eax = lcode[val + (hold&mask[op])] */
693 jmp .L_dolen
694
695.align 16,0x90
696.L_test_for_second_level_dist:
697 /* else if ((op & 64) == 0) {
698 * this = dcode[this.val + (hold & mask[op])];
699 * }
700 */
701 testb $64, %al
702 jnz .L_invalid_distance_code /* if ((op & 64) != 0) */
703
704 movl $1, %eax
705 shll %cl, %eax
706 decl %eax
707 andl hold_r, %eax /* eax &= hold */
708 addl %edx, %eax /* eax += this.val */
709 movl dcode(%esp), %edx /* edx = dcode */
710 movl (%edx,%eax,4), %eax /* eax = dcode[val + (hold&mask[op])] */
711 jmp .L_dodist
712
713.align 16,0x90
714.L_clip_window:
715 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
716 * %eax = nbytes on entry (out - beg, from .L_check_window), moved to %ecx
717 *
718 * else {
719 * if (dist > wsize) {
720 * invalid distance
721 * }
722 * from = window;
723 * nbytes = dist - nbytes;
724 * if (write == 0) {
725 * from += wsize - nbytes;
726 */
727#define nbytes_r %ecx
728 movl %eax, nbytes_r
729 movl wsize(%esp), %eax /* prepare for dist compare */
730 negl nbytes_r /* nbytes = -nbytes */
731 movl window(%esp), from_r /* from = window */
732
733 cmpl dist_r, %eax /* wsize - dist, unsigned */
734 jb .L_invalid_distance_too_far /* if (dist > wsize) */
735
736 addl dist_r, nbytes_r /* nbytes = dist - nbytes */
737 cmpl $0, write(%esp)
738 jne .L_wrap_around_window /* if (write != 0) */
739
740 subl nbytes_r, %eax /* eax = wsize - nbytes */
741 addl %eax, from_r /* from += wsize - nbytes */
742
743 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
744 * %ecx = nbytes, %eax = len
745 *
746 * if (nbytes < len) {
747 * len -= nbytes;
748 * do {
749 * PUP(out) = PUP(from);
750 * } while (--nbytes);
751 * from = out - dist;
752 * }
753 * }
754 */
755#define len_r %eax
756 movl len(%esp), len_r
757 cmpl nbytes_r, len_r
758 jbe .L_do_copy1 /* if (nbytes >= len) */
759
760 subl nbytes_r, len_r /* len -= nbytes */
761 rep movsb /* copy the nbytes bytes that live in the window */
762 movl out_r, from_r
763 subl dist_r, from_r /* from = out - dist */
764 jmp .L_do_copy1 /* .L_do_copy1 copies the remaining len bytes */
774
775.L_wrap_around_window:
776 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
777 * %ecx = nbytes, %eax = write, %eax = len
778 *
779 * else if (write < nbytes) {
780 * from += wsize + write - nbytes;
781 * nbytes -= write;
782 * if (nbytes < len) {
783 * len -= nbytes;
784 * do {
785 * PUP(out) = PUP(from);
786 * } while (--nbytes);
787 * from = window;
788 * nbytes = write;
789 * if (nbytes < len) {
790 * len -= nbytes;
791 * do {
792 * PUP(out) = PUP(from);
793 * } while(--nbytes);
794 * from = out - dist;
795 * }
796 * }
797 * }
798 */
799#define write_r %eax
800 movl write(%esp), write_r
801 cmpl write_r, nbytes_r
802 jbe .L_contiguous_in_window /* if (write >= nbytes) */
803
804 addl wsize(%esp), from_r
805 addl write_r, from_r
806 subl nbytes_r, from_r /* from += wsize + write - nbytes */
807 subl write_r, nbytes_r /* nbytes -= write */
808#undef write_r
809
810 movl len(%esp), len_r
811 cmpl nbytes_r, len_r
812 jbe .L_do_copy1 /* if (nbytes >= len) */
813
814 subl nbytes_r, len_r /* len -= nbytes */
815 rep movsb
816 movl window(%esp), from_r /* from = window */
817 movl write(%esp), nbytes_r /* nbytes = write */
818 cmpl nbytes_r, len_r
819 jbe .L_do_copy1 /* if (nbytes >= len) */
820
821 subl nbytes_r, len_r /* len -= nbytes */
822 rep movsb
823 movl out_r, from_r
824 subl dist_r, from_r /* from = out - dist */
825 jmp .L_do_copy1
826
827.L_contiguous_in_window:
828 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
829 * %ecx = nbytes, %eax = write, %eax = len
830 *
831 * else {
832 * from += write - nbytes;
833 * if (nbytes < len) {
834 * len -= nbytes;
835 * do {
836 * PUP(out) = PUP(from);
837 * } while (--nbytes);
838 * from = out - dist;
839 * }
840 * }
841 */
842#define write_r %eax
843 addl write_r, from_r
844 subl nbytes_r, from_r /* from += write - nbytes */
845#undef write_r
846
847 movl len(%esp), len_r
848 cmpl nbytes_r, len_r
849 jbe .L_do_copy1 /* if (nbytes >= len) */
850
851 subl nbytes_r, len_r /* len -= nbytes */
852 rep movsb
853 movl out_r, from_r
854 subl dist_r, from_r /* from = out - dist */
855
856.L_do_copy1:
857 /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out
858 * %eax = len
859 *
860 * while (len > 0) {
861 * PUP(out) = PUP(from);
862 * len--;
863 * }
864 * }
865 * } while (in < last && out < end);
866 */
867#undef nbytes_r
868#define in_r %esi
869 movl len_r, %ecx
870 rep movsb
871
872 movl in(%esp), in_r /* move in back to %esi, toss from */
873 jmp .L_while_test
874
875#undef len_r
876#undef dist_r
877
878#endif /* NO_MMX || RUN_TIME_MMX */
879
880
881/*** MMX code ***/
882
883#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
884
885.align 32,0x90
886.L_init_mmx:
887 emms
888
889#undef bits_r
890#undef bitslong_r
891#define bitslong_r %ebp
892#define hold_mm %mm0
893 movd %ebp, hold_mm
894 movl %ebx, bitslong_r
895
896#define used_mm %mm1
897#define dmask2_mm %mm2
898#define lmask2_mm %mm3
899#define lmask_mm %mm4
900#define dmask_mm %mm5
901#define tmp_mm %mm6
902
903 movd lmask(%esp), lmask_mm
904 movq lmask_mm, lmask2_mm
905 movd dmask(%esp), dmask_mm
906 movq dmask_mm, dmask2_mm
907 pxor used_mm, used_mm
908 movl lcode(%esp), %ebx /* ebx = lcode */
909 jmp .L_do_loop_mmx
910
911.align 32,0x90
912.L_while_test_mmx:
913 /* while (in < last && out < end)
914 */
915 cmpl out_r, end(%esp)
916 jbe .L_break_loop /* if (out >= end) */
917
918 cmpl in_r, last(%esp)
919 jbe .L_break_loop
920
921.L_do_loop_mmx:
922 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
923
924 cmpl $32, bitslong_r
925 ja .L_get_length_code_mmx /* if (32 < bits) */
926
927 movd bitslong_r, tmp_mm
928 movd (in_r), %mm7
929 addl $4, in_r
930 psllq tmp_mm, %mm7
931 addl $32, bitslong_r
932 por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */
933
934.L_get_length_code_mmx:
935 pand hold_mm, lmask_mm
936 movd lmask_mm, %eax
937 movq lmask2_mm, lmask_mm
938 movl (%ebx,%eax,4), %eax /* eax = lcode[hold & lmask] */
939
940.L_dolen_mmx:
941 movzbl %ah, %ecx /* ecx = this.bits */
942 movd %ecx, used_mm
943 subl %ecx, bitslong_r /* bits -= this.bits */
944
945 testb %al, %al
946 jnz .L_test_for_length_base_mmx /* if (op != 0) 45.7% */
947
948 shrl $16, %eax /* output this.val char */
949 stosb
950 jmp .L_while_test_mmx
951
952.L_test_for_length_base_mmx:
953#define len_r %edx
954 movl %eax, len_r /* len = this */
955 shrl $16, len_r /* len = this.val */
956
957 testb $16, %al
958 jz .L_test_for_second_level_length_mmx /* if ((op & 16) == 0) 8% */
959 andl $15, %eax /* op &= 15 */
960 jz .L_decode_distance_mmx /* if (!op) */
961
962 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
963 movd %eax, used_mm
964 movd hold_mm, %ecx
965 subl %eax, bitslong_r
966 andl .L_mask(,%eax,4), %ecx
967 addl %ecx, len_r /* len += hold & mask[op] */
968
969.L_decode_distance_mmx:
970 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
971
972 cmpl $32, bitslong_r
973 ja .L_get_dist_code_mmx /* if (32 < bits) */
974
975 movd bitslong_r, tmp_mm
976 movd (in_r), %mm7
977 addl $4, in_r
978 psllq tmp_mm, %mm7
979 addl $32, bitslong_r
980 por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */
981
982.L_get_dist_code_mmx:
983 movl dcode(%esp), %ebx /* ebx = dcode */
984 pand hold_mm, dmask_mm
985 movd dmask_mm, %eax
986 movq dmask2_mm, dmask_mm
987 movl (%ebx,%eax,4), %eax /* eax = dcode[hold & lmask] */
988
989.L_dodist_mmx:
990#define dist_r %ebx
991 movzbl %ah, %ecx /* ecx = this.bits */
992 movl %eax, dist_r
993 shrl $16, dist_r /* dist = this.val */
994 subl %ecx, bitslong_r /* bits -= this.bits */
995 movd %ecx, used_mm
996
997 testb $16, %al /* if ((op & 16) == 0) */
998 jz .L_test_for_second_level_dist_mmx
999 andl $15, %eax /* op &= 15 */
1000 jz .L_check_dist_one_mmx
1001
1002.L_add_bits_to_dist_mmx:
1003 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
1004 movd %eax, used_mm /* save bit length of current op */
1005 movd hold_mm, %ecx /* get the next bits on input stream */
1006 subl %eax, bitslong_r /* bits -= op bits */
1007 andl .L_mask(,%eax,4), %ecx /* ecx = hold & mask[op] */
1008 addl %ecx, dist_r /* dist += hold & mask[op] */
1009
1010.L_check_window_mmx:
1011 movl in_r, in(%esp) /* save in so from can use it's reg */
1012 movl out_r, %eax
1013 subl beg(%esp), %eax /* nbytes = out - beg */
1014
1015 cmpl dist_r, %eax
1016 jb .L_clip_window_mmx /* if (dist > nbytes) 4.2% */
1017
1018 movl len_r, %ecx
1019 movl out_r, from_r
1020 subl dist_r, from_r /* from = out - dist */
1021
1022 subl $3, %ecx
1023 movb (from_r), %al
1024 movb %al, (out_r)
1025 movb 1(from_r), %al
1026 movb 2(from_r), %dl
1027 addl $3, from_r
1028 movb %al, 1(out_r)
1029 movb %dl, 2(out_r)
1030 addl $3, out_r
1031 rep movsb
1032
1033 movl in(%esp), in_r /* move in back to %esi, toss from */
1034 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
1035 jmp .L_while_test_mmx
1036
1037.align 16,0x90
1038.L_check_dist_one_mmx:
1039 cmpl $1, dist_r
1040 jne .L_check_window_mmx
1041 cmpl out_r, beg(%esp)
1042 je .L_check_window_mmx
1043
1044 decl out_r
1045 movl len_r, %ecx
1046 movb (out_r), %al
1047 subl $3, %ecx
1048
1049 movb %al, 1(out_r)
1050 movb %al, 2(out_r)
1051 movb %al, 3(out_r)
1052 addl $4, out_r
1053 rep stosb
1054
1055 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
1056 jmp .L_while_test_mmx
1057
1058.align 16,0x90
1059.L_test_for_second_level_length_mmx:
1060 testb $64, %al
1061 jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */
1062
1063 andl $15, %eax
1064 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
1065 movd hold_mm, %ecx
1066 andl .L_mask(,%eax,4), %ecx
1067 addl len_r, %ecx
1068 movl (%ebx,%ecx,4), %eax /* eax = lcode[hold & lmask] */
1069 jmp .L_dolen_mmx
1070
1071.align 16,0x90
1072.L_test_for_second_level_dist_mmx:
1073 testb $64, %al
1074 jnz .L_invalid_distance_code /* if ((op & 64) != 0) */
1075
1076 andl $15, %eax
1077 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
1078 movd hold_mm, %ecx
1079 andl .L_mask(,%eax,4), %ecx
1080 movl dcode(%esp), %eax /* ecx = dcode */
1081 addl dist_r, %ecx
1082 movl (%eax,%ecx,4), %eax /* eax = lcode[hold & lmask] */
1083 jmp .L_dodist_mmx
1084
1085.align 16,0x90
1086.L_clip_window_mmx: /* MMX path: dist > nbytes, so the match starts inside the window; on entry %eax = nbytes (out - beg) */
1087#define nbytes_r %ecx
1088 movl %eax, nbytes_r
1089 movl wsize(%esp), %eax /* prepare for dist compare */
1090 negl nbytes_r /* nbytes = -nbytes */
1091 movl window(%esp), from_r /* from = window */
1092
1093 cmpl dist_r, %eax /* wsize - dist, unsigned */
1094 jb .L_invalid_distance_too_far /* if (dist > wsize) */
1095
1096 addl dist_r, nbytes_r /* nbytes = dist - nbytes */
1097 cmpl $0, write(%esp)
1098 jne .L_wrap_around_window_mmx /* if (write != 0) */
1099
1100 subl nbytes_r, %eax /* eax = wsize - nbytes */
1101 addl %eax, from_r /* from += wsize - nbytes */
1102
1103 cmpl nbytes_r, len_r
1104 jbe .L_do_copy1_mmx /* if (nbytes >= len) */
1105
1106 subl nbytes_r, len_r /* len -= nbytes */
1107 rep movsb /* copy the nbytes bytes that live in the window */
1108 movl out_r, from_r
1109 subl dist_r, from_r /* from = out - dist */
1110 jmp .L_do_copy1_mmx /* .L_do_copy1_mmx copies the remaining len bytes */
1120
1121.L_wrap_around_window_mmx:
1122#define write_r %eax
1123 movl write(%esp), write_r
1124 cmpl write_r, nbytes_r
1125 jbe .L_contiguous_in_window_mmx /* if (write >= nbytes) */
1126
1127 addl wsize(%esp), from_r
1128 addl write_r, from_r
1129 subl nbytes_r, from_r /* from += wsize + write - nbytes */
1130 subl write_r, nbytes_r /* nbytes -= write */
1131#undef write_r
1132
1133 cmpl nbytes_r, len_r
1134 jbe .L_do_copy1_mmx /* if (nbytes >= len) */
1135
1136 subl nbytes_r, len_r /* len -= nbytes */
1137 rep movsb
1138 movl window(%esp), from_r /* from = window */
1139 movl write(%esp), nbytes_r /* nbytes = write */
1140 cmpl nbytes_r, len_r
1141 jbe .L_do_copy1_mmx /* if (nbytes >= len) */
1142
1143 subl nbytes_r, len_r /* len -= nbytes */
1144 rep movsb
1145 movl out_r, from_r
1146 subl dist_r, from_r /* from = out - dist */
1147 jmp .L_do_copy1_mmx
1148
1149.L_contiguous_in_window_mmx:
1150#define write_r %eax
1151 addl write_r, from_r
1152 subl nbytes_r, from_r /* from += write - nbytes */
1153#undef write_r
1154
1155 cmpl nbytes_r, len_r
1156 jbe .L_do_copy1_mmx /* if (nbytes >= len) */
1157
1158 subl nbytes_r, len_r /* len -= nbytes */
1159 rep movsb
1160 movl out_r, from_r
1161 subl dist_r, from_r /* from = out - dist */
1162
1163.L_do_copy1_mmx:
/* Last piece of the copy: move the remaining len bytes, then reload the two
 * registers that were borrowed for the copy (in and lcode, from their stack
 * slots) and go back to the loop test.
 */
1164#undef nbytes_r
1165#define in_r %esi
1166 movl len_r, %ecx /* rep count = len */
1167 rep movsb
1168
1169 movl in(%esp), in_r /* move in back to %esi, toss from */
1170 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
1171 jmp .L_while_test_mmx
1172
/* register aliases that are no longer wanted past the MMX-only code */
1173#undef hold_r
1174#undef bitslong_r
1175
1176#endif /* USE_MMX || RUN_TIME_MMX */
1177
1178
1179/*** USE_MMX, NO_MMX, and RUNTIME_MMX from here on ***/
1180
1181.L_invalid_distance_code:
1182 /* else {
1183 * strm->msg = "invalid distance code";
1184 * state->mode = BAD;
1185 * }
1186 */
/* Error exit.  Passes the message address in %ecx and the new mode in %edx
 * to .L_update_stream_state, which performs the two stores.
 */
1187 movl $.L_invalid_distance_code_msg, %ecx /* msg */
1188 movl $INFLATE_MODE_BAD, %edx /* mode */
1189 jmp .L_update_stream_state
1190
1191.L_test_for_end_of_block:
1192 /* else if (op & 32) {
1193 * state->mode = TYPE;
1194 * break;
1195 * }
1196 */
/* Tests bit 5 of op (in %al).  If it is clear the code is invalid and the
 * literal/length error exit is taken.  Otherwise this is end-of-block:
 * %ecx = 0 tells .L_update_stream_state to leave strm->msg alone, and
 * %edx carries the new mode.
 */
1197 testb $32, %al
1198 jz .L_invalid_literal_length_code /* if ((op & 32) == 0) */
1199
/* xorl is the usual zeroing idiom.  It rewrites the flags, which is harmless
 * here: nothing reads them before the testl at .L_update_stream_state.
 */
1200 xorl %ecx, %ecx /* msg = NULL */
1201 movl $INFLATE_MODE_TYPE, %edx /* mode */
1202 jmp .L_update_stream_state
1203
1204.L_invalid_literal_length_code:
1205 /* else {
1206 * strm->msg = "invalid literal/length code";
1207 * state->mode = BAD;
1208 * }
1209 */
/* Error exit, also reached from .L_test_for_end_of_block when (op & 32) == 0.
 * Passes the message address in %ecx and the new mode in %edx to
 * .L_update_stream_state.
 */
1210 movl $.L_invalid_literal_length_code_msg, %ecx /* msg */
1211 movl $INFLATE_MODE_BAD, %edx /* mode */
1212 jmp .L_update_stream_state
1213
1214.L_invalid_distance_too_far:
1215 /* strm->msg = "invalid distance too far back";
1216 * state->mode = BAD;
1217 */
/* Error exit taken while the register shared by in_r and from_r holds from.
 * in is reloaded from its stack slot first because the exit code at
 * .L_break_loop derives strm->next_in from in_r.
 */
1218 movl in(%esp), in_r /* from_r has in's reg, put in back */
1219 movl $.L_invalid_distance_too_far_msg, %ecx /* msg */
1220 movl $INFLATE_MODE_BAD, %edx /* mode */
1221 jmp .L_update_stream_state /* the label that immediately follows */
1222
1223.L_update_stream_state:
1224 /* set strm->msg = %ecx, strm->state->mode = %edx */
/* In:       %ecx = message address, or 0 to leave strm->msg unchanged
 *           %edx = value to store in state->mode
 * Clobbers: %eax, flags
 * Continues at .L_break_loop.
 */
1225 movl strm_sp(%esp), %eax /* eax = strm */
1226 testl %ecx, %ecx /* if (msg != NULL) */
1227 jz .L_skip_msg
1228 movl %ecx, msg_strm(%eax) /* strm->msg = msg */
1229.L_skip_msg:
1230 movl state_strm(%eax), %eax /* state = strm->state */
1231 movl %edx, mode_state(%eax) /* state->mode = edx (BAD | TYPE) */
1232 jmp .L_break_loop /* jumps over the .align padding that follows */
1233
1234.align 32,0x90
1235.L_break_loop:
/* Common exit of the decoding loop.  Writes the decoder's working values
 * back into *strm and *state: unused whole bytes in the bit buffer are
 * returned to the input, then bits, next_out and next_in are stored.
 * hold, avail_in and avail_out are stored by the code that follows.
 */
1236
1237/*
1238 * Regs:
1239 *
1240 * bits = %ebp when mmx, and in %ebx when non-mmx
1241 * hold = %hold_mm when mmx, and in %ebp when non-mmx
1242 * in = %esi
1243 * out = %edi
1244 */
1245
1246#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
1247
1248#if defined( RUN_TIME_MMX )
1249
1250 cmpl $DO_USE_MMX, inflate_fast_use_mmx
1251 jne .L_update_next_in /* non-mmx at run time: bits is already in %ebx */
1252
1253#endif /* RUN_TIME_MMX */
1254
1255 movl %ebp, %ebx /* mmx: move bits to %ebx so the code below is shared */
1256
1257.L_update_next_in:
1258
1259#endif
1260
1261#define strm_r %eax
1262#define state_r %edx
1263
1264 /* len = bits >> 3;
1265 * in -= len;
1266 * bits -= len << 3;
1267 * hold &= (1U << bits) - 1;
1268 * state->hold = hold;
1269 * state->bits = bits;
1270 * strm->next_in = in;
1271 * strm->next_out = out;
1272 */
1273 movl strm_sp(%esp), strm_r
1274 movl %ebx, %ecx /* ecx = bits */
1275 movl state_strm(strm_r), state_r /* state = strm->state */
1276 shrl $3, %ecx /* len = bits >> 3 */
1277 subl %ecx, in_r /* in -= len */
1278 shll $3, %ecx /* len << 3 */
1279 subl %ecx, %ebx /* bits -= len << 3 */
1280 movl out_r, next_out_strm(strm_r) /* strm->next_out = out */
1281 movl %ebx, bits_state(state_r) /* state->bits = bits */
1282 movl %ebx, %ecx /* keep bits in %cl for the mask shift at .L_buf_not_used */
1283
/* If last equals the address of the local buf, rebase in from buf onto
 * strm->next_in and recompute last from strm->next_in and strm->avail_in.
 * NOTE(review): presumably input was staged in buf earlier in the routine,
 * outside this excerpt -- confirm.
 */
1284 leal buf(%esp), %ebx /* ebx = address of buf */
1285 cmpl %ebx, last(%esp)
1286 jne .L_buf_not_used /* if buf != last */
1287
1288 subl %ebx, in_r /* in -= buf */
1289 movl next_in_strm(strm_r), %ebx
1290 movl %ebx, last(%esp) /* last = strm->next_in */
1291 addl %ebx, in_r /* in += strm->next_in */
1292 movl avail_in_strm(strm_r), %ebx
1293 subl $11, %ebx /* ebx = avail_in - 11 */
1294 addl %ebx, last(%esp) /* last = &strm->next_in[ avail_in - 11 ] */
1295
1296.L_buf_not_used:
/* Stores strm->next_in, then masks hold down to its low `bits` bits and
 * stores it in state->hold.  %cl still holds bits (set just before the buf
 * check above this label).
 */
1297 movl in_r, next_in_strm(strm_r) /* strm->next_in = in */
1298
1299 movl $1, %ebx
1300 shll %cl, %ebx
1301 decl %ebx /* ebx = (1U << bits) - 1 */
1302
1303#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
1304
1305#if defined( RUN_TIME_MMX )
1306
1307 cmpl $DO_USE_MMX, inflate_fast_use_mmx
1308 jne .L_update_hold /* non-mmx at run time: hold is already in %ebp */
1309
1310#endif /* RUN_TIME_MMX */
1311
1312 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
1313 movd hold_mm, %ebp /* low 32 bits of hold_mm -> %ebp */
1314
1315 emms /* empty the MMX state before leaving MMX code */
1316
1317.L_update_hold:
1318
1319#endif /* USE_MMX || RUN_TIME_MMX */
1320
1321 andl %ebx, %ebp /* hold &= (1U << bits) - 1 */
1322 movl %ebp, hold_state(state_r) /* state->hold = hold */
1323
1324#define last_r %ebx
1325
/* strm->avail_in = in < last ? 11 + (last - in) : 11 - (in - last)
 *
 * In 32-bit wrap-around arithmetic both arms of that expression are the
 * same value, last - in + 11, so it is computed once with no compare or
 * branch.  Only %ebx and the flags are modified; in_r is left untouched.
 * Falls through to .L_fixup_out.
 */
1327 movl last(%esp), last_r
1331 subl in_r, last_r /* last -= in */
1332 addl $11, last_r /* last += 11 */
1333 movl last_r, avail_in_strm(strm_r) /* strm->avail_in = last - in + 11 */
1340
1341#undef last_r
1342#define end_r %ebx
1343
1344.L_fixup_out:
/* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end)
 *
 * In 32-bit wrap-around arithmetic both arms of that expression are the
 * same value, end - out + 257, so it is computed once with no compare or
 * branch.  Only %ebx and the flags are modified; out_r is left untouched.
 * Falls through to .L_done.
 */
1346 movl end(%esp), end_r
1350 subl out_r, end_r /* end -= out */
1351 addl $257, end_r /* end += 257 */
1352 movl end_r, avail_out_strm(strm_r) /* strm->avail_out = end - out + 257 */
1359
1360#undef end_r
1361#undef strm_r
1362#undef state_r
1363
1364.L_done:
/* Epilogue: drops the local_var_size bytes of stack locals, then pops
 * EFLAGS and %ebx, %ebp, %esi, %edi and returns.
 * NOTE(review): the matching pushes are outside this excerpt, presumably in
 * the routine's prologue -- confirm the order matches.
 */
1365 addl $local_var_size, %esp
1366 popf
1367 popl %ebx
1368 popl %ebp
1369 popl %esi
1370 popl %edi
1371 ret
1372
1373#if defined( GAS_ELF )
1374/* elf info */
/* mark inflate_fast as a function symbol and record its size (from the
 * inflate_fast label up to this point) */
1375.type inflate_fast,@function
1376.size inflate_fast,.-inflate_fast
1377#endif
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette