Mercurial > hg > CbC > CbC_gcc
comparison zlib/contrib/inflate86/inffast.S @ 51:ae3a4bfb450b
add some files of version 4.4.3 that have been forgotten.
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 07 Feb 2010 18:27:48 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
47:3bfb6c00c1e0 | 51:ae3a4bfb450b |
---|---|
1 /* | |
2 * inffast.S is a hand tuned assembler version of: | |
3 * | |
4 * inffast.c -- fast decoding | |
5 * Copyright (C) 1995-2003 Mark Adler | |
6 * For conditions of distribution and use, see copyright notice in zlib.h | |
7 * | |
8 * Copyright (C) 2003 Chris Anderson <christop@charm.net> | |
9 * Please use the copyright conditions above. | |
10 * | |
11 * This version (Jan-23-2003) of inflate_fast was coded and tested under | |
12 * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution. On that | |
13 * machine, I found that gzip style archives decompressed about 20% faster than | |
14 * the gcc-3.2 -O3 -fomit-frame-pointer compiled version. Your results will | |
15 * depend on how large of a buffer is used for z_stream.next_in & next_out | |
16 * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in | |
17 * stream processing I/O and crc32/adler32. In my case, this routine used | |
18 * 70% of the cpu time and crc32 used 20%. | |
19 * | |
20 * I am confident that this version will work in the general case, but I have | |
21 * not tested a wide variety of datasets or a wide variety of platforms. | |
22 * | |
23 * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating. | |
24 * It should be a runtime flag instead of compile time flag... | |
25 * | |
26 * Jan-26-2003 -- Added runtime check for MMX support with cpuid instruction. | |
27 * With -DUSE_MMX, only MMX code is compiled. With -DNO_MMX, only non-MMX code | |
28 * is compiled. Without either option, runtime detection is enabled. Runtime | |
29 * detection should work on all modern cpus and the recommended algorithm (flip | |
30 * ID bit on eflags and then use the cpuid instruction) is used in many | |
31 * multimedia applications. Tested under win2k with gcc-2.95 and gas-2.12 | |
32 * distributed with cygwin3. Compiling with gcc-2.95 -c inffast.S -o | |
33 * inffast.obj generates a COFF object which can then be linked with MSVC++ | |
34 * compiled code. Tested under FreeBSD 4.7 with gcc-2.95. | |
35 * | |
36 * Jan-28-2003 -- Tested Athlon XP... MMX mode is slower than no MMX (and | |
37 * slower than compiler generated code). Adjusted cpuid check to use the MMX | |
38 * code only for Pentiums < P4 until I have more data on the P4. Speed | |
39 * improvement is only about 15% on the Athlon when compared with code generated | |
40 * with MSVC++. Not sure yet, but I think the P4 will also be slower using the | |
41 * MMX mode because many of its x86 ALU instructions execute in .5 cycles and | |
42 * have less latency than MMX ops. Added code to buffer the last 11 bytes of | |
43 * the input stream since the MMX code grabs bits in chunks of 32, which | |
44 * differs from the inffast.c algorithm. I don't think there would have been | |
45 * read overruns where a page boundary was crossed (a segfault), but there | |
46 * could have been overruns when next_in ends on unaligned memory (uninitialized | |
47 * memory read). | |
48 * | |
49 * Mar-13-2003 -- P4 MMX is slightly slower than P4 NO_MMX. I created a C | |
50 * version of the non-MMX code so that it doesn't depend on zstrm and zstate | |
51 * structure offsets which are hard coded in this file. This was last tested | |
52 * with zlib-1.2.0 which is currently in beta testing, newer versions of this | |
53 * and inffas86.c can be found at http://www.eetbeetee.com/zlib/ and | |
54 * http://www.charm.net/~christop/zlib/ | |
55 */ | |
56 | |
57 | |
58 /* | |
59 * if you have underscore linking problems (_inflate_fast undefined), try | |
60 * using -DGAS_COFF | |
61 */ | |
62 #if ! defined( GAS_COFF ) && ! defined( GAS_ELF ) | |
63 | |
64 #if defined( WIN32 ) || defined( __CYGWIN__ ) | |
65 #define GAS_COFF /* windows object format */ | |
66 #else | |
67 #define GAS_ELF | |
68 #endif | |
69 | |
70 #endif /* ! GAS_COFF && ! GAS_ELF */ | |
71 | |
72 | |
73 #if defined( GAS_COFF ) | |
74 | |
75 /* coff externals have underscores */ | |
76 #define inflate_fast _inflate_fast | |
77 #define inflate_fast_use_mmx _inflate_fast_use_mmx | |
78 | |
79 #endif /* GAS_COFF */ | |
80 | |
81 | |
82 .file "inffast.S" | |
83 | |
84 .globl inflate_fast | |
85 | |
86 .text | |
87 .align 4,0 | |
88 .L_invalid_literal_length_code_msg: | |
89 .string "invalid literal/length code" | |
90 | |
91 .align 4,0 | |
92 .L_invalid_distance_code_msg: | |
93 .string "invalid distance code" | |
94 | |
95 .align 4,0 | |
96 .L_invalid_distance_too_far_msg: | |
97 .string "invalid distance too far back" | |
98 | |
99 #if ! defined( NO_MMX ) | |
100 .align 4,0 | |
101 .L_mask: /* mask[N] = ( 1 << N ) - 1 */ | |
102 .long 0 | |
103 .long 1 | |
104 .long 3 | |
105 .long 7 | |
106 .long 15 | |
107 .long 31 | |
108 .long 63 | |
109 .long 127 | |
110 .long 255 | |
111 .long 511 | |
112 .long 1023 | |
113 .long 2047 | |
114 .long 4095 | |
115 .long 8191 | |
116 .long 16383 | |
117 .long 32767 | |
118 .long 65535 | |
119 .long 131071 | |
120 .long 262143 | |
121 .long 524287 | |
122 .long 1048575 | |
123 .long 2097151 | |
124 .long 4194303 | |
125 .long 8388607 | |
126 .long 16777215 | |
127 .long 33554431 | |
128 .long 67108863 | |
129 .long 134217727 | |
130 .long 268435455 | |
131 .long 536870911 | |
132 .long 1073741823 | |
133 .long 2147483647 | |
134 .long 4294967295 | |
135 #endif /* NO_MMX */ | |
136 | |
137 .text | |
138 | |
139 /* | |
140 * struct z_stream offsets, in zlib.h | |
141 */ | |
142 #define next_in_strm 0 /* strm->next_in */ | |
143 #define avail_in_strm 4 /* strm->avail_in */ | |
144 #define next_out_strm 12 /* strm->next_out */ | |
145 #define avail_out_strm 16 /* strm->avail_out */ | |
146 #define msg_strm 24 /* strm->msg */ | |
147 #define state_strm 28 /* strm->state */ | |
148 | |
149 /* | |
150 * struct inflate_state offsets, in inflate.h | |
151 */ | |
152 #define mode_state 0 /* state->mode */ | |
153 #define wsize_state 32 /* state->wsize */ | |
154 #define write_state 40 /* state->write */ | |
155 #define window_state 44 /* state->window */ | |
156 #define hold_state 48 /* state->hold */ | |
157 #define bits_state 52 /* state->bits */ | |
158 #define lencode_state 68 /* state->lencode */ | |
159 #define distcode_state 72 /* state->distcode */ | |
160 #define lenbits_state 76 /* state->lenbits */ | |
161 #define distbits_state 80 /* state->distbits */ | |
162 | |
163 /* | |
164 * inflate_fast's activation record | |
165 */ | |
166 #define local_var_size 64 /* how much local space for vars */ | |
167 #define strm_sp 88 /* first arg: z_stream * (local_var_size + 24) */ | |
168 #define start_sp 92 /* second arg: unsigned int (local_var_size + 28) */ | |
169 | |
170 /* | |
171 * offsets for local vars on stack | |
172 */ | |
173 #define out 60 /* unsigned char* */ | |
174 #define window 56 /* unsigned char* */ | |
175 #define wsize 52 /* unsigned int */ | |
176 #define write 48 /* unsigned int */ | |
177 #define in 44 /* unsigned char* */ | |
178 #define beg 40 /* unsigned char* */ | |
179 #define buf 28 /* char[ 12 ] */ | |
180 #define len 24 /* unsigned int */ | |
181 #define last 20 /* unsigned char* */ | |
182 #define end 16 /* unsigned char* */ | |
183 #define dcode 12 /* code* */ | |
184 #define lcode 8 /* code* */ | |
185 #define dmask 4 /* unsigned int */ | |
186 #define lmask 0 /* unsigned int */ | |
187 | |
188 /* | |
189 * typedef enum inflate_mode consts, in inflate.h | |
190 */ | |
191 #define INFLATE_MODE_TYPE 11 /* state->mode flags enum-ed in inflate.h */ | |
192 #define INFLATE_MODE_BAD 26 | |
193 | |
194 | |
195 #if ! defined( USE_MMX ) && ! defined( NO_MMX ) | |
196 | |
197 #define RUN_TIME_MMX | |
198 | |
199 #define CHECK_MMX 1 | |
200 #define DO_USE_MMX 2 | |
201 #define DONT_USE_MMX 3 | |
202 | |
203 .globl inflate_fast_use_mmx | |
204 | |
205 .data | |
206 | |
207 .align 4,0 | |
208 inflate_fast_use_mmx: /* integer flag for run time control 1=check,2=mmx,3=no */ | |
209 .long CHECK_MMX | |
210 | |
211 #if defined( GAS_ELF ) | |
212 /* elf info */ | |
213 .type inflate_fast_use_mmx,@object | |
214 .size inflate_fast_use_mmx,4 | |
215 #endif | |
216 | |
217 #endif /* RUN_TIME_MMX */ | |
218 | |
219 #if defined( GAS_COFF ) | |
220 /* coff info: scl 2 = extern, type 32 = function */ | |
221 .def inflate_fast; .scl 2; .type 32; .endef | |
222 #endif | |
223 | |
224 .text | |
225 | |
226 .align 32,0x90 | |
227 inflate_fast: | |
228 pushl %edi | |
229 pushl %esi | |
230 pushl %ebp | |
231 pushl %ebx | |
232 pushf /* save eflags (strm_sp, state_sp assumes this is 32 bits) */ | |
233 subl $local_var_size, %esp | |
234 cld | |
235 | |
236 #define strm_r %esi | |
237 #define state_r %edi | |
238 | |
239 movl strm_sp(%esp), strm_r | |
240 movl state_strm(strm_r), state_r | |
241 | |
242 /* in = strm->next_in; | |
243 * out = strm->next_out; | |
244 * last = in + strm->avail_in - 11; | |
245 * beg = out - (start - strm->avail_out); | |
246 * end = out + (strm->avail_out - 257); | |
247 */ | |
248 movl avail_in_strm(strm_r), %edx | |
249 movl next_in_strm(strm_r), %eax | |
250 | |
251 addl %eax, %edx /* avail_in += next_in */ | |
252 subl $11, %edx /* avail_in -= 11 */ | |
253 | |
254 movl %eax, in(%esp) | |
255 movl %edx, last(%esp) | |
256 | |
257 movl start_sp(%esp), %ebp | |
258 movl avail_out_strm(strm_r), %ecx | |
259 movl next_out_strm(strm_r), %ebx | |
260 | |
261 subl %ecx, %ebp /* start -= avail_out */ | |
262 negl %ebp /* start = -start */ | |
263 addl %ebx, %ebp /* start += next_out */ | |
264 | |
265 subl $257, %ecx /* avail_out -= 257 */ | |
266 addl %ebx, %ecx /* avail_out += out */ | |
267 | |
268 movl %ebx, out(%esp) | |
269 movl %ebp, beg(%esp) | |
270 movl %ecx, end(%esp) | |
271 | |
272 /* wsize = state->wsize; | |
273 * write = state->write; | |
274 * window = state->window; | |
275 * hold = state->hold; | |
276 * bits = state->bits; | |
277 * lcode = state->lencode; | |
278 * dcode = state->distcode; | |
279 * lmask = ( 1 << state->lenbits ) - 1; | |
280 * dmask = ( 1 << state->distbits ) - 1; | |
281 */ | |
282 | |
283 movl lencode_state(state_r), %eax | |
284 movl distcode_state(state_r), %ecx | |
285 | |
286 movl %eax, lcode(%esp) | |
287 movl %ecx, dcode(%esp) | |
288 | |
289 movl $1, %eax | |
290 movl lenbits_state(state_r), %ecx | |
291 shll %cl, %eax | |
292 decl %eax | |
293 movl %eax, lmask(%esp) | |
294 | |
295 movl $1, %eax | |
296 movl distbits_state(state_r), %ecx | |
297 shll %cl, %eax | |
298 decl %eax | |
299 movl %eax, dmask(%esp) | |
300 | |
301 movl wsize_state(state_r), %eax | |
302 movl write_state(state_r), %ecx | |
303 movl window_state(state_r), %edx | |
304 | |
305 movl %eax, wsize(%esp) | |
306 movl %ecx, write(%esp) | |
307 movl %edx, window(%esp) | |
308 | |
309 movl hold_state(state_r), %ebp | |
310 movl bits_state(state_r), %ebx | |
311 | |
312 #undef strm_r | |
313 #undef state_r | |
314 | |
315 #define in_r %esi | |
316 #define from_r %esi | |
317 #define out_r %edi | |
318 | |
319 movl in(%esp), in_r | |
320 movl last(%esp), %ecx | |
321 cmpl in_r, %ecx | |
322 ja .L_align_long /* if in < last */ | |
323 | |
324 addl $11, %ecx /* ecx = &in[ avail_in ] */ | |
325 subl in_r, %ecx /* ecx = avail_in */ | |
326 movl $12, %eax | |
327 subl %ecx, %eax /* eax = 12 - avail_in */ | |
328 leal buf(%esp), %edi | |
329 rep movsb /* memcpy( buf, in, avail_in ) */ | |
330 movl %eax, %ecx | |
331 xorl %eax, %eax | |
332 rep stosb /* memset( &buf[ avail_in ], 0, 12 - avail_in ) */ | |
333 leal buf(%esp), in_r /* in = buf */ | |
334 movl in_r, last(%esp) /* last = in, do just one iteration */ | |
335 jmp .L_is_aligned | |
336 | |
337 /* align in_r on long boundary */ | |
338 .L_align_long: | |
339 testl $3, in_r | |
340 jz .L_is_aligned | |
341 xorl %eax, %eax | |
342 movb (in_r), %al | |
343 incl in_r | |
344 movl %ebx, %ecx | |
345 addl $8, %ebx | |
346 shll %cl, %eax | |
347 orl %eax, %ebp | |
348 jmp .L_align_long | |
349 | |
350 .L_is_aligned: | |
351 movl out(%esp), out_r | |
352 | |
353 #if defined( NO_MMX ) | |
354 jmp .L_do_loop | |
355 #endif | |
356 | |
357 #if defined( USE_MMX ) | |
358 jmp .L_init_mmx | |
359 #endif | |
360 | |
361 /*** Runtime MMX check ***/ | |
362 | |
363 #if defined( RUN_TIME_MMX ) | |
364 .L_check_mmx: | |
365 cmpl $DO_USE_MMX, inflate_fast_use_mmx | |
366 je .L_init_mmx | |
367 ja .L_do_loop /* > 2 */ | |
368 | |
369 pushl %eax | |
370 pushl %ebx | |
371 pushl %ecx | |
372 pushl %edx | |
373 pushf | |
374 movl (%esp), %eax /* copy eflags to eax */ | |
375 xorl $0x200000, (%esp) /* try toggling ID bit of eflags (bit 21) | |
376 * to see if cpu supports cpuid... | |
377 * ID bit method not supported by NexGen but | |
378 * bios may load a cpuid instruction and | |
379 * cpuid may be disabled on Cyrix 5-6x86 */ | |
380 popf | |
381 pushf | |
382 popl %edx /* copy new eflags to edx */ | |
383 xorl %eax, %edx /* test if ID bit is flipped */ | |
384 jz .L_dont_use_mmx /* not flipped if zero */ | |
385 xorl %eax, %eax | |
386 cpuid | |
387 cmpl $0x756e6547, %ebx /* check for GenuineIntel in ebx,ecx,edx */ | |
388 jne .L_dont_use_mmx | |
389 cmpl $0x6c65746e, %ecx | |
390 jne .L_dont_use_mmx | |
391 cmpl $0x49656e69, %edx | |
392 jne .L_dont_use_mmx | |
393 movl $1, %eax | |
394 cpuid /* get cpu features */ | |
395 shrl $8, %eax | |
396 andl $15, %eax | |
397 cmpl $6, %eax /* check for Pentium family, is 0xf for P4 */ | |
398 jne .L_dont_use_mmx | |
399 testl $0x800000, %edx /* test if MMX feature is set (bit 23) */ | |
400 jnz .L_use_mmx | |
401 jmp .L_dont_use_mmx | |
402 .L_use_mmx: | |
403 movl $DO_USE_MMX, inflate_fast_use_mmx | |
404 jmp .L_check_mmx_pop | |
405 .L_dont_use_mmx: | |
406 movl $DONT_USE_MMX, inflate_fast_use_mmx | |
407 .L_check_mmx_pop: | |
408 popl %edx | |
409 popl %ecx | |
410 popl %ebx | |
411 popl %eax | |
412 jmp .L_check_mmx | |
413 #endif | |
414 | |
415 | |
416 /*** Non-MMX code ***/ | |
417 | |
418 #if defined ( NO_MMX ) || defined( RUN_TIME_MMX ) | |
419 | |
420 #define hold_r %ebp | |
421 #define bits_r %bl | |
422 #define bitslong_r %ebx | |
423 | |
424 .align 32,0x90 | |
425 .L_while_test: | |
426 /* while (in < last && out < end) | |
427 */ | |
428 cmpl out_r, end(%esp) | |
429 jbe .L_break_loop /* if (out >= end) */ | |
430 | |
431 cmpl in_r, last(%esp) | |
432 jbe .L_break_loop | |
433 | |
434 .L_do_loop: | |
435 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out | |
436 * | |
437 * do { | |
438 * if (bits < 15) { | |
439 * hold |= *((unsigned short *)in)++ << bits; | |
440 * bits += 16 | |
441 * } | |
442 * this = lcode[hold & lmask] | |
443 */ | |
444 cmpb $15, bits_r | |
445 ja .L_get_length_code /* if (15 < bits) */ | |
446 | |
447 xorl %eax, %eax | |
448 lodsw /* al = *(ushort *)in++ */ | |
449 movb bits_r, %cl /* cl = bits, needs it for shifting */ | |
450 addb $16, bits_r /* bits += 16 */ | |
451 shll %cl, %eax | |
452 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */ | |
453 | |
454 .L_get_length_code: | |
455 movl lmask(%esp), %edx /* edx = lmask */ | |
456 movl lcode(%esp), %ecx /* ecx = lcode */ | |
457 andl hold_r, %edx /* edx &= hold */ | |
458 movl (%ecx,%edx,4), %eax /* eax = lcode[hold & lmask] */ | |
459 | |
460 .L_dolen: | |
461 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out | |
462 * | |
463 * dolen: | |
464 * bits -= this.bits; | |
465 * hold >>= this.bits | |
466 */ | |
467 movb %ah, %cl /* cl = this.bits */ | |
468 subb %ah, bits_r /* bits -= this.bits */ | |
469 shrl %cl, hold_r /* hold >>= this.bits */ | |
470 | |
471 /* check if op is a literal | |
472 * if (op == 0) { | |
473 * PUP(out) = this.val; | |
474 * } | |
475 */ | |
476 testb %al, %al | |
477 jnz .L_test_for_length_base /* if (op != 0) 45.7% */ | |
478 | |
479 shrl $16, %eax /* output this.val char */ | |
480 stosb | |
481 jmp .L_while_test | |
482 | |
483 .L_test_for_length_base: | |
484 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len | |
485 * | |
486 * else if (op & 16) { | |
487 * len = this.val | |
488 * op &= 15 | |
489 * if (op) { | |
490 * if (op > bits) { | |
491 * hold |= *((unsigned short *)in)++ << bits; | |
492 * bits += 16 | |
493 * } | |
494 * len += hold & mask[op]; | |
495 * bits -= op; | |
496 * hold >>= op; | |
497 * } | |
498 */ | |
499 #define len_r %edx | |
500 movl %eax, len_r /* len = this */ | |
501 shrl $16, len_r /* len = this.val */ | |
502 movb %al, %cl | |
503 | |
504 testb $16, %al | |
505 jz .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */ | |
506 andb $15, %cl /* op &= 15 */ | |
507 jz .L_save_len /* if (!op) */ | |
508 cmpb %cl, bits_r | |
509 jae .L_add_bits_to_len /* if (op <= bits) */ | |
510 | |
511 movb %cl, %ch /* stash op in ch, freeing cl */ | |
512 xorl %eax, %eax | |
513 lodsw /* al = *(ushort *)in++ */ | |
514 movb bits_r, %cl /* cl = bits, needs it for shifting */ | |
515 addb $16, bits_r /* bits += 16 */ | |
516 shll %cl, %eax | |
517 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */ | |
518 movb %ch, %cl /* move op back to ecx */ | |
519 | |
520 .L_add_bits_to_len: | |
521 movl $1, %eax | |
522 shll %cl, %eax | |
523 decl %eax | |
524 subb %cl, bits_r | |
525 andl hold_r, %eax /* eax &= hold */ | |
526 shrl %cl, hold_r | |
527 addl %eax, len_r /* len += hold & mask[op] */ | |
528 | |
529 .L_save_len: | |
530 movl len_r, len(%esp) /* save len */ | |
531 #undef len_r | |
532 | |
533 .L_decode_distance: | |
534 /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist | |
535 * | |
536 * if (bits < 15) { | |
537 * hold |= *((unsigned short *)in)++ << bits; | |
538 * bits += 16 | |
539 * } | |
540 * this = dcode[hold & dmask]; | |
541 * dodist: | |
542 * bits -= this.bits; | |
543 * hold >>= this.bits; | |
544 * op = this.op; | |
545 */ | |
546 | |
547 cmpb $15, bits_r | |
548 ja .L_get_distance_code /* if (15 < bits) */ | |
549 | |
550 xorl %eax, %eax | |
551 lodsw /* al = *(ushort *)in++ */ | |
552 movb bits_r, %cl /* cl = bits, needs it for shifting */ | |
553 addb $16, bits_r /* bits += 16 */ | |
554 shll %cl, %eax | |
555 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */ | |
556 | |
557 .L_get_distance_code: | |
558 movl dmask(%esp), %edx /* edx = dmask */ | |
559 movl dcode(%esp), %ecx /* ecx = dcode */ | |
560 andl hold_r, %edx /* edx &= hold */ | |
561 movl (%ecx,%edx,4), %eax /* eax = dcode[hold & dmask] */ | |
562 | |
563 #define dist_r %edx | |
564 .L_dodist: | |
565 movl %eax, dist_r /* dist = this */ | |
566 shrl $16, dist_r /* dist = this.val */ | |
567 movb %ah, %cl | |
568 subb %ah, bits_r /* bits -= this.bits */ | |
569 shrl %cl, hold_r /* hold >>= this.bits */ | |
570 | |
571 /* if (op & 16) { | |
572 * dist = this.val | |
573 * op &= 15 | |
574 * if (op > bits) { | |
575 * hold |= *((unsigned short *)in)++ << bits; | |
576 * bits += 16 | |
577 * } | |
578 * dist += hold & mask[op]; | |
579 * bits -= op; | |
580 * hold >>= op; | |
581 */ | |
582 movb %al, %cl /* cl = this.op */ | |
583 | |
584 testb $16, %al /* if ((op & 16) == 0) */ | |
585 jz .L_test_for_second_level_dist | |
586 andb $15, %cl /* op &= 15 */ | |
587 jz .L_check_dist_one | |
588 cmpb %cl, bits_r | |
589 jae .L_add_bits_to_dist /* if (op <= bits) 97.6% */ | |
590 | |
591 movb %cl, %ch /* stash op in ch, freeing cl */ | |
592 xorl %eax, %eax | |
593 lodsw /* al = *(ushort *)in++ */ | |
594 movb bits_r, %cl /* cl = bits, needs it for shifting */ | |
595 addb $16, bits_r /* bits += 16 */ | |
596 shll %cl, %eax | |
597 orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */ | |
598 movb %ch, %cl /* move op back to ecx */ | |
599 | |
600 .L_add_bits_to_dist: | |
601 movl $1, %eax | |
602 shll %cl, %eax | |
603 decl %eax /* (1 << op) - 1 */ | |
604 subb %cl, bits_r | |
605 andl hold_r, %eax /* eax &= hold */ | |
606 shrl %cl, hold_r | |
607 addl %eax, dist_r /* dist += hold & ((1 << op) - 1) */ | |
608 jmp .L_check_window | |
609 | |
610 .L_check_window: | |
611 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist | |
612 * %ecx = nbytes | |
613 * | |
614 * nbytes = out - beg; | |
615 * if (dist <= nbytes) { | |
616 * from = out - dist; | |
617 * do { | |
618 * PUP(out) = PUP(from); | |
619 * } while (--len > 0) { | |
620 * } | |
621 */ | |
622 | |
623 movl in_r, in(%esp) /* save in so from can use it's reg */ | |
624 movl out_r, %eax | |
625 subl beg(%esp), %eax /* nbytes = out - beg */ | |
626 | |
627 cmpl dist_r, %eax | |
628 jb .L_clip_window /* if (dist > nbytes) 4.2% */ | |
629 | |
630 movl len(%esp), %ecx | |
631 movl out_r, from_r | |
632 subl dist_r, from_r /* from = out - dist */ | |
633 | |
634 subl $3, %ecx | |
635 movb (from_r), %al | |
636 movb %al, (out_r) | |
637 movb 1(from_r), %al | |
638 movb 2(from_r), %dl | |
639 addl $3, from_r | |
640 movb %al, 1(out_r) | |
641 movb %dl, 2(out_r) | |
642 addl $3, out_r | |
643 rep movsb | |
644 | |
645 movl in(%esp), in_r /* move in back to %esi, toss from */ | |
646 jmp .L_while_test | |
647 | |
648 .align 16,0x90 | |
649 .L_check_dist_one: | |
650 cmpl $1, dist_r | |
651 jne .L_check_window | |
652 cmpl out_r, beg(%esp) | |
653 je .L_check_window | |
654 | |
655 decl out_r | |
656 movl len(%esp), %ecx | |
657 movb (out_r), %al | |
658 subl $3, %ecx | |
659 | |
660 movb %al, 1(out_r) | |
661 movb %al, 2(out_r) | |
662 movb %al, 3(out_r) | |
663 addl $4, out_r | |
664 rep stosb | |
665 | |
666 jmp .L_while_test | |
667 | |
668 .align 16,0x90 | |
669 .L_test_for_second_level_length: | |
670 /* else if ((op & 64) == 0) { | |
671 * this = lcode[this.val + (hold & mask[op])]; | |
672 * } | |
673 */ | |
674 testb $64, %al | |
675 jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */ | |
676 | |
677 movl $1, %eax | |
678 shll %cl, %eax | |
679 decl %eax | |
680 andl hold_r, %eax /* eax &= hold */ | |
681 addl %edx, %eax /* eax += this.val */ | |
682 movl lcode(%esp), %edx /* edx = lcode */ | |
683 movl (%edx,%eax,4), %eax /* eax = lcode[val + (hold&mask[op])] */ | |
684 jmp .L_dolen | |
685 | |
686 .align 16,0x90 | |
687 .L_test_for_second_level_dist: | |
688 /* else if ((op & 64) == 0) { | |
689 * this = dcode[this.val + (hold & mask[op])]; | |
690 * } | |
691 */ | |
692 testb $64, %al | |
693 jnz .L_invalid_distance_code /* if ((op & 64) != 0) */ | |
694 | |
695 movl $1, %eax | |
696 shll %cl, %eax | |
697 decl %eax | |
698 andl hold_r, %eax /* eax &= hold */ | |
699 addl %edx, %eax /* eax += this.val */ | |
700 movl dcode(%esp), %edx /* edx = dcode */ | |
701 movl (%edx,%eax,4), %eax /* eax = dcode[val + (hold&mask[op])] */ | |
702 jmp .L_dodist | |
703 | |
704 .align 16,0x90 | |
705 .L_clip_window: | |
706 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist | |
707 * %ecx = nbytes | |
708 * | |
709 * else { | |
710 * if (dist > wsize) { | |
711 * invalid distance | |
712 * } | |
713 * from = window; | |
714 * nbytes = dist - nbytes; | |
715 * if (write == 0) { | |
716 * from += wsize - nbytes; | |
717 */ | |
718 #define nbytes_r %ecx | |
719 movl %eax, nbytes_r | |
720 movl wsize(%esp), %eax /* prepare for dist compare */ | |
721 negl nbytes_r /* nbytes = -nbytes */ | |
722 movl window(%esp), from_r /* from = window */ | |
723 | |
724 cmpl dist_r, %eax | |
725 jb .L_invalid_distance_too_far /* if (dist > wsize) */ | |
726 | |
727 addl dist_r, nbytes_r /* nbytes = dist - nbytes */ | |
728 cmpl $0, write(%esp) | |
729 jne .L_wrap_around_window /* if (write != 0) */ | |
730 | |
731 subl nbytes_r, %eax | |
732 addl %eax, from_r /* from += wsize - nbytes */ | |
733 | |
734 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist | |
735 * %ecx = nbytes, %eax = len | |
736 * | |
737 * if (nbytes < len) { | |
738 * len -= nbytes; | |
739 * do { | |
740 * PUP(out) = PUP(from); | |
741 * } while (--nbytes); | |
742 * from = out - dist; | |
743 * } | |
744 * } | |
745 */ | |
746 #define len_r %eax | |
747 movl len(%esp), len_r | |
748 cmpl nbytes_r, len_r | |
749 jbe .L_do_copy1 /* if (nbytes >= len) */ | |
750 | |
751 subl nbytes_r, len_r /* len -= nbytes */ | |
752 rep movsb | |
753 movl out_r, from_r | |
754 subl dist_r, from_r /* from = out - dist */ | |
755 jmp .L_do_copy1 | |
756 | |
757 cmpl nbytes_r, len_r | |
758 jbe .L_do_copy1 /* if (nbytes >= len) */ | |
759 | |
760 subl nbytes_r, len_r /* len -= nbytes */ | |
761 rep movsb | |
762 movl out_r, from_r | |
763 subl dist_r, from_r /* from = out - dist */ | |
764 jmp .L_do_copy1 | |
765 | |
766 .L_wrap_around_window: | |
767 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist | |
768 * %ecx = nbytes, %eax = write, %eax = len | |
769 * | |
770 * else if (write < nbytes) { | |
771 * from += wsize + write - nbytes; | |
772 * nbytes -= write; | |
773 * if (nbytes < len) { | |
774 * len -= nbytes; | |
775 * do { | |
776 * PUP(out) = PUP(from); | |
777 * } while (--nbytes); | |
778 * from = window; | |
779 * nbytes = write; | |
780 * if (nbytes < len) { | |
781 * len -= nbytes; | |
782 * do { | |
783 * PUP(out) = PUP(from); | |
784 * } while(--nbytes); | |
785 * from = out - dist; | |
786 * } | |
787 * } | |
788 * } | |
789 */ | |
790 #define write_r %eax | |
791 movl write(%esp), write_r | |
792 cmpl write_r, nbytes_r | |
793 jbe .L_contiguous_in_window /* if (write >= nbytes) */ | |
794 | |
795 addl wsize(%esp), from_r | |
796 addl write_r, from_r | |
797 subl nbytes_r, from_r /* from += wsize + write - nbytes */ | |
798 subl write_r, nbytes_r /* nbytes -= write */ | |
799 #undef write_r | |
800 | |
801 movl len(%esp), len_r | |
802 cmpl nbytes_r, len_r | |
803 jbe .L_do_copy1 /* if (nbytes >= len) */ | |
804 | |
805 subl nbytes_r, len_r /* len -= nbytes */ | |
806 rep movsb | |
807 movl window(%esp), from_r /* from = window */ | |
808 movl write(%esp), nbytes_r /* nbytes = write */ | |
809 cmpl nbytes_r, len_r | |
810 jbe .L_do_copy1 /* if (nbytes >= len) */ | |
811 | |
812 subl nbytes_r, len_r /* len -= nbytes */ | |
813 rep movsb | |
814 movl out_r, from_r | |
815 subl dist_r, from_r /* from = out - dist */ | |
816 jmp .L_do_copy1 | |
817 | |
818 .L_contiguous_in_window: | |
819 /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist | |
820 * %ecx = nbytes, %eax = write, %eax = len | |
821 * | |
822 * else { | |
823 * from += write - nbytes; | |
824 * if (nbytes < len) { | |
825 * len -= nbytes; | |
826 * do { | |
827 * PUP(out) = PUP(from); | |
828 * } while (--nbytes); | |
829 * from = out - dist; | |
830 * } | |
831 * } | |
832 */ | |
833 #define write_r %eax | |
834 addl write_r, from_r | |
835 subl nbytes_r, from_r /* from += write - nbytes */ | |
836 #undef write_r | |
837 | |
838 movl len(%esp), len_r | |
839 cmpl nbytes_r, len_r | |
840 jbe .L_do_copy1 /* if (nbytes >= len) */ | |
841 | |
842 subl nbytes_r, len_r /* len -= nbytes */ | |
843 rep movsb | |
844 movl out_r, from_r | |
845 subl dist_r, from_r /* from = out - dist */ | |
846 | |
847 .L_do_copy1: | |
848 /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out | |
849 * %eax = len | |
850 * | |
851 * while (len > 0) { | |
852 * PUP(out) = PUP(from); | |
853 * len--; | |
854 * } | |
855 * } | |
856 * } while (in < last && out < end); | |
857 */ | |
858 #undef nbytes_r | |
859 #define in_r %esi | |
860 movl len_r, %ecx | |
861 rep movsb | |
862 | |
863 movl in(%esp), in_r /* move in back to %esi, toss from */ | |
864 jmp .L_while_test | |
865 | |
866 #undef len_r | |
867 #undef dist_r | |
868 | |
869 #endif /* NO_MMX || RUN_TIME_MMX */ | |
870 | |
871 | |
872 /*** MMX code ***/ | |
873 | |
874 #if defined( USE_MMX ) || defined( RUN_TIME_MMX ) | |
875 | |
876 .align 32,0x90 | |
877 .L_init_mmx: | |
878 emms | |
879 | |
880 #undef bits_r | |
881 #undef bitslong_r | |
882 #define bitslong_r %ebp | |
883 #define hold_mm %mm0 | |
884 movd %ebp, hold_mm | |
885 movl %ebx, bitslong_r | |
886 | |
887 #define used_mm %mm1 | |
888 #define dmask2_mm %mm2 | |
889 #define lmask2_mm %mm3 | |
890 #define lmask_mm %mm4 | |
891 #define dmask_mm %mm5 | |
892 #define tmp_mm %mm6 | |
893 | |
894 movd lmask(%esp), lmask_mm | |
895 movq lmask_mm, lmask2_mm | |
896 movd dmask(%esp), dmask_mm | |
897 movq dmask_mm, dmask2_mm | |
898 pxor used_mm, used_mm | |
899 movl lcode(%esp), %ebx /* ebx = lcode */ | |
900 jmp .L_do_loop_mmx | |
901 | |
902 .align 32,0x90 | |
903 .L_while_test_mmx: | |
904 /* while (in < last && out < end) | |
905 */ | |
906 cmpl out_r, end(%esp) | |
907 jbe .L_break_loop /* if (out >= end) */ | |
908 | |
909 cmpl in_r, last(%esp) | |
910 jbe .L_break_loop | |
911 | |
912 .L_do_loop_mmx: | |
913 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ | |
914 | |
915 cmpl $32, bitslong_r | |
916 ja .L_get_length_code_mmx /* if (32 < bits) */ | |
917 | |
918 movd bitslong_r, tmp_mm | |
919 movd (in_r), %mm7 | |
920 addl $4, in_r | |
921 psllq tmp_mm, %mm7 | |
922 addl $32, bitslong_r | |
923 por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */ | |
924 | |
925 .L_get_length_code_mmx: | |
926 pand hold_mm, lmask_mm | |
927 movd lmask_mm, %eax | |
928 movq lmask2_mm, lmask_mm | |
929 movl (%ebx,%eax,4), %eax /* eax = lcode[hold & lmask] */ | |
930 | |
931 .L_dolen_mmx: | |
932 movzbl %ah, %ecx /* ecx = this.bits */ | |
933 movd %ecx, used_mm | |
934 subl %ecx, bitslong_r /* bits -= this.bits */ | |
935 | |
936 testb %al, %al | |
937 jnz .L_test_for_length_base_mmx /* if (op != 0) 45.7% */ | |
938 | |
939 shrl $16, %eax /* output this.val char */ | |
940 stosb | |
941 jmp .L_while_test_mmx | |
942 | |
943 .L_test_for_length_base_mmx: | |
944 #define len_r %edx | |
945 movl %eax, len_r /* len = this */ | |
946 shrl $16, len_r /* len = this.val */ | |
947 | |
948 testb $16, %al | |
949 jz .L_test_for_second_level_length_mmx /* if ((op & 16) == 0) 8% */ | |
950 andl $15, %eax /* op &= 15 */ | |
951 jz .L_decode_distance_mmx /* if (!op) */ | |
952 | |
953 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ | |
954 movd %eax, used_mm | |
955 movd hold_mm, %ecx | |
956 subl %eax, bitslong_r | |
957 andl .L_mask(,%eax,4), %ecx | |
958 addl %ecx, len_r /* len += hold & mask[op] */ | |
959 | |
960 .L_decode_distance_mmx: | |
961 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ | |
962 | |
963 cmpl $32, bitslong_r | |
964 ja .L_get_dist_code_mmx /* if (32 < bits) */ | |
965 | |
966 movd bitslong_r, tmp_mm | |
967 movd (in_r), %mm7 | |
968 addl $4, in_r | |
969 psllq tmp_mm, %mm7 | |
970 addl $32, bitslong_r | |
971 por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */ | |
972 | |
973 .L_get_dist_code_mmx: | |
974 movl dcode(%esp), %ebx /* ebx = dcode */ | |
975 pand hold_mm, dmask_mm | |
976 movd dmask_mm, %eax | |
977 movq dmask2_mm, dmask_mm | |
978 movl (%ebx,%eax,4), %eax /* eax = dcode[hold & lmask] */ | |
979 | |
/*
 * Unpack a distance table entry: eax = {val:16, op:8, bits:8}.
 * op & 16 -> base distance with op&15 extra bits to read;
 * op & 64 -> invalid; otherwise second-level table indirection.
 * Then copy len bytes from (out - dist) to out.
 */
980 .L_dodist_mmx: | |
981 #define dist_r %ebx | |
982 movzbl %ah, %ecx /* ecx = this.bits */ | |
983 movl %eax, dist_r | |
984 shrl $16, dist_r /* dist = this.val */ | |
985 subl %ecx, bitslong_r /* bits -= this.bits */ | |
986 movd %ecx, used_mm | |
987 | |
988 testb $16, %al /* if ((op & 16) == 0) */ | |
989 jz .L_test_for_second_level_dist_mmx | |
990 andl $15, %eax /* op &= 15 */ | |
991 jz .L_check_dist_one_mmx | |
992 | |
993 .L_add_bits_to_dist_mmx: | |
994 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ | |
995 movd %eax, used_mm /* save bit length of current op */ | |
996 movd hold_mm, %ecx /* get the next bits on input stream */ | |
997 subl %eax, bitslong_r /* bits -= op bits */ | |
998 andl .L_mask(,%eax,4), %ecx /* ecx = hold & mask[op] */ | |
999 addl %ecx, dist_r /* dist += hold & mask[op] */ | |
1000 | |
1001 .L_check_window_mmx: | |
1002 movl in_r, in(%esp) /* save in so from can use it's reg */ | |
1003 movl out_r, %eax | |
1004 subl beg(%esp), %eax /* nbytes = out - beg */ | |
1005 | |
1006 cmpl dist_r, %eax | |
1007 jb .L_clip_window_mmx /* if (dist > nbytes) 4.2% */ | |
1008 | |
/* Whole match is inside the output buffer: from = out - dist.
 * A match is always >= 3 bytes, so copy 3 bytes straight-line and
 * let rep movsb (esi=from, edi=out, ecx=len-3) finish the rest.
 * Byte copies are required: source and destination may overlap.
 */
1009 movl len_r, %ecx | |
1010 movl out_r, from_r | |
1011 subl dist_r, from_r /* from = out - dist */ | |
1012 | |
1013 subl $3, %ecx | |
1014 movb (from_r), %al | |
1015 movb %al, (out_r) | |
1016 movb 1(from_r), %al | |
1017 movb 2(from_r), %dl | |
1018 addl $3, from_r | |
1019 movb %al, 1(out_r) | |
1020 movb %dl, 2(out_r) | |
1021 addl $3, out_r | |
1022 rep movsb | |
1023 | |
1024 movl in(%esp), in_r /* move in back to %esi, toss from */ | |
1025 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */ | |
1026 jmp .L_while_test_mmx | |
1027 | |
/*
 * Special case dist == 1: the match repeats the last output byte,
 * i.e. a memset of that byte for len bytes (common for runs).
 * Falls back to the generic path at the start of the buffer
 * (beg == out) where the previous byte may live in the window.
 */
1028 .align 16,0x90 | |
1029 .L_check_dist_one_mmx: | |
1030 cmpl $1, dist_r | |
1031 jne .L_check_window_mmx | |
1032 cmpl out_r, beg(%esp) | |
1033 je .L_check_window_mmx | |
1034 | |
1035 decl out_r /* point at the byte to replicate */ | |
1036 movl len_r, %ecx | |
1037 movb (out_r), %al /* al = fill byte */ | |
1038 subl $3, %ecx | |
1039 | |
/* store 3 bytes straight-line (len >= 3), rep stosb for the rest */ | |
1040 movb %al, 1(out_r) | |
1041 movb %al, 2(out_r) | |
1042 movb %al, 3(out_r) | |
1043 addl $4, out_r | |
1044 rep stosb | |
1045 | |
1046 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */ | |
1047 jmp .L_while_test_mmx | |
1048 | |
/*
 * Length code needs a second-level table lookup:
 *   this = lcode[this.val + (hold & mask[op & 15])];
 * (op & 64) set instead means end-of-block / invalid code.
 */
1049 .align 16,0x90 | |
1050 .L_test_for_second_level_length_mmx: | |
1051 testb $64, %al | |
1052 jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */ | |
1053 | |
1054 andl $15, %eax | |
1055 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ | |
1056 movd hold_mm, %ecx | |
1057 andl .L_mask(,%eax,4), %ecx | |
1058 addl len_r, %ecx /* index = len + (hold & mask[op]) */ | |
1059 movl (%ebx,%ecx,4), %eax /* eax = lcode[len + (hold & mask[op])] */ | |
1060 jmp .L_dolen_mmx | |
1061 | |
/*
 * Distance code needs a second-level table lookup:
 *   this = dcode[this.val + (hold & mask[op & 15])];
 * (op & 64) set instead means an invalid distance code.
 */
1062 .align 16,0x90 | |
1063 .L_test_for_second_level_dist_mmx: | |
1064 testb $64, %al | |
1065 jnz .L_invalid_distance_code /* if ((op & 64) != 0) */ | |
1066 | |
1067 andl $15, %eax | |
1068 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ | |
1069 movd hold_mm, %ecx | |
1070 andl .L_mask(,%eax,4), %ecx | |
1071 movl dcode(%esp), %eax /* eax = dcode */ | |
1072 addl dist_r, %ecx /* index = dist + (hold & mask[op]) */ | |
1073 movl (%eax,%ecx,4), %eax /* eax = dcode[dist + (hold & mask[op])] */ | |
1074 jmp .L_dodist_mmx | |
1075 | |
1076 .align 16,0x90 | |
1077 .L_clip_window_mmx: | |
1078 #define nbytes_r %ecx | |
1079 movl %eax, nbytes_r | |
1080 movl wsize(%esp), %eax /* prepare for dist compare */ | |
1081 negl nbytes_r /* nbytes = -nbytes */ | |
1082 movl window(%esp), from_r /* from = window */ | |
1083 | |
1084 cmpl dist_r, %eax | |
1085 jb .L_invalid_distance_too_far /* if (dist > wsize) */ | |
1086 | |
1087 addl dist_r, nbytes_r /* nbytes = dist - nbytes */ | |
1088 cmpl $0, write(%esp) | |
1089 jne .L_wrap_around_window_mmx /* if (write != 0) */ | |
1090 | |
1091 subl nbytes_r, %eax | |
1092 addl %eax, from_r /* from += wsize - nbytes */ | |
1093 | |
1094 cmpl nbytes_r, len_r | |
1095 jbe .L_do_copy1_mmx /* if (nbytes >= len) */ | |
1096 | |
1097 subl nbytes_r, len_r /* len -= nbytes */ | |
1098 rep movsb | |
1099 movl out_r, from_r | |
1100 subl dist_r, from_r /* from = out - dist */ | |
1101 jmp .L_do_copy1_mmx | |
1102 | |
1103 cmpl nbytes_r, len_r | |
1104 jbe .L_do_copy1_mmx /* if (nbytes >= len) */ | |
1105 | |
1106 subl nbytes_r, len_r /* len -= nbytes */ | |
1107 rep movsb | |
1108 movl out_r, from_r | |
1109 subl dist_r, from_r /* from = out - dist */ | |
1110 jmp .L_do_copy1_mmx | |
1111 | |
/*
 * Window has wrapped (write != 0) and the match starts before the
 * wrap point: copy up to three pieces in order —
 * tail of the window, head of the window, then the output buffer.
 */
1112 .L_wrap_around_window_mmx: | |
1113 #define write_r %eax | |
1114 movl write(%esp), write_r | |
1115 cmpl write_r, nbytes_r | |
1116 jbe .L_contiguous_in_window_mmx /* if (write >= nbytes) */ | |
1117 | |
1118 addl wsize(%esp), from_r | |
1119 addl write_r, from_r | |
1120 subl nbytes_r, from_r /* from += wsize + write - nbytes */ | |
1121 subl write_r, nbytes_r /* nbytes -= write */ | |
1122 #undef write_r | |
1123 | |
1124 cmpl nbytes_r, len_r | |
1125 jbe .L_do_copy1_mmx /* if (nbytes >= len) */ | |
1126 | |
/* piece 1 done; continue from the start of the window */ | |
1127 subl nbytes_r, len_r /* len -= nbytes */ | |
1128 rep movsb | |
1129 movl window(%esp), from_r /* from = window */ | |
1130 movl write(%esp), nbytes_r /* nbytes = write */ | |
1131 cmpl nbytes_r, len_r | |
1132 jbe .L_do_copy1_mmx /* if (nbytes >= len) */ | |
1133 | |
/* piece 2 done; finish from the output buffer itself */ | |
1134 subl nbytes_r, len_r /* len -= nbytes */ | |
1135 rep movsb | |
1136 movl out_r, from_r | |
1137 subl dist_r, from_r /* from = out - dist */ | |
1138 jmp .L_do_copy1_mmx | |
1139 | |
/*
 * Match lies wholly within the written part of a wrapped window
 * (write >= nbytes): from = window + write - nbytes, then at most one
 * continuation from the output buffer.  .L_do_copy1_mmx performs the
 * final rep movsb of the remaining len bytes and restores in/lcode.
 */
1140 .L_contiguous_in_window_mmx: | |
1141 #define write_r %eax | |
1142 addl write_r, from_r | |
1143 subl nbytes_r, from_r /* from += write - nbytes */ | |
1144 #undef write_r | |
1145 | |
1146 cmpl nbytes_r, len_r | |
1147 jbe .L_do_copy1_mmx /* if (nbytes >= len) */ | |
1148 | |
1149 subl nbytes_r, len_r /* len -= nbytes */ | |
1150 rep movsb | |
1151 movl out_r, from_r | |
1152 subl dist_r, from_r /* from = out - dist */ | |
1153 | |
1154 .L_do_copy1_mmx: | |
1155 #undef nbytes_r | |
1156 #define in_r %esi | |
1157 movl len_r, %ecx | |
1158 rep movsb /* copy the final len bytes */ | |
1159 | |
1160 movl in(%esp), in_r /* move in back to %esi, toss from */ | |
1161 movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */ | |
1162 jmp .L_while_test_mmx | |
1163 | |
1164 #undef hold_r | |
1165 #undef bitslong_r | |
1166 | |
1167 #endif /* USE_MMX || RUN_TIME_MMX */ | |
1168 | |
1169 | |
1170 /*** USE_MMX, NO_MMX, and RUNTIME_MMX from here on ***/ | |
1171 | |
/*
 * Exit stubs shared by the MMX and non-MMX loops.  Each loads
 * %ecx = message (or 0) and %edx = new state->mode, then falls into
 * .L_update_stream_state, which stores both and breaks out of the loop.
 */
1172 .L_invalid_distance_code: | |
1173 /* else { | |
1174 * strm->msg = "invalid distance code"; | |
1175 * state->mode = BAD; | |
1176 * } | |
1177 */ | |
1178 movl $.L_invalid_distance_code_msg, %ecx | |
1179 movl $INFLATE_MODE_BAD, %edx | |
1180 jmp .L_update_stream_state | |
1181 | |
1182 .L_test_for_end_of_block: | |
1183 /* else if (op & 32) { | |
1184 * state->mode = TYPE; | |
1185 * break; | |
1186 * } | |
1187 */ | |
1188 testb $32, %al | |
1189 jz .L_invalid_literal_length_code /* if ((op & 32) == 0) */ | |
1190 | |
1191 movl $0, %ecx /* no error message for end-of-block */ | |
1192 movl $INFLATE_MODE_TYPE, %edx | |
1193 jmp .L_update_stream_state | |
1194 | |
1195 .L_invalid_literal_length_code: | |
1196 /* else { | |
1197 * strm->msg = "invalid literal/length code"; | |
1198 * state->mode = BAD; | |
1199 * } | |
1200 */ | |
1201 movl $.L_invalid_literal_length_code_msg, %ecx | |
1202 movl $INFLATE_MODE_BAD, %edx | |
1203 jmp .L_update_stream_state | |
1204 | |
1205 .L_invalid_distance_too_far: | |
1206 /* strm->msg = "invalid distance too far back"; | |
1207 * state->mode = BAD; | |
1208 */ | |
1209 movl in(%esp), in_r /* from_r has in's reg, put in back */ | |
1210 movl $.L_invalid_distance_too_far_msg, %ecx | |
1211 movl $INFLATE_MODE_BAD, %edx | |
1212 jmp .L_update_stream_state | |
1213 | |
1214 .L_update_stream_state: | |
1215 /* set strm->msg = %ecx, strm->state->mode = %edx */ | |
1216 movl strm_sp(%esp), %eax | |
1217 testl %ecx, %ecx /* if (msg != NULL) */ | |
1218 jz .L_skip_msg | |
1219 movl %ecx, msg_strm(%eax) /* strm->msg = msg */ | |
1220 .L_skip_msg: | |
1221 movl state_strm(%eax), %eax /* state = strm->state */ | |
1222 movl %edx, mode_state(%eax) /* state->mode = edx (BAD | TYPE) */ | |
1223 jmp .L_break_loop | |
1224 | |
/*
 * Loop exit: fold the bit accumulator back to a byte boundary and
 * write hold/bits/next_in/next_out back into the z_stream and state.
 * On the MMX path, bits live in %ebp and hold in hold_mm; they are
 * first normalized into the non-MMX registers (%ebx / %ebp).
 */
1225 .align 32,0x90 | |
1226 .L_break_loop: | |
1227 | |
1228 /* | |
1229 * Regs: | |
1230 * | |
1231 * bits = %ebp when mmx, and in %ebx when non-mmx | |
1232 * hold = %hold_mm when mmx, and in %ebp when non-mmx | |
1233 * in = %esi | |
1234 * out = %edi | |
1235 */ | |
1236 | |
1237 #if defined( USE_MMX ) || defined( RUN_TIME_MMX ) | |
1238 | |
1239 #if defined( RUN_TIME_MMX ) | |
1240 | |
1241 cmpl $DO_USE_MMX, inflate_fast_use_mmx | |
1242 jne .L_update_next_in | |
1243 | |
1244 #endif /* RUN_TIME_MMX */ | |
1245 | |
1246 movl %ebp, %ebx /* mmx path: bits into %ebx like non-mmx */ | |
1247 | |
1248 .L_update_next_in: | |
1249 | |
1250 #endif | |
1251 | |
1252 #define strm_r %eax | |
1253 #define state_r %edx | |
1254 | |
1255 /* len = bits >> 3; | |
1256 * in -= len; | |
1257 * bits -= len << 3; | |
1258 * hold &= (1U << bits) - 1; | |
1259 * state->hold = hold; | |
1260 * state->bits = bits; | |
1261 * strm->next_in = in; | |
1262 * strm->next_out = out; | |
1263 */ | |
1264 movl strm_sp(%esp), strm_r | |
1265 movl %ebx, %ecx | |
1266 movl state_strm(strm_r), state_r | |
1267 shrl $3, %ecx /* ecx = whole bytes held in the accumulator */ | |
1268 subl %ecx, in_r /* give those bytes back to the input */ | |
1269 shll $3, %ecx | |
1270 subl %ecx, %ebx /* bits -= len << 3 */ | |
1271 movl out_r, next_out_strm(strm_r) | |
1272 movl %ebx, bits_state(state_r) | |
1273 movl %ebx, %ecx /* cl = bits, for the mask shift below */ | |
1274 | |
/* if the on-stack bounce buffer was in use, translate `in` and `last`
 * back into strm->next_in coordinates */
1275 leal buf(%esp), %ebx | |
1276 cmpl %ebx, last(%esp) | |
1277 jne .L_buf_not_used /* if buf != last */ | |
1278 | |
1279 subl %ebx, in_r /* in -= buf */ | |
1280 movl next_in_strm(strm_r), %ebx | |
1281 movl %ebx, last(%esp) /* last = strm->next_in */ | |
1282 addl %ebx, in_r /* in += strm->next_in */ | |
1283 movl avail_in_strm(strm_r), %ebx | |
1284 subl $11, %ebx | |
1285 addl %ebx, last(%esp) /* last = &strm->next_in[ avail_in - 11 ] */ | |
1286 | |
1287 .L_buf_not_used: | |
1288 movl in_r, next_in_strm(strm_r) | |
1289 | |
1290 movl $1, %ebx | |
1291 shll %cl, %ebx | |
1292 decl %ebx /* ebx = (1U << bits) - 1 */ | |
1293 | |
1294 #if defined( USE_MMX ) || defined( RUN_TIME_MMX ) | |
1295 | |
1296 #if defined( RUN_TIME_MMX ) | |
1297 | |
1298 cmpl $DO_USE_MMX, inflate_fast_use_mmx | |
1299 jne .L_update_hold | |
1300 | |
1301 #endif /* RUN_TIME_MMX */ | |
1302 | |
1303 psrlq used_mm, hold_mm /* hold_mm >>= last bit length */ | |
1304 movd hold_mm, %ebp /* hold into %ebp like non-mmx path */ | |
1305 | |
1306 emms /* leave MMX state; FPU usable again */ | |
1307 | |
1308 .L_update_hold: | |
1309 | |
1310 #endif /* USE_MMX || RUN_TIME_MMX */ | |
1311 | |
1312 andl %ebx, %ebp /* hold &= (1U << bits) - 1 */ | |
1313 movl %ebp, hold_state(state_r) | |
1314 | |
/*
 * Recompute avail_in/avail_out from the final in/out pointers.
 * The 11 / 257 slop constants match the entry checks of inflate_fast
 * (loop runs while avail_in > 10 and avail_out > 256), then restore
 * callee-saved registers and return.
 */
1315 #define last_r %ebx | |
1316 | |
1317 /* strm->avail_in = in < last ? 11 + (last - in) : 11 - (in - last) */ | |
1318 movl last(%esp), last_r | |
1319 cmpl in_r, last_r | |
1320 jbe .L_last_is_smaller /* if (in >= last) */ | |
1321 | |
1322 subl in_r, last_r /* last -= in */ | |
1323 addl $11, last_r /* last += 11 */ | |
1324 movl last_r, avail_in_strm(strm_r) | |
1325 jmp .L_fixup_out | |
1326 .L_last_is_smaller: | |
1327 subl last_r, in_r /* in -= last */ | |
1328 negl in_r /* in = -in */ | |
1329 addl $11, in_r /* in += 11 */ | |
1330 movl in_r, avail_in_strm(strm_r) | |
1331 | |
1332 #undef last_r | |
1333 #define end_r %ebx | |
1334 | |
1335 .L_fixup_out: | |
1336 /* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end)*/ | |
1337 movl end(%esp), end_r | |
1338 cmpl out_r, end_r | |
1339 jbe .L_end_is_smaller /* if (out >= end) */ | |
1340 | |
1341 subl out_r, end_r /* end -= out */ | |
1342 addl $257, end_r /* end += 257 */ | |
1343 movl end_r, avail_out_strm(strm_r) | |
1344 jmp .L_done | |
1345 .L_end_is_smaller: | |
1346 subl end_r, out_r /* out -= end */ | |
1347 negl out_r /* out = -out */ | |
1348 addl $257, out_r /* out += 257 */ | |
1349 movl out_r, avail_out_strm(strm_r) | |
1350 | |
1351 #undef end_r | |
1352 #undef strm_r | |
1353 #undef state_r | |
1354 | |
1355 .L_done: | |
/* epilogue: pops mirror the prologue's pushf/push sequence */ | |
1356 addl $local_var_size, %esp | |
1357 popf | |
1358 popl %ebx | |
1359 popl %ebp | |
1360 popl %esi | |
1361 popl %edi | |
1362 ret | |
1363 | |
1364 #if defined( GAS_ELF ) | |
1365 /* elf info */ | |
1366 .type inflate_fast,@function | |
1367 .size inflate_fast,.-inflate_fast | |
1368 #endif |