Mercurial > hg > CbC > CbC_gcc
comparison zlib/contrib/masmx86/inffas32.asm @ 51:ae3a4bfb450b
add some files of version 4.4.3 that have been forgotten.
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 07 Feb 2010 18:27:48 +0900 |
parents | |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
47:3bfb6c00c1e0 | 51:ae3a4bfb450b |
---|---|
1 ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding | |
2 ; * | |
3 ; * inffas32.asm is derived from inffas86.c, with translation of assembly code | |
4 ; * | |
5 ; * Copyright (C) 1995-2003 Mark Adler | |
6 ; * For conditions of distribution and use, see copyright notice in zlib.h | |
7 ; * | |
8 ; * Copyright (C) 2003 Chris Anderson <christop@charm.net> | |
9 ; * Please use the copyright conditions above. | |
10 ; * | |
11 ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from | |
12 ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at | |
13 ; * the moment. I have successfully compiled and tested this code with gcc2.96, | |
14 ; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S | |
15 ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX | |
16 ; * enabled. I will attempt to merge the MMX code into this version. Newer | |
17 ; * versions of this and inffast.S can be found at | |
18 ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/ | |
19 ; * | |
20 ; * 2005 : modification by Gilles Vollant | |
21 ; */ | |
22 ; For Visual C++ 4.x and higher and ML 6.x and higher | |
23 ; ml.exe is in directory \MASM611C of Win95 DDK | |
24 ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm | |
25 ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/ | |
26 ; | |
27 ; | |
28 ; compile with command line option | |
29 ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm | |
30 | |
31 ; if you define NO_GZIP (see inflate.h), compile with | |
32 ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm | |
33 | |
34 | |
35 ; zlib1222sup is 0 for zlib 1.2.2.1 and lower | |
36 ; zlib1222sup is 8 for zlib 1.2.2.2 and more (with addition of dmax and head | |
37 ; in inflate_state in inflate.h) | |
; zlib1222sup is added to every inflate_state field offset defined below
; (wsize_state .. distbits_state); it shifts them by 8 bytes.
38 zlib1222sup equ 8 | |
39 | |
40 | |
; Values this routine stores into state->mode:
;   INFLATE_MODE_TYPE - written when an end-of-block code is decoded
;   INFLATE_MODE_BAD  - written when invalid input is detected
; The pair 11/26 is used when GUNZIP is defined, or when NO_GUNZIP is not
; defined; the pair 3/17 is used only when NO_GUNZIP is defined and GUNZIP
; is not.
; NOTE(review): these presumably mirror the inflate_mode enum of inflate.h
; for the matching build configuration - confirm when changing zlib version.
41 IFDEF GUNZIP | |
42 INFLATE_MODE_TYPE equ 11 | |
43 INFLATE_MODE_BAD equ 26 | |
44 ELSE | |
45 IFNDEF NO_GUNZIP | |
46 INFLATE_MODE_TYPE equ 11 | |
47 INFLATE_MODE_BAD equ 26 | |
48 ELSE | |
49 INFLATE_MODE_TYPE equ 3 | |
50 INFLATE_MODE_BAD equ 17 | |
51 ENDIF | |
52 ENDIF | |
53 | |
54 | |
55 ; 75 "inffast.S" | |
56 ;FILE "inffast.S" | |
57 | |
58 ;;;GLOBAL _inflate_fast | |
59 | |
60 ;;;SECTION .text | |
61 | |
62 | |
63 | |
; Target: 586 protected-mode instruction set with MMX, flat memory model.
64 .586p | |
65 .mmx | |
66 | |
67 name inflate_fast_x86 | |
68 .MODEL FLAT | |
69 | |
; inflate_fast_use_mmx is a run-time dispatch flag:
;   1 (initial)  - CPU not probed yet; L_check_mmx runs the CPUID probe
;   2            - use the MMX decoding loop
;   3            - use the plain integer decoding loop
; The probe overwrites the flag with 2 or 3.
70 _DATA segment | |
71 inflate_fast_use_mmx: | |
72 dd 1 | |
73 | |
74 | |
75 _TEXT segment | |
76 PUBLIC _inflate_fast | |
77 | |
; Public entry point: a stub that jumps over the constant data that
; follows, to the real code at inflate_fast_entry.
78 ALIGN 4 | |
79 _inflate_fast: | |
80 jmp inflate_fast_entry | |
81 | |
82 | |
83 | |
; Unlabeled, NUL-terminated credit string embedded in the code segment.
84 ALIGN 4 | |
85 db 'Fast decoding Code from Chris Anderson' | |
86 db 0 | |
87 | |
; NUL-terminated error messages; the address of one of them is stored at
; offset 24 of the stream structure when invalid data is found.
88 ALIGN 4 | |
89 invalid_literal_length_code_msg: | |
90 db 'invalid literal/length code' | |
91 db 0 | |
92 | |
93 ALIGN 4 | |
94 invalid_distance_code_msg: | |
95 db 'invalid distance code' | |
96 db 0 | |
97 | |
98 ALIGN 4 | |
99 invalid_distance_too_far_msg: | |
100 db 'invalid distance too far back' | |
101 db 0 | |
102 | |
103 | |
; inflate_fast_mask[n] = (1 << n) - 1 for n = 0..32, one dword per entry.
; The MMX path indexes it as [inflate_fast_mask+eax*4] to keep only the
; low n bits of the bit accumulator.
104 ALIGN 4 | |
105 inflate_fast_mask: | |
106 dd 0 | |
107 dd 1 | |
108 dd 3 | |
109 dd 7 | |
110 dd 15 | |
111 dd 31 | |
112 dd 63 | |
113 dd 127 | |
114 dd 255 | |
115 dd 511 | |
116 dd 1023 | |
117 dd 2047 | |
118 dd 4095 | |
119 dd 8191 | |
120 dd 16383 | |
121 dd 32767 | |
122 dd 65535 | |
123 dd 131071 | |
124 dd 262143 | |
125 dd 524287 | |
126 dd 1048575 | |
127 dd 2097151 | |
128 dd 4194303 | |
129 dd 8388607 | |
130 dd 16777215 | |
131 dd 33554431 | |
132 dd 67108863 | |
133 dd 134217727 | |
134 dd 268435455 | |
135 dd 536870911 | |
136 dd 1073741823 | |
137 dd 2147483647 | |
138 dd 4294967295 | |
139 | |
140 | |
; Byte offsets of the inflate_state fields used by this file, relative to
; the state pointer loaded from offset 28 of the stream structure.
; All offsets except mode_state include zlib1222sup.
; NOTE(review): these are hard-coded copies of the C struct layout; they
; must be re-checked against inflate.h whenever the struct changes.
141 mode_state equ 0 ;/* state->mode */ | |
142 wsize_state equ (32+zlib1222sup) ;/* state->wsize */ | |
143 write_state equ (36+4+zlib1222sup) ;/* state->write */ | |
144 window_state equ (40+4+zlib1222sup) ;/* state->window */ | |
145 hold_state equ (44+4+zlib1222sup) ;/* state->hold */ | |
146 bits_state equ (48+4+zlib1222sup) ;/* state->bits */ | |
147 lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */ | |
148 distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */ | |
149 lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */ | |
150 distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */ | |
151 | |
152 | |
153 ;;SECTION .text | |
154 ; 205 "inffast.S" | |
155 ;GLOBAL inflate_fast_use_mmx | |
156 | |
157 ;SECTION .data | |
158 | |
159 | |
160 ; GLOBAL inflate_fast_use_mmx:object | |
161 ;.size inflate_fast_use_mmx, 4 | |
162 ; 226 "inffast.S" | |
163 ;SECTION .text | |
164 | |
; inflate_fast_entry - prologue and local-frame setup.
;
; Saves edi, esi, ebp, ebx and EFLAGS, reserves 64 bytes of locals and
; clears the direction flag. After that the two stack arguments are at
; [esp+88] (stream pointer) and [esp+92] (second argument, "start" below).
; NOTE(review): stream offsets +0/+4/+12/+16/+24/+28 presumably are
; next_in/avail_in/next_out/avail_out/msg/state of z_stream - confirm.
;
; Local frame built here:
;   [esp+0]  (1 << lenbits) - 1        [esp+4]  (1 << distbits) - 1
;   [esp+8]  lencode table pointer     [esp+12] distcode table pointer
;   [esp+16] end  = [strm+12] + [strm+16] - 257
;   [esp+20] last = [strm+0]  + [strm+4]  - 11
;   [esp+24] current match length (written later by the decode loop)
;   [esp+28] 12-byte scratch input buffer (through [esp+39])
;   [esp+40] beg  = [strm+12] - (start - [strm+16])
;   [esp+44] saved input pointer
;   [esp+48] state->write   [esp+52] state->wsize   [esp+56] state->window
;   [esp+60] output pointer ([strm+12])
; On exit from this block: esi = input pointer, ebp = state->hold,
; ebx = state->bits.
165 ALIGN 4 | |
166 inflate_fast_entry: | |
167 push edi | |
168 push esi | |
169 push ebp | |
170 push ebx | |
171 pushfd | |
172 sub esp,64 | |
173 cld | |
174 | |
175 | |
176 | |
177 | |
; esi = stream pointer, edi = state pointer
178 mov esi, [esp+88] | |
179 mov edi, [esi+28] | |
180 | |
181 | |
182 | |
183 | |
184 | |
185 | |
186 | |
; last = input pointer + input byte count - 11
187 mov edx, [esi+4] | |
188 mov eax, [esi+0] | |
189 | |
190 add edx,eax | |
191 sub edx,11 | |
192 | |
193 mov [esp+44],eax | |
194 mov [esp+20],edx | |
195 | |
196 mov ebp, [esp+92] | |
197 mov ecx, [esi+16] | |
198 mov ebx, [esi+12] | |
199 | |
; ebp = output pointer - (start - output space)
200 sub ebp,ecx | |
201 neg ebp | |
202 add ebp,ebx | |
203 | |
; ecx = output pointer + output space - 257
204 sub ecx,257 | |
205 add ecx,ebx | |
206 | |
207 mov [esp+60],ebx | |
208 mov [esp+40],ebp | |
209 mov [esp+16],ecx | |
210 ; 285 "inffast.S" | |
211 mov eax, [edi+lencode_state] | |
212 mov ecx, [edi+distcode_state] | |
213 | |
214 mov [esp+8],eax | |
215 mov [esp+12],ecx | |
216 | |
; length-code index mask
217 mov eax,1 | |
218 mov ecx, [edi+lenbits_state] | |
219 shl eax,cl | |
220 dec eax | |
221 mov [esp+0],eax | |
222 | |
; distance-code index mask
223 mov eax,1 | |
224 mov ecx, [edi+distbits_state] | |
225 shl eax,cl | |
226 dec eax | |
227 mov [esp+4],eax | |
228 | |
229 mov eax, [edi+wsize_state] | |
230 mov ecx, [edi+write_state] | |
231 mov edx, [edi+window_state] | |
232 | |
233 mov [esp+52],eax | |
234 mov [esp+48],ecx | |
235 mov [esp+56],edx | |
236 | |
237 mov ebp, [edi+hold_state] | |
238 mov ebx, [edi+bits_state] | |
239 ; 321 "inffast.S" | |
; If last > in there is enough input to read directly: go align the
; input pointer. Otherwise copy the remaining input (last + 11 - in
; bytes) into the 12-byte scratch buffer, zero-fill the rest of it, and
; decode from the scratch buffer with last pointing at its start.
; edi is clobbered by the copy and reloaded at L_is_aligned.
240 mov esi, [esp+44] | |
241 mov ecx, [esp+20] | |
242 cmp ecx,esi | |
243 ja L_align_long | |
244 | |
245 add ecx,11 | |
246 sub ecx,esi | |
247 mov eax,12 | |
248 sub eax,ecx | |
249 lea edi, [esp+28] | |
250 rep movsb | |
251 mov ecx,eax | |
252 xor eax,eax | |
253 rep stosb | |
254 lea esi, [esp+28] | |
255 mov [esp+20],esi | |
256 jmp L_is_aligned | |
257 | |
258 | |
; L_align_long - feed input bytes into the bit accumulator one at a time
; until esi is 4-byte aligned. Each byte is shifted left by the current
; bit count (ebx) and OR-ed into ebp; ebx grows by 8 per byte.
259 L_align_long: | |
260 test esi,3 | |
261 jz L_is_aligned | |
262 xor eax,eax | |
263 mov al, [esi] | |
264 inc esi | |
265 mov ecx,ebx | |
266 add ebx,8 | |
267 shl eax,cl | |
268 or ebp,eax | |
269 jmp L_align_long | |
270 | |
; L_is_aligned - reload the output pointer into edi, then dispatch on
; inflate_fast_use_mmx: 2 -> MMX loop, above 2 -> integer loop, otherwise
; probe the CPU once, store the result (2 or 3) and dispatch again.
271 L_is_aligned: | |
272 mov edi, [esp+60] | |
273 ; 366 "inffast.S" | |
274 L_check_mmx: | |
275 cmp dword ptr [inflate_fast_use_mmx],2 | |
276 je L_init_mmx | |
277 ja L_do_loop | |
278 | |
; CPU probe. eax/ebx/ecx/edx hold live decoder state and are clobbered
; by cpuid, so they are saved here and restored at L_check_mmx_pop.
279 push eax | |
280 push ebx | |
281 push ecx | |
282 push edx | |
; Toggle EFLAGS bit 21 (0200000h) and read EFLAGS back; if the bit did
; not change, cpuid is not available.
283 pushfd | |
284 mov eax, [esp] | |
285 xor dword ptr [esp],0200000h | |
286 | |
287 | |
288 | |
289 | |
290 popfd | |
291 pushfd | |
292 pop edx | |
293 xor edx,eax | |
294 jz L_dont_use_mmx | |
; cpuid leaf 0: require the vendor string "GenuineIntel" (ebx, edx, ecx).
295 xor eax,eax | |
296 cpuid | |
297 cmp ebx,0756e6547h | |
298 jne L_dont_use_mmx | |
299 cmp ecx,06c65746eh | |
300 jne L_dont_use_mmx | |
301 cmp edx,049656e69h | |
302 jne L_dont_use_mmx | |
; cpuid leaf 1: require family field (eax bits 8..11) == 6 and feature
; bit 23 of edx (0800000h) set.
303 mov eax,1 | |
304 cpuid | |
305 shr eax,8 | |
306 and eax,15 | |
307 cmp eax,6 | |
308 jne L_dont_use_mmx | |
309 test edx,0800000h | |
310 jnz L_use_mmx | |
311 jmp L_dont_use_mmx | |
312 L_use_mmx: | |
313 mov dword ptr [inflate_fast_use_mmx],2 | |
314 jmp L_check_mmx_pop | |
315 L_dont_use_mmx: | |
316 mov dword ptr [inflate_fast_use_mmx],3 | |
317 L_check_mmx_pop: | |
318 pop edx | |
319 pop ecx | |
320 pop ebx | |
321 pop eax | |
322 jmp L_check_mmx | |
323 ; 426 "inffast.S" | |
; L_do_loop - top of the integer (non-MMX) decoding loop.
; Register roles in this loop: esi = input pointer, edi = output pointer,
; ebp = bit accumulator, bl = number of valid bits in ebp.
; A table entry is one dword: al = op, ah = number of bits to drop,
; upper 16 bits = value.
324 ALIGN 4 | |
325 L_do_loop: | |
326 ; 437 "inffast.S" | |
; Refill: when 15 or fewer bits are held, append 16 more input bits.
327 cmp bl,15 | |
328 ja L_get_length_code | |
329 | |
330 xor eax,eax | |
331 lodsw | |
332 mov cl,bl | |
333 add bl,16 | |
334 shl eax,cl | |
335 or ebp,eax | |
336 | |
; Look up the length/literal entry indexed by the masked low bits.
337 L_get_length_code: | |
338 mov edx, [esp+0] | |
339 mov ecx, [esp+8] | |
340 and edx,ebp | |
341 mov eax, [ecx+edx*4] | |
342 | |
343 L_dolen: | |
344 | |
345 | |
346 | |
347 | |
348 | |
349 | |
; Drop the bits used by this code from the accumulator.
350 mov cl,ah | |
351 sub bl,ah | |
352 shr ebp,cl | |
353 | |
354 | |
355 | |
356 | |
357 | |
358 | |
; op == 0: the entry is a literal; store its low value byte to output.
359 test al,al | |
360 jnz L_test_for_length_base | |
361 | |
362 shr eax,16 | |
363 stosb | |
364 | |
; Continue only while end > out and last > in.
365 L_while_test: | |
366 | |
367 | |
368 cmp [esp+16],edi | |
369 jbe L_break_loop | |
370 | |
371 cmp [esp+20],esi | |
372 ja L_do_loop | |
373 jmp L_break_loop | |
374 | |
; L_test_for_length_base - entry with non-zero op.
; edx = entry value. If op bit 16 is clear, go to the second-level /
; end-of-block handling. Otherwise the low 4 bits of op give the number
; of extra bits to add to edx; the result is stored as the match length.
375 L_test_for_length_base: | |
376 ; 502 "inffast.S" | |
377 mov edx,eax | |
378 shr edx,16 | |
379 mov cl,al | |
380 | |
381 test al,16 | |
382 jz L_test_for_second_level_length | |
383 and cl,15 | |
384 jz L_save_len | |
385 cmp bl,cl | |
386 jae L_add_bits_to_len | |
387 | |
; Not enough bits held: append 16 more (ch keeps the extra-bit count
; while cl is used as the shift count).
388 mov ch,cl | |
389 xor eax,eax | |
390 lodsw | |
391 mov cl,bl | |
392 add bl,16 | |
393 shl eax,cl | |
394 or ebp,eax | |
395 mov cl,ch | |
396 | |
; edx += low cl bits of the accumulator; drop those bits.
397 L_add_bits_to_len: | |
398 mov eax,1 | |
399 shl eax,cl | |
400 dec eax | |
401 sub bl,cl | |
402 and eax,ebp | |
403 shr ebp,cl | |
404 add edx,eax | |
405 | |
406 L_save_len: | |
407 mov [esp+24],edx | |
408 | |
409 | |
; L_decode_distance - decode the distance that follows a length.
; Same scheme as the length decode: refill to more than 15 bits, look up
; the distance entry, drop its bits, then add the extra bits (count in
; the low 4 bits of op) to the entry value. Result: edx = distance.
410 L_decode_distance: | |
411 ; 549 "inffast.S" | |
412 cmp bl,15 | |
413 ja L_get_distance_code | |
414 | |
415 xor eax,eax | |
416 lodsw | |
417 mov cl,bl | |
418 add bl,16 | |
419 shl eax,cl | |
420 or ebp,eax | |
421 | |
422 L_get_distance_code: | |
423 mov edx, [esp+4] | |
424 mov ecx, [esp+12] | |
425 and edx,ebp | |
426 mov eax, [ecx+edx*4] | |
427 | |
428 | |
429 L_dodist: | |
430 mov edx,eax | |
431 shr edx,16 | |
432 mov cl,ah | |
433 sub bl,ah | |
434 shr ebp,cl | |
435 ; 584 "inffast.S" | |
436 mov cl,al | |
437 | |
; op bit 16 clear: second-level table or invalid code.
; No extra bits: take the dedicated path that tests for distance 1.
438 test al,16 | |
439 jz L_test_for_second_level_dist | |
440 and cl,15 | |
441 jz L_check_dist_one | |
442 cmp bl,cl | |
443 jae L_add_bits_to_dist | |
444 | |
445 mov ch,cl | |
446 xor eax,eax | |
447 lodsw | |
448 mov cl,bl | |
449 add bl,16 | |
450 shl eax,cl | |
451 or ebp,eax | |
452 mov cl,ch | |
453 | |
454 L_add_bits_to_dist: | |
455 mov eax,1 | |
456 shl eax,cl | |
457 dec eax | |
458 sub bl,cl | |
459 and eax,ebp | |
460 shr ebp,cl | |
461 add edx,eax | |
462 jmp L_check_window | |
463 | |
; L_check_window - copy a match of [esp+24] bytes from distance edx.
; The input pointer is parked in [esp+44] because esi is needed as the
; copy source. If fewer than edx bytes have been produced since beg
; (out - beg < dist), part of the match lies in the window: L_clip_window.
; Otherwise copy from out - dist: the first 3 bytes are moved explicitly,
; the remaining len - 3 with rep movsb.
; NOTE(review): this relies on len being at least 3 - confirm against the
; length tables.
464 L_check_window: | |
465 ; 625 "inffast.S" | |
466 mov [esp+44],esi | |
467 mov eax,edi | |
468 sub eax, [esp+40] | |
469 | |
470 cmp eax,edx | |
471 jb L_clip_window | |
472 | |
473 mov ecx, [esp+24] | |
474 mov esi,edi | |
475 sub esi,edx | |
476 | |
477 sub ecx,3 | |
478 mov al, [esi] | |
479 mov [edi],al | |
480 mov al, [esi+1] | |
481 mov dl, [esi+2] | |
482 add esi,3 | |
483 mov [edi+1],al | |
484 mov [edi+2],dl | |
485 add edi,3 | |
486 rep movsb | |
487 | |
488 mov esi, [esp+44] | |
489 jmp L_while_test | |
490 | |
; L_check_dist_one - reached when the distance code has no extra bits.
; When the distance is exactly 1 and output has already been produced
; (out != beg), the match is a run of the previous output byte: write it
; 3 times explicitly and len - 3 more times with rep stosb.
; Any other case goes through the general L_check_window path.
491 ALIGN 4 | |
492 L_check_dist_one: | |
493 cmp edx,1 | |
494 jne L_check_window | |
495 cmp [esp+40],edi | |
496 je L_check_window | |
497 | |
498 dec edi | |
499 mov ecx, [esp+24] | |
500 mov al, [edi] | |
501 sub ecx,3 | |
502 | |
503 mov [edi+1],al | |
504 mov [edi+2],al | |
505 mov [edi+3],al | |
506 add edi,4 | |
507 rep stosb | |
508 | |
509 jmp L_while_test | |
510 | |
; L_test_for_second_level_length - length entry whose op has bit 16 clear.
; op bit 64 set: end-of-block or invalid code. Otherwise the entry is a
; link to a second-level table: index = value (edx) + low cl bits of the
; accumulator (cl = op), looked up in the lencode table; then decode the
; new entry at L_dolen.
511 ALIGN 4 | |
512 L_test_for_second_level_length: | |
513 | |
514 | |
515 | |
516 | |
517 test al,64 | |
518 jnz L_test_for_end_of_block | |
519 | |
520 mov eax,1 | |
521 shl eax,cl | |
522 dec eax | |
523 and eax,ebp | |
524 add eax,edx | |
525 mov edx, [esp+8] | |
526 mov eax, [edx+eax*4] | |
527 jmp L_dolen | |
528 | |
; L_test_for_second_level_dist - same as above for distance entries:
; op bit 64 set is an invalid distance code, otherwise follow the link
; into the distcode table and decode the new entry at L_dodist.
529 ALIGN 4 | |
530 L_test_for_second_level_dist: | |
531 | |
532 | |
533 | |
534 | |
535 test al,64 | |
536 jnz L_invalid_distance_code | |
537 | |
538 mov eax,1 | |
539 shl eax,cl | |
540 dec eax | |
541 and eax,ebp | |
542 add eax,edx | |
543 mov edx, [esp+12] | |
544 mov eax, [edx+eax*4] | |
545 jmp L_dodist | |
546 | |
; L_clip_window - the match starts before beg, so its first bytes come
; from the sliding window.
; In:  eax = out - beg, edx = distance, edi = out, [esp+24] = length.
; Rejects the match when distance > wsize. Otherwise
; ecx = distance - (out - beg) = number of bytes to take from the window.
; When state->write is 0 the source is window + wsize - ecx; a non-zero
; write position is handled at L_wrap_around_window.
; If the length exceeds ecx, the window part is copied here and the
; remainder (eax bytes, source out - distance) is left to L_do_copy1.
; NOTE(review): only wsize is checked here, not how much of the window
; has actually been filled - confirm this matches the C implementation
; this file is built against.
547 ALIGN 4 | |
548 L_clip_window: | |
549 ; 721 "inffast.S" | |
550 mov ecx,eax | |
551 mov eax, [esp+52] | |
552 neg ecx | |
553 mov esi, [esp+56] | |
554 | |
555 cmp eax,edx | |
556 jb L_invalid_distance_too_far | |
557 | |
558 add ecx,edx | |
559 cmp dword ptr [esp+48],0 | |
560 jne L_wrap_around_window | |
561 | |
562 sub eax,ecx | |
563 add esi,eax | |
564 ; 749 "inffast.S" | |
565 mov eax, [esp+24] | |
566 cmp eax,ecx | |
567 jbe L_do_copy1 | |
568 | |
569 sub eax,ecx | |
570 rep movsb | |
571 mov esi,edi | |
572 sub esi,edx | |
573 jmp L_do_copy1 | |
583 | |
; L_wrap_around_window - window copy when state->write is non-zero.
; In: ecx = bytes needed from the window, esi = window base,
;     edx = distance, edi = out, [esp+24] = length.
; If ecx <= write the bytes are contiguous and end at window + write
; (L_contiguous_in_window). Otherwise the source starts at
; window + wsize + write - ecx: copy up to ecx - write bytes from there,
; then up to write bytes from the window base, then the rest from the
; output itself (out - distance).
; In every path eax holds the number of bytes still to copy when
; L_do_copy1 is reached.
584 L_wrap_around_window: | |
585 ; 793 "inffast.S" | |
586 mov eax, [esp+48] | |
587 cmp ecx,eax | |
588 jbe L_contiguous_in_window | |
589 | |
590 add esi, [esp+52] | |
591 add esi,eax | |
592 sub esi,ecx | |
593 sub ecx,eax | |
594 | |
595 | |
596 mov eax, [esp+24] | |
597 cmp eax,ecx | |
598 jbe L_do_copy1 | |
599 | |
600 sub eax,ecx | |
601 rep movsb | |
602 mov esi, [esp+56] | |
603 mov ecx, [esp+48] | |
604 cmp eax,ecx | |
605 jbe L_do_copy1 | |
606 | |
607 sub eax,ecx | |
608 rep movsb | |
609 mov esi,edi | |
610 sub esi,edx | |
611 jmp L_do_copy1 | |
612 | |
613 L_contiguous_in_window: | |
614 ; 836 "inffast.S" | |
615 add esi,eax | |
616 sub esi,ecx | |
617 | |
618 | |
619 mov eax, [esp+24] | |
620 cmp eax,ecx | |
621 jbe L_do_copy1 | |
622 | |
623 sub eax,ecx | |
624 rep movsb | |
625 mov esi,edi | |
626 sub esi,edx | |
627 | |
; Final copy of eax bytes from esi, then restore the input pointer.
628 L_do_copy1: | |
629 ; 862 "inffast.S" | |
630 mov ecx,eax | |
631 rep movsb | |
632 | |
633 mov esi, [esp+44] | |
634 jmp L_while_test | |
635 ; 878 "inffast.S" | |
; L_init_mmx - set up the register assignment of the MMX loop:
;   mm0 = bit accumulator (moved from ebp)
;   ebp = number of valid bits (moved from ebx)
;   mm4 = length-code mask, with a pristine copy in mm3
;   mm5 = distance-code mask, with a pristine copy in mm2
;   mm1 = pending shift count for mm0, initially 0
;   ebx = lencode table pointer
636 ALIGN 4 | |
637 L_init_mmx: | |
638 emms | |
639 | |
640 | |
641 | |
642 | |
643 | |
644 movd mm0,ebp | |
645 mov ebp,ebx | |
646 ; 896 "inffast.S" | |
647 movd mm4,[esp+0] | |
648 movq mm3,mm4 | |
649 movd mm5,[esp+4] | |
650 movq mm2,mm5 | |
651 pxor mm1,mm1 | |
652 mov ebx, [esp+8] | |
653 jmp L_do_loop_mmx | |
654 | |
; L_do_loop_mmx - top of the MMX decoding loop.
; Bits used by the previous code are dropped lazily: their count is kept
; in mm1 and applied by the psrlq at the start of the next step, while
; the bit counter ebp is decremented immediately.
; Table entry layout is the same as in the integer loop: al = op,
; ah = bits to drop, upper 16 bits = value.
655 ALIGN 4 | |
656 L_do_loop_mmx: | |
657 psrlq mm0,mm1 | |
658 | |
; Refill: when 32 or fewer bits are held, append 32 more input bits.
659 cmp ebp,32 | |
660 ja L_get_length_code_mmx | |
661 | |
662 movd mm6,ebp | |
663 movd mm7,[esi] | |
664 add esi,4 | |
665 psllq mm7,mm6 | |
666 add ebp,32 | |
667 por mm0,mm7 | |
668 | |
; Index the lencode table with the masked low bits; mm4 is used as
; scratch for the masking and then restored from mm3.
669 L_get_length_code_mmx: | |
670 pand mm4,mm0 | |
671 movd eax,mm4 | |
672 movq mm4,mm3 | |
673 mov eax, [ebx+eax*4] | |
674 | |
675 L_dolen_mmx: | |
676 movzx ecx,ah | |
677 movd mm1,ecx | |
678 sub ebp,ecx | |
679 | |
; op == 0: literal; store its low value byte to output.
680 test al,al | |
681 jnz L_test_for_length_base_mmx | |
682 | |
683 shr eax,16 | |
684 stosb | |
685 | |
; Continue only while end > out and last > in.
686 L_while_test_mmx: | |
687 | |
688 | |
689 cmp [esp+16],edi | |
690 jbe L_break_loop | |
691 | |
692 cmp [esp+20],esi | |
693 ja L_do_loop_mmx | |
694 jmp L_break_loop | |
695 | |
; L_test_for_length_base_mmx - entry with non-zero op (MMX loop).
; edx = entry value (length base). If op bit 16 is clear, handle a
; second-level link / end of block. Otherwise eax = extra-bit count
; (op & 15); when non-zero, first drop the pending code bits, then add
; the low eax bits of the accumulator to edx and leave eax as the new
; pending shift count in mm1.
696 L_test_for_length_base_mmx: | |
697 | |
698 mov edx,eax | |
699 shr edx,16 | |
700 | |
701 test al,16 | |
702 jz L_test_for_second_level_length_mmx | |
703 and eax,15 | |
704 jz L_decode_distance_mmx | |
705 | |
706 psrlq mm0,mm1 | |
707 movd mm1,eax | |
708 movd ecx,mm0 | |
709 sub ebp,eax | |
710 and ecx, [inflate_fast_mask+eax*4] | |
711 add edx,ecx | |
712 | |
; L_decode_distance_mmx - decode the distance following a length.
; edx holds the match length throughout. ebx is reused: first as the
; distcode table pointer, then as the distance being built.
713 L_decode_distance_mmx: | |
714 psrlq mm0,mm1 | |
715 | |
; Refill to more than 32 bits, as at the top of the loop.
716 cmp ebp,32 | |
717 ja L_get_dist_code_mmx | |
718 | |
719 movd mm6,ebp | |
720 movd mm7,[esi] | |
721 add esi,4 | |
722 psllq mm7,mm6 | |
723 add ebp,32 | |
724 por mm0,mm7 | |
725 | |
; mm5 is scratch for the masking and is restored from mm2.
726 L_get_dist_code_mmx: | |
727 mov ebx, [esp+12] | |
728 pand mm5,mm0 | |
729 movd eax,mm5 | |
730 movq mm5,mm2 | |
731 mov eax, [ebx+eax*4] | |
732 | |
733 L_dodist_mmx: | |
734 | |
; ebx = entry value (distance base); code bits become the pending shift.
735 movzx ecx,ah | |
736 mov ebx,eax | |
737 shr ebx,16 | |
738 sub ebp,ecx | |
739 movd mm1,ecx | |
740 | |
741 test al,16 | |
742 jz L_test_for_second_level_dist_mmx | |
743 and eax,15 | |
744 jz L_check_dist_one_mmx | |
745 | |
; ebx += low eax bits of the accumulator; eax becomes the pending shift.
746 L_add_bits_to_dist_mmx: | |
747 psrlq mm0,mm1 | |
748 movd mm1,eax | |
749 movd ecx,mm0 | |
750 sub ebp,eax | |
751 and ecx, [inflate_fast_mask+eax*4] | |
752 add ebx,ecx | |
753 | |
; L_check_window_mmx - copy a match of edx bytes from distance ebx.
; The input pointer is parked in [esp+44] while esi is the copy source.
; If out - beg < distance, part of the match lies in the window
; (L_clip_window_mmx). Otherwise copy from out - distance: 3 bytes
; explicitly, then len - 3 with rep movsb. ebx is reloaded with the
; lencode table pointer before looping.
; NOTE(review): relies on len being at least 3 - confirm against the
; length tables.
754 L_check_window_mmx: | |
755 mov [esp+44],esi | |
756 mov eax,edi | |
757 sub eax, [esp+40] | |
758 | |
759 cmp eax,ebx | |
760 jb L_clip_window_mmx | |
761 | |
762 mov ecx,edx | |
763 mov esi,edi | |
764 sub esi,ebx | |
765 | |
766 sub ecx,3 | |
767 mov al, [esi] | |
768 mov [edi],al | |
769 mov al, [esi+1] | |
770 mov dl, [esi+2] | |
771 add esi,3 | |
772 mov [edi+1],al | |
773 mov [edi+2],dl | |
774 add edi,3 | |
775 rep movsb | |
776 | |
777 mov esi, [esp+44] | |
778 mov ebx, [esp+8] | |
779 jmp L_while_test_mmx | |
780 | |
; L_check_dist_one_mmx - distance code without extra bits. For distance 1
; with output already produced (out != beg), replicate the previous
; output byte len times (3 explicit stores + rep stosb of len - 3).
; Every other case uses the general path above.
781 ALIGN 4 | |
782 L_check_dist_one_mmx: | |
783 cmp ebx,1 | |
784 jne L_check_window_mmx | |
785 cmp [esp+40],edi | |
786 je L_check_window_mmx | |
787 | |
788 dec edi | |
789 mov ecx,edx | |
790 mov al, [edi] | |
791 sub ecx,3 | |
792 | |
793 mov [edi+1],al | |
794 mov [edi+2],al | |
795 mov [edi+3],al | |
796 add edi,4 | |
797 rep stosb | |
798 | |
799 mov ebx, [esp+8] | |
800 jmp L_while_test_mmx | |
801 | |
; L_test_for_second_level_length_mmx - length entry with op bit 16 clear.
; op bit 64 set: end-of-block or invalid code. Otherwise follow the link:
; drop the pending code bits, index = value (edx) + low (op & 15) bits of
; the accumulator, fetch from the lencode table (ebx) and decode again.
802 ALIGN 4 | |
803 L_test_for_second_level_length_mmx: | |
804 test al,64 | |
805 jnz L_test_for_end_of_block | |
806 | |
807 and eax,15 | |
808 psrlq mm0,mm1 | |
809 movd ecx,mm0 | |
810 and ecx, [inflate_fast_mask+eax*4] | |
811 add ecx,edx | |
812 mov eax, [ebx+ecx*4] | |
813 jmp L_dolen_mmx | |
814 | |
; L_test_for_second_level_dist_mmx - same for distance entries; here ebx
; holds the entry value and the table pointer is read from [esp+12].
815 ALIGN 4 | |
816 L_test_for_second_level_dist_mmx: | |
817 test al,64 | |
818 jnz L_invalid_distance_code | |
819 | |
820 and eax,15 | |
821 psrlq mm0,mm1 | |
822 movd ecx,mm0 | |
823 and ecx, [inflate_fast_mask+eax*4] | |
824 mov eax, [esp+12] | |
825 add ecx,ebx | |
826 mov eax, [eax+ecx*4] | |
827 jmp L_dodist_mmx | |
828 | |
; L_clip_window_mmx - the match starts before beg, so its first bytes
; come from the sliding window (MMX loop variant).
; In:  eax = out - beg, ebx = distance, edx = length, edi = out.
; Rejects the match when distance > wsize. Otherwise
; ecx = distance - (out - beg) = number of bytes to take from the window.
; When state->write is 0 the source is window + wsize - ecx; a non-zero
; write position is handled at L_wrap_around_window_mmx.
; If the length exceeds ecx, the window part is copied here and the
; remainder (edx bytes, source out - distance) is left to L_do_copy1_mmx.
; NOTE(review): only wsize is checked here, not how much of the window
; has actually been filled - confirm this matches the C implementation
; this file is built against.
829 ALIGN 4 | |
830 L_clip_window_mmx: | |
831 | |
832 mov ecx,eax | |
833 mov eax, [esp+52] | |
834 neg ecx | |
835 mov esi, [esp+56] | |
836 | |
837 cmp eax,ebx | |
838 jb L_invalid_distance_too_far | |
839 | |
840 add ecx,ebx | |
841 cmp dword ptr [esp+48],0 | |
842 jne L_wrap_around_window_mmx | |
843 | |
844 sub eax,ecx | |
845 add esi,eax | |
846 | |
847 cmp edx,ecx | |
848 jbe L_do_copy1_mmx | |
849 | |
850 sub edx,ecx | |
851 rep movsb | |
852 mov esi,edi | |
853 sub esi,ebx | |
854 jmp L_do_copy1_mmx | |
864 | |
; L_wrap_around_window_mmx - window copy when state->write is non-zero.
; In: ecx = bytes needed from the window, esi = window base,
;     ebx = distance, edx = length, edi = out.
; If ecx <= write the bytes are contiguous and end at window + write.
; Otherwise the source starts at window + wsize + write - ecx: copy up to
; ecx - write bytes from there, then up to write bytes from the window
; base, then the rest from the output itself (out - distance).
; In every path edx holds the number of bytes still to copy when
; L_do_copy1_mmx is reached.
865 L_wrap_around_window_mmx: | |
866 | |
867 mov eax, [esp+48] | |
868 cmp ecx,eax | |
869 jbe L_contiguous_in_window_mmx | |
870 | |
871 add esi, [esp+52] | |
872 add esi,eax | |
873 sub esi,ecx | |
874 sub ecx,eax | |
875 | |
876 | |
877 cmp edx,ecx | |
878 jbe L_do_copy1_mmx | |
879 | |
880 sub edx,ecx | |
881 rep movsb | |
882 mov esi, [esp+56] | |
883 mov ecx, [esp+48] | |
884 cmp edx,ecx | |
885 jbe L_do_copy1_mmx | |
886 | |
887 sub edx,ecx | |
888 rep movsb | |
889 mov esi,edi | |
890 sub esi,ebx | |
891 jmp L_do_copy1_mmx | |
892 | |
893 L_contiguous_in_window_mmx: | |
894 | |
895 add esi,eax | |
896 sub esi,ecx | |
897 | |
898 | |
899 cmp edx,ecx | |
900 jbe L_do_copy1_mmx | |
901 | |
902 sub edx,ecx | |
903 rep movsb | |
904 mov esi,edi | |
905 sub esi,ebx | |
906 | |
; Final copy of edx bytes from esi, then restore the input pointer and
; the lencode table pointer.
907 L_do_copy1_mmx: | |
908 | |
909 | |
910 mov ecx,edx | |
911 rep movsb | |
912 | |
913 mov esi, [esp+44] | |
914 mov ebx, [esp+8] | |
915 jmp L_while_test_mmx | |
916 ; 1174 "inffast.S" | |
; Loop exits shared by the integer and MMX loops. Each one loads
; ecx = message pointer (or 0 for none) and edx = new state->mode value,
; then goes to L_update_stream_state.
917 L_invalid_distance_code: | |
918 | |
919 | |
920 | |
921 | |
922 | |
923 mov ecx, invalid_distance_code_msg | |
924 mov edx,INFLATE_MODE_BAD | |
925 jmp L_update_stream_state | |
926 | |
; op bit 32 set means end of block (no message, mode = INFLATE_MODE_TYPE);
; otherwise the literal/length code is invalid.
927 L_test_for_end_of_block: | |
928 | |
929 | |
930 | |
931 | |
932 | |
933 test al,32 | |
934 jz L_invalid_literal_length_code | |
935 | |
936 mov ecx,0 | |
937 mov edx,INFLATE_MODE_TYPE | |
938 jmp L_update_stream_state | |
939 | |
940 L_invalid_literal_length_code: | |
941 | |
942 | |
943 | |
944 | |
945 | |
946 mov ecx, invalid_literal_length_code_msg | |
947 mov edx,INFLATE_MODE_BAD | |
948 jmp L_update_stream_state | |
949 | |
; Reached from the clip-window code, where esi was overwritten with a
; window address; the input pointer is restored from [esp+44] first.
950 L_invalid_distance_too_far: | |
951 | |
952 | |
953 | |
954 mov esi, [esp+44] | |
955 mov ecx, invalid_distance_too_far_msg | |
956 mov edx,INFLATE_MODE_BAD | |
957 jmp L_update_stream_state | |
958 | |
; Store the message pointer at offset 24 of the stream structure (only
; when ecx is non-zero) and the new mode into state->mode.
959 L_update_stream_state: | |
960 | |
961 mov eax, [esp+88] | |
962 test ecx,ecx | |
963 jz L_skip_msg | |
964 mov [eax+24],ecx | |
965 L_skip_msg: | |
966 mov eax, [eax+28] | |
967 mov [eax+mode_state],edx | |
968 jmp L_break_loop | |
969 | |
; L_break_loop - common epilogue: write the decoder state back to the
; stream and state structures, then restore registers and return.
; In the MMX configuration the bit count lives in ebp, so it is copied
; to ebx first to share the code below.
970 ALIGN 4 | |
971 L_break_loop: | |
972 ; 1243 "inffast.S" | |
973 cmp dword ptr [inflate_fast_use_mmx],2 | |
974 jne L_update_next_in | |
975 | |
976 | |
977 | |
978 mov ebx,ebp | |
979 | |
; Give back whole unused bytes: in -= bits / 8, bits = bits mod 8.
; eax = stream pointer, edx = state pointer from here on.
; The output pointer is stored at [strm+12] and bits into state->bits.
980 L_update_next_in: | |
981 ; 1266 "inffast.S" | |
982 mov eax, [esp+88] | |
983 mov ecx,ebx | |
984 mov edx, [eax+28] | |
985 shr ecx,3 | |
986 sub esi,ecx | |
987 shl ecx,3 | |
988 sub ebx,ecx | |
989 mov [eax+12],edi | |
990 mov [edx+bits_state],ebx | |
991 mov ecx,ebx | |
992 | |
; If decoding ran from the scratch buffer (last == address of the
; buffer), translate esi back into the caller's input buffer and
; recompute last = [strm+0] + [strm+4] - 11.
993 lea ebx, [esp+28] | |
994 cmp [esp+20],ebx | |
995 jne L_buf_not_used | |
996 | |
997 sub esi,ebx | |
998 mov ebx, [eax+0] | |
999 mov [esp+20],ebx | |
1000 add esi,ebx | |
1001 mov ebx, [eax+4] | |
1002 sub ebx,11 | |
1003 add [esp+20],ebx | |
1004 | |
1005 L_buf_not_used: | |
1006 mov [eax+0],esi | |
1007 | |
; ebx = (1 << bits) - 1, used to keep only the valid bits of hold.
1008 mov ebx,1 | |
1009 shl ebx,cl | |
1010 dec ebx | |
1011 | |
1012 | |
1013 | |
1014 | |
1015 | |
; MMX configuration: apply the pending shift, move the accumulator back
; to ebp and leave MMX state with emms.
1016 cmp dword ptr [inflate_fast_use_mmx],2 | |
1017 jne L_update_hold | |
1018 | |
1019 | |
1020 | |
1021 psrlq mm0,mm1 | |
1022 movd ebp,mm0 | |
1023 | |
1024 emms | |
1025 | |
1026 L_update_hold: | |
1027 | |
1028 | |
1029 | |
1030 and ebp,ebx | |
1031 mov [edx+hold_state],ebp | |
1032 | |
1033 | |
1034 | |
1035 | |
; [strm+4] = 11 + (last - in); both branches compute the same value,
; the second one handles in >= last without unsigned wrap-around of the
; intermediate subtraction.
1036 mov ebx, [esp+20] | |
1037 cmp ebx,esi | |
1038 jbe L_last_is_smaller | |
1039 | |
1040 sub ebx,esi | |
1041 add ebx,11 | |
1042 mov [eax+4],ebx | |
1043 jmp L_fixup_out | |
1044 L_last_is_smaller: | |
1045 sub esi,ebx | |
1046 neg esi | |
1047 add esi,11 | |
1048 mov [eax+4],esi | |
1049 | |
1050 | |
1051 | |
1052 | |
; [strm+16] = 257 + (end - out), computed the same way.
1053 L_fixup_out: | |
1054 | |
1055 mov ebx, [esp+16] | |
1056 cmp ebx,edi | |
1057 jbe L_end_is_smaller | |
1058 | |
1059 sub ebx,edi | |
1060 add ebx,257 | |
1061 mov [eax+16],ebx | |
1062 jmp L_done | |
1063 L_end_is_smaller: | |
1064 sub edi,ebx | |
1065 neg edi | |
1066 add edi,257 | |
1067 mov [eax+16],edi | |
1068 | |
1069 | |
1070 | |
1071 | |
1072 | |
; Release the 64-byte frame and restore EFLAGS and the saved registers in
; reverse order of the prologue.
1073 L_done: | |
1074 add esp,64 | |
1075 popfd | |
1076 pop ebx | |
1077 pop ebp | |
1078 pop esi | |
1079 pop edi | |
1080 ret | |
1081 | |
1082 _TEXT ends | |
1083 end |