;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2018 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Syntax: NASM (Intel operand order), 64-bit code.
;; All static data is addressed RIP-relative.
default rel

%include "reg_sizes.asm"

;; Status codes. The decode routine in this file leaves 0, END_INPUT,
;; OUT_OVERFLOW, INVALID_SYMBOL or INVALID_LOOKBACK in rax on exit.
;; DECOMP_OK and INVALID_BLOCK are not referenced by name in this file.
%define DECOMP_OK 0
%define END_INPUT 1
%define OUT_OVERFLOW 2
%define INVALID_BLOCK -1
%define INVALID_SYMBOL -2
%define INVALID_LOOKBACK -3

;; Number of input bits used to index the first-level ("short") lookup
;; of the literal/length table (LONG) and of the distance table (SHORT).
%define ISAL_DECODE_LONG_BITS 12
%define ISAL_DECODE_SHORT_BITS 10

;; COPY_SIZE:    bytes moved per MOVDQU in the fast-path match copy.
;; COPY_LEN_MAX: longest match the copy loop is sized for (258 is the
;;               maximum match length in RFC 1951).
%define COPY_SIZE 16
%define COPY_LEN_MAX 258

;; Margins subtracted from end_in / end_out while the fast loop runs, so
;; that its 8-byte input loads and 16-byte output stores need no
;; per-access bounds check.
;; OUT_BUFFER_SLOP is parenthesised so the sum survives expansion inside
;; a larger expression (e.g. "2 * OUT_BUFFER_SLOP").
%define IN_BUFFER_SLOP 8
%define OUT_BUFFER_SLOP (COPY_SIZE + COPY_LEN_MAX)

%include "inflate_data_structs.asm"
%include "stdmac.asm"

extern rfc1951_lookup_table



;; Literal/length ("large") table entry layout, as consumed by
;; decode_next_lit_len:
;;   short entry (dword): bits  0-24  packed symbol(s), or for a flagged
;;                                    entry the base index into the long
;;                                    table
;;                        bit   25    flag: entry is a hint, a second
;;                                    lookup in the long table is needed
;;                        bits 26-27  number of symbols packed in entry
;;                        bits 28-31  code length (0 => invalid symbol)
;;                        For a flagged entry bits 26-31 are used together
;;                        as the bit count of the full code.
;;   long entry (word):   bits  0-9   symbol
;;                        bits 10-15  code length (0 => invalid symbol)
%define LARGE_SHORT_SYM_LEN 25
%define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1)
%define LARGE_LONG_SYM_LEN 10
%define LARGE_LONG_SYM_MASK ((1 << LARGE_LONG_SYM_LEN) - 1)
%define LARGE_SHORT_CODE_LEN_OFFSET 28
%define LARGE_LONG_CODE_LEN_OFFSET 10
%define LARGE_FLAG_BIT_OFFSET 25
%define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET)
%define LARGE_SYM_COUNT_OFFSET 26
%define LARGE_SYM_COUNT_LEN 2
%define LARGE_SYM_COUNT_MASK ((1 << LARGE_SYM_COUNT_LEN) - 1)
%define LARGE_SHORT_MAX_LEN_OFFSET 26

;; Distance ("small") table entry layout, as consumed by decode_next_dist:
;;   short entry (word):  bits  0-8   symbol data
;;                        bit   10    flag: second lookup needed
;;                        bits 11-15  code length (0 => invalid symbol)
;;   long entry (word):   bits  0-8   symbol data
;;                        bits 10-15  code length (0 => invalid symbol)
%define SMALL_SHORT_SYM_LEN 9
%define SMALL_SHORT_SYM_MASK ((1 << SMALL_SHORT_SYM_LEN) - 1)
%define SMALL_LONG_SYM_LEN 9
%define SMALL_LONG_SYM_MASK ((1 << SMALL_LONG_SYM_LEN) - 1)
%define SMALL_SHORT_CODE_LEN_OFFSET 11
%define SMALL_LONG_CODE_LEN_OFFSET 10
%define SMALL_FLAG_BIT_OFFSET 10
%define SMALL_FLAG_BIT (1 << SMALL_FLAG_BIT_OFFSET)

;; Layout of the symbol data produced by a distance lookup:
;;   bits 0-4  distance symbol (index into the distance base table)
;;   bits 5+   number of extra bits to read for this distance
%define DIST_SYM_OFFSET 0
%define DIST_SYM_LEN 5
%define DIST_SYM_MASK ((1 << DIST_SYM_LEN) - 1)
%define DIST_SYM_EXTRA_OFFSET 5
%define DIST_SYM_EXTRA_LEN 4
%define DIST_SYM_EXTRA_MASK ((1 << DIST_SYM_EXTRA_LEN) - 1)

;; Register aliases. Several names share one physical register; which
;; alias is live depends on the code path.

;; rax
%define tmp3 rax
%define read_in_2 rax
%define look_back_dist rax

;; rcx
;; rdx arg3
%define next_sym2 rdx
%define copy_start rdx
%define tmp4 rdx

;; rdi arg1
%define tmp1 rdi
%define look_back_dist2 rdi
%define next_bits2 rdi
%define next_sym3 rdi

;; rsi arg2
%define tmp2 rsi
%define next_sym_num rsi
%define next_bits rsi

;; rbx ; Saved
%define next_in rbx

;; rbp ; Saved
%define end_in rbp

;; r8
%define repeat_length r8

;; r9
%define read_in r9

;; r10
%define read_in_length r10

;; r11
%define state r11

;; r12 ; Saved
%define next_out r12

;; r13 ; Saved
%define end_out r13

;; r14 ; Saved
%define next_sym r14

;; r15 ; Saved
%define rfc_lookup r15

;; Stack frame layout (offsets from rsp after FUNC_SAVE):
;;   0   second argument (arg1): lowest address a look-back may read
;;   8   read_in snapshot taken before each slow-path symbol decode
;;   16  read_in_length snapshot
;;   24  next_out snapshot
;;   32  callee-saved general purpose registers (up to 8 slots)
start_out_mem_offset            equ 0
read_in_mem_offset              equ 8
read_in_length_mem_offset       equ 16
next_out_mem_offset             equ 24
gpr_save_mem_offset             equ 32
stack_size                      equ 4 * 8 + 8 * 8

;; Byte offsets into rfc1951_lookup_table. Only _dist_start is used in
;; this file. The compound definitions are parenthesised so they expand
;; safely inside larger expressions.
%define _dist_extra_bit_count   264
%define _dist_start             (_dist_extra_bit_count + 1*32)
%define _len_extra_bit_count    (_dist_start + 4*32)
%define _len_start              (_len_extra_bit_count + 1*32)

;; ---------------------------------------------------------------------
;; System V AMD64 (elf64): arguments in rdi, rsi.
;; FUNC_SAVE allocates the frame and stores rbx, rbp, r12-r15.
;; With ALIGN_STACK defined, rbp is first pushed and used as a frame
;; pointer so rsp can be rounded down to 16 bytes; the rbp slot in the
;; save area then holds that frame pointer, and FUNC_RESTORE reloads it
;; before "mov rsp, rbp / pop rbp".
;; ---------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__, elf64
%define arg0 rdi
%define arg1 rsi

%macro FUNC_SAVE 0
%ifdef ALIGN_STACK
        push    rbp
        mov     rbp, rsp
        sub     rsp, stack_size
        and     rsp, ~15
%else
        sub     rsp, stack_size
%endif

        mov     [rsp + gpr_save_mem_offset + 0*8], rbx
        mov     [rsp + gpr_save_mem_offset + 1*8], rbp
        mov     [rsp + gpr_save_mem_offset + 2*8], r12
        mov     [rsp + gpr_save_mem_offset + 3*8], r13
        mov     [rsp + gpr_save_mem_offset + 4*8], r14
        mov     [rsp + gpr_save_mem_offset + 5*8], r15
%endm

%macro FUNC_RESTORE 0
        mov     rbx, [rsp + gpr_save_mem_offset + 0*8]
        mov     rbp, [rsp + gpr_save_mem_offset + 1*8]
        mov     r12, [rsp + gpr_save_mem_offset + 2*8]
        mov     r13, [rsp + gpr_save_mem_offset + 3*8]
        mov     r14, [rsp + gpr_save_mem_offset + 4*8]
        mov     r15, [rsp + gpr_save_mem_offset + 5*8]

%ifndef ALIGN_STACK
        add     rsp, stack_size
%else
        mov     rsp, rbp
        pop     rbp
%endif
%endm
%endif

;; ---------------------------------------------------------------------
;; Microsoft x64 (win64): arguments in rcx, rdx.
;; Same scheme as above, but rsi and rdi are also callee-saved on this
;; ABI and are therefore stored as well (8 slots in total).
;; ---------------------------------------------------------------------
%ifidn __OUTPUT_FORMAT__, win64
%define arg0 rcx
%define arg1 rdx

%macro FUNC_SAVE 0
%ifdef ALIGN_STACK
        push    rbp
        mov     rbp, rsp
        sub     rsp, stack_size
        and     rsp, ~15
%else
        sub     rsp, stack_size
%endif

        mov     [rsp + gpr_save_mem_offset + 0*8], rbx
        mov     [rsp + gpr_save_mem_offset + 1*8], rsi
        mov     [rsp + gpr_save_mem_offset + 2*8], rdi
        mov     [rsp + gpr_save_mem_offset + 3*8], rbp
        mov     [rsp + gpr_save_mem_offset + 4*8], r12
        mov     [rsp + gpr_save_mem_offset + 5*8], r13
        mov     [rsp + gpr_save_mem_offset + 6*8], r14
        mov     [rsp + gpr_save_mem_offset + 7*8], r15
%endm

%macro FUNC_RESTORE 0
        mov     rbx, [rsp + gpr_save_mem_offset + 0*8]
        mov     rsi, [rsp + gpr_save_mem_offset + 1*8]
        mov     rdi, [rsp + gpr_save_mem_offset + 2*8]
        mov     rbp, [rsp + gpr_save_mem_offset + 3*8]
        mov     r12, [rsp + gpr_save_mem_offset + 4*8]
        mov     r13, [rsp + gpr_save_mem_offset + 5*8]
        mov     r14, [rsp + gpr_save_mem_offset + 6*8]
        mov     r15, [rsp + gpr_save_mem_offset + 7*8]

%ifndef ALIGN_STACK
        add     rsp, stack_size
%else
        mov     rsp, rbp
        pop     rbp
%endif
%endm
%endif

;; ---------------------------------------------------------------------
;; inflate_in_load next_in, end_in, read_in, read_in_length, tmp1, tmp2
;;
;; Load read_in and update the in buffer accordingly when there are at
;; least 8 bytes in the in buffer.
;; Reads 8 bytes at [next_in], shifts them left by read_in_length and ORs
;; them into read_in. next_in then advances by (64 - read_in_length) / 8
;; bytes and read_in_length grows by 8 bits per byte consumed.
;; end_in and tmp2 are accepted but not used by this macro.
;; Clobbers rcx, unless rcx is %%read_in_length
;; NOTE(review): rcx is not written here directly; presumably the SHLX
;; macro (defined outside this file) may use it - confirm.
;; ---------------------------------------------------------------------
%macro inflate_in_load 6
%define %%next_in %1
%define %%end_in %2
%define %%read_in %3
%define %%read_in_length %4
%define %%tmp1 %5 ; Tmp registers
%define %%tmp2 %6

        SHLX    %%tmp1, [%%next_in], %%read_in_length
        or      %%read_in, %%tmp1

        ;; tmp1 = number of whole bytes that fit in the free part of read_in
        mov     %%tmp1, 64
        sub     %%tmp1, %%read_in_length
        shr     %%tmp1, 3

        add     %%next_in, %%tmp1
        lea     %%read_in_length, [%%read_in_length + 8 * %%tmp1]
%%end:
%endm

;; ---------------------------------------------------------------------
;; inflate_in_small_load next_in, end_in, read_in, read_in_length,
;;                       tmp1, loop_count
;;
;; Load read_in and update the in buffer accordingly, one byte at a
;; time, for use when fewer than 8 input bytes may remain.
;; Loads min((64 - read_in_length) / 8, end_in - next_in) bytes.
;; %%avail_in and %%tmp1 name the same register (%5): the available byte
;; count is only needed until loop_count has been clamped, after which
;; the register is reused as the per-byte scratch.
;; Clobbers rcx, unless rcx is %%read_in_length
;; ---------------------------------------------------------------------
%macro inflate_in_small_load 6
%define %%next_in %1
%define %%end_in %2
%define %%read_in %3
%define %%read_in_length %4
%define %%avail_in %5 ; Tmp registers
%define %%tmp1 %5
%define %%loop_count %6

        mov     %%avail_in, %%end_in
        sub     %%avail_in, %%next_in

        ;; rcx tracks the running bit count and doubles as the shift count
%ifnidn %%read_in_length, rcx
        mov     rcx, %%read_in_length
%endif

        mov     %%loop_count, 64
        sub     %%loop_count, %%read_in_length
        shr     %%loop_count, 3

        cmp     %%loop_count, %%avail_in
        cmovg   %%loop_count, %%avail_in
        cmp     %%loop_count, 0
        je      %%end

%%load_byte:
        xor     %%tmp1, %%tmp1
        mov     %%tmp1 %+ b, byte [%%next_in]
        SHLX    %%tmp1, %%tmp1, rcx
        or      %%read_in, %%tmp1
        add     rcx, 8
        add     %%next_in, 1
        sub     %%loop_count, 1
        jg      %%load_byte
%ifnidn %%read_in_length, rcx
        mov     %%read_in_length, rcx
%endif
%%end:
%endm

;; ---------------------------------------------------------------------
;; CLEAR_HIGH_BITS next_bits, bit_count, lookup_size
;;
;; Clears all bits at index (%%bit_count - %%lookup_size) and above in
;; %%next_bits, where %%bit_count is the value passed in.
;; The macro first subtracts (0x40 + lookup_size) from bit_count. With
;; USE_HSWNI the low 5 bits of that result are used as the bzhi index;
;; otherwise its negation is used as a left-then-right shift count
;; (cl is taken modulo 64 by the hardware).
;; May clobber rcx and %%bit_count
;; ---------------------------------------------------------------------
%macro CLEAR_HIGH_BITS 3
%define %%next_bits %1
%define %%bit_count %2
%define %%lookup_size %3

        sub     %%bit_count, 0x40 + %%lookup_size
;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first DECODE_LOOKUP_SIZE bits.
%ifdef USE_HSWNI
        and     %%bit_count, 0x1F
        bzhi    %%next_bits, %%next_bits, %%bit_count
%else
%ifnidn %%bit_count, rcx
        mov     rcx, %%bit_count
%endif
        neg     rcx
        shl     %%next_bits, cl
        shr     %%next_bits, cl
%endif

%endm

;; ---------------------------------------------------------------------
;; decode_next_lit_len state, lookup_size, state_offset, read_in,
;;                     read_in_length, next_sym, next_sym_num, next_bits
;;
;; Decode next literal/length symbol(s).
;; In:  %%next_sym holds the short-table entry already looked up for the
;;      low %%lookup_size bits of %%read_in.
;; Out: %%next_sym     = decoded symbol data (up to LARGE_SHORT_SYM_LEN
;;                       bits for a direct hit, LARGE_LONG_SYM_LEN bits
;;                       after a long-table lookup)
;;      %%next_sym_num = symbol count from the entry, or 1 after a
;;                       long-table lookup
;;      %%read_in / %%read_in_length have the code bits consumed.
;; Jumps to invalid_symbol when the entry's code length field is zero.
;; Clobber rcx
;; ---------------------------------------------------------------------
%macro decode_next_lit_len 8
%define %%state %1 ; State structure associated with compressed stream
%define %%lookup_size %2 ; Number of bits used for small lookup
%define %%state_offset %3 ; Type of huff code, should be either LIT or DIST
%define %%read_in %4 ; Bits read in from compressed stream
%define %%read_in_length %5 ; Number of valid bits in read_in
%define %%next_sym %6 ; Returned symbols
%define %%next_sym_num %7 ; Returned symbols count
%define %%next_bits %8

        ;; rcx = code length field; zero marks an invalid code
        mov     %%next_sym_num, %%next_sym
        mov     rcx, %%next_sym
        shr     rcx, LARGE_SHORT_CODE_LEN_OFFSET
        jz      invalid_symbol

        and     %%next_sym_num, LARGE_SYM_COUNT_MASK << LARGE_SYM_COUNT_OFFSET
        shr     %%next_sym_num, LARGE_SYM_COUNT_OFFSET

        ;; Check if symbol or hint was looked up
        and     %%next_sym, LARGE_FLAG_BIT | LARGE_SHORT_SYM_MASK
        test    %%next_sym, LARGE_FLAG_BIT
        jz      %%end

        ;; Hint entry: rebuild the field at bits 26 and above of the
        ;; entry; it is the bit count handed to CLEAR_HIGH_BITS below
        shl     rcx, LARGE_SYM_COUNT_LEN
        or      rcx, %%next_sym_num

        ;; Save length associated with symbol
        mov     %%next_bits, %%read_in
        shr     %%next_bits, %%lookup_size

        ;; Extract the bits beyond the first %%lookup_size bits.
        CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size

        ;; Long-table index = base index from the hint + the extra code bits
        and     %%next_sym, LARGE_SHORT_SYM_MASK
        add     %%next_sym, %%next_bits

        ;; Lookup actual next symbol
        movzx   %%next_sym, word [%%state + LARGE_LONG_CODE_SIZE * %%next_sym + %%state_offset + LARGE_SHORT_CODE_SIZE * (1 << %%lookup_size)]
        mov     %%next_sym_num, 1

        ;; Save length associated with symbol
        mov     rcx, %%next_sym
        shr     rcx, LARGE_LONG_CODE_LEN_OFFSET
        jz      invalid_symbol
        and     %%next_sym, LARGE_LONG_SYM_MASK

%%end:
;; Updated read_in to reflect the bits which were decoded
        SHRX    %%read_in, %%read_in, rcx
        sub     %%read_in_length, rcx
%endm

;; ---------------------------------------------------------------------
;; decode_next_lit_len_with_load: same as decode_next_lit_len, but first
;; performs the short-table lookup for the low %%lookup_size bits of
;; %%read_in instead of expecting the entry in %%next_sym.
;; Clobber rcx
;; ---------------------------------------------------------------------
%macro decode_next_lit_len_with_load 8
%define %%state %1 ; State structure associated with compressed stream
%define %%lookup_size %2 ; Number of bits used for small lookup
%define %%state_offset %3
%define %%read_in %4 ; Bits read in from compressed stream
%define %%read_in_length %5 ; Number of valid bits in read_in
%define %%next_sym %6 ; Returned symbols
%define %%next_sym_num %7 ; Returned symbols count
%define %%next_bits %8

        ;; Lookup possible next symbol
        mov     %%next_bits, %%read_in
        and     %%next_bits, (1 << %%lookup_size) - 1
        mov     %%next_sym %+ d, dword [%%state + %%state_offset + LARGE_SHORT_CODE_SIZE * %%next_bits]

        decode_next_lit_len %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_sym_num, %%next_bits
%endm

;; ---------------------------------------------------------------------
;; decode_next_dist state, lookup_size, state_offset, read_in,
;;                  read_in_length, next_sym, next_extra_bits, next_bits
;;
;; Decode next distance symbol.
;; In:  %%next_sym holds the short-table entry already looked up for the
;;      low %%lookup_size bits of %%read_in.
;; Out: %%next_sym = distance symbol (DIST_SYM_LEN bits)
;;      rcx        = number of extra bits for that distance
;;      %%read_in / %%read_in_length have the code bits consumed (the
;;      extra bits are left for the caller).
;; On a zero code length, jumps to invalid_dist_symbol_<reg>, where
;; <reg> is the register passed as %%next_sym; a label must exist for
;; each register used.
;; %%next_extra_bits is accepted but not referenced; the extra bit count
;; is always returned in rcx.
;; Clobber rcx
;; ---------------------------------------------------------------------
%macro decode_next_dist 8
%define %%state %1 ; State structure associated with compressed stream
%define %%lookup_size %2 ; Number of bits used for small lookup
%define %%state_offset %3 ; Type of huff code, should be either LIT or DIST
%define %%read_in %4 ; Bits read in from compressed stream
%define %%read_in_length %5 ; Number of valid bits in read_in
%define %%next_sym %6 ; Returned symbol
%define %%next_extra_bits %7
%define %%next_bits %8

        mov     rcx, %%next_sym
        shr     rcx, SMALL_SHORT_CODE_LEN_OFFSET
        jz      invalid_dist_symbol_ %+ %%next_sym

        ;; Check if symbol or hint was looked up
        and     %%next_sym, SMALL_FLAG_BIT | SMALL_SHORT_SYM_MASK
        test    %%next_sym, SMALL_FLAG_BIT
        jz      %%end

        ;; Save length associated with symbol
        mov     %%next_bits, %%read_in
        shr     %%next_bits, %%lookup_size

        ;; Extract the 15-DECODE_LOOKUP_SIZE bits beyond the first %%lookup_size bits.
        ;; next_sym still has SMALL_FLAG_BIT set here; the
        ;; "- SMALL_LONG_CODE_SIZE * SMALL_FLAG_BIT" term in the load
        ;; below cancels its contribution to the address.
        lea     %%next_sym, [%%state + SMALL_LONG_CODE_SIZE * %%next_sym]

        CLEAR_HIGH_BITS %%next_bits, rcx, %%lookup_size

        ;; Lookup actual next symbol
        movzx   %%next_sym, word [%%next_sym + %%state_offset + SMALL_LONG_CODE_SIZE * %%next_bits + SMALL_SHORT_CODE_SIZE * (1 << %%lookup_size) - SMALL_LONG_CODE_SIZE * SMALL_FLAG_BIT]

        ;; Save length associated with symbol
        mov     rcx, %%next_sym
        shr     rcx, SMALL_LONG_CODE_LEN_OFFSET
        jz      invalid_dist_symbol_ %+ %%next_sym
        and     %%next_sym, SMALL_SHORT_SYM_MASK

%%end:
        ;; Updated read_in to reflect the bits which were decoded
        SHRX    %%read_in, %%read_in, rcx
        sub     %%read_in_length, rcx
        ;; Split symbol data: rcx = extra bit count, next_sym = dist symbol
        mov     rcx, %%next_sym
        shr     rcx, DIST_SYM_EXTRA_OFFSET
        and     %%next_sym, DIST_SYM_MASK
%endm

;; ---------------------------------------------------------------------
;; decode_next_dist_with_load: same as decode_next_dist, but first
;; performs the short-table lookup for the low %%lookup_size bits of
;; %%read_in instead of expecting the entry in %%next_sym.
;; Clobber rcx
;; ---------------------------------------------------------------------
%macro decode_next_dist_with_load 8
%define %%state %1 ; State structure associated with compressed stream
%define %%lookup_size %2 ; Number of bits used for small lookup
%define %%state_offset %3
%define %%read_in %4 ; Bits read in from compressed stream
%define %%read_in_length %5 ; Number of valid bits in read_in
%define %%next_sym %6 ; Returned symbol
%define %%next_extra_bits %7
%define %%next_bits %8

        ;; Lookup possible next symbol
        mov     %%next_bits, %%read_in
        and     %%next_bits, (1 << %%lookup_size) - 1
        movzx   %%next_sym, word [%%state + %%state_offset + SMALL_SHORT_CODE_SIZE * %%next_bits]

        decode_next_dist %%state, %%lookup_size, %%state_offset, %%read_in, %%read_in_length, %%next_sym, %%next_extra_bits, %%next_bits
%endm

[bits 64]
default rel
section .text

;-----------------------------------------------------------------------
; decode_huffman_code_block_stateless_<ARCH>
;
; Decodes literal/length and distance symbols from the input described
; by the state structure and writes the output bytes, until an
; end-of-block symbol is decoded, input runs out, the output buffer is
; full, or an error is found.
;
; ABI:   System V AMD64 (elf64) or Microsoft x64 (win64), selected by
;        __OUTPUT_FORMAT__.
; In:    arg0 = pointer to the state structure. Fields are accessed
;               through the _* offsets (_read_in, _read_in_length,
;               _next_in, _avail_in, _next_out, _avail_out, ...), which
;               are defined outside this file.
;        arg1 = lowest address a look-back copy is allowed to read
;               from; a match reaching below it is rejected.
; Out:   rax  = 0                 end-of-block symbol decoded
;               END_INPUT         not enough input for the next symbol;
;                                 read_in, read_in_length and next_out
;                                 are rolled back to before that symbol
;               OUT_OVERFLOW      output buffer full; pending literals
;                                 or match are recorded in the state
;               INVALID_SYMBOL    table entry with zero code length
;               INVALID_LOOKBACK  match source below arg1
;        The state fields _read_in, _read_in_length, _next_in,
;        _avail_in, _next_out, _avail_out and _total_out are updated on
;        every exit path.
; Clobb: rax, rcx, rdx, rsi, rdi, r8-r11, xmm1, flags (rsi and rdi are
;        saved and restored on win64).
; Stack: stack_size bytes; no calls are made from this function.
;
; Structure: a fast loop (loop_block) runs while at least IN_BUFFER_SLOP
; input bytes and OUT_BUFFER_SLOP output bytes remain; it loads and
; stores speculatively without bounds checks. A slow loop
; (end_loop_block) handles the tail of the buffers with exact checks.
;-----------------------------------------------------------------------
global decode_huffman_code_block_stateless_ %+ ARCH
decode_huffman_code_block_stateless_ %+ ARCH %+ :
        endbranch

        FUNC_SAVE

        mov     state, arg0
        mov     [rsp + start_out_mem_offset], arg1
        lea     rfc_lookup, [rfc1951_lookup_table]

        ;; Load the bit buffer and the in/out cursors from the state;
        ;; end_out/end_in become one-past-the-end pointers
        mov     read_in,[state + _read_in]
        mov     read_in_length %+ d, dword [state + _read_in_length]
        mov     next_out, [state + _next_out]
        mov     end_out %+ d, dword [state + _avail_out]
        add     end_out, next_out
        mov     next_in, [state + _next_in]
        mov     end_in %+ d, dword [state + _avail_in]
        add     end_in, next_in

        mov     dword [state + _copy_overflow_len], 0
        mov     dword [state + _copy_overflow_dist], 0

        ;; Pull the limits in by the slop margins for the fast loop;
        ;; end_loop_block_pre / end_symbol_pre add them back
        sub     end_out, OUT_BUFFER_SLOP
        sub     end_in, IN_BUFFER_SLOP

        cmp     next_in, end_in
        jg      end_loop_block_pre

        ;; Bit buffer already full: nothing to load
        cmp     read_in_length, 64
        je      skip_load

        inflate_in_load next_in, end_in, read_in, read_in_length, tmp1, tmp2

skip_load:
        ;; Preload the first literal/length table entry for loop_block
        mov     tmp3, read_in
        and     tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1
        mov     next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3]

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Main Loop
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Invariant on entry: next_sym holds the short-table entry for the low
;; ISAL_DECODE_LONG_BITS bits of read_in.
loop_block:
        ;; Check if near end of in buffer or out buffer
        cmp     next_in, end_in
        jg      end_loop_block_pre
        cmp     next_out, end_out
        jg      end_loop_block_pre

        ;; Decode next symbol and reload the read_in buffer
        decode_next_lit_len state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, next_sym_num, tmp1

        ;; Speculatively write next_sym if it is a literal.
        ;; This is an 8-byte store; next_out then advances by the symbol
        ;; count. next_sym2 = next_sym >> (8 * (next_sym_num - 1)), i.e.
        ;; the last symbol packed in the entry.
        mov     [next_out], next_sym
        add     next_out, next_sym_num
        lea     next_sym2, [8 * next_sym_num - 8]
        SHRX    next_sym2, next_sym, next_sym2

        ;; Find index to speculatively preload next_sym from
        mov     tmp3, (1 << ISAL_DECODE_LONG_BITS) - 1
        and     tmp3, read_in

        ;; Start reloading read_in
        mov     tmp1, [next_in]
        SHLX    tmp1, tmp1, read_in_length
        or      read_in, tmp1

        ;; Speculatively load data associated with length symbol
        ;; NOTE(review): assumes the table symbol for a match is
        ;; (match length + 254) - confirm against the table builder.
        lea     repeat_length, [next_sym2 - 254]

        ;; Test for end of block symbol
        cmp     next_sym2, 256
        je      end_symbol_pre

        ;; Speculatively load next_sym for next loop if a literal was decoded
        mov     next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * tmp3]

        ;; Finish updating read_in_length for read_in
        mov     tmp1, 64
        sub     tmp1, read_in_length
        shr     tmp1, 3
        add     next_in, tmp1
        lea     read_in_length, [read_in_length + 8 * tmp1]

        ;; Speculatively load next dist code
        mov     next_bits2, (1 << ISAL_DECODE_SHORT_BITS) - 1
        and     next_bits2, read_in
        movzx   next_sym3, word [state + _dist_huff_code + SMALL_SHORT_CODE_SIZE * next_bits2]

        ;; Check if next_sym2 is a literal, length, or end of block symbol
        cmp     next_sym2, 256
        jl      loop_block

decode_len_dist:
        ;; Determine next_out after the copy is finished.
        ;; The -1 backs out the position that "add next_out,
        ;; next_sym_num" counted for the length symbol itself.
        lea     next_out, [next_out + repeat_length - 1]

        ;; Decode distance code
        decode_next_dist state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym3, rcx, tmp2

        ;; Base distance for the decoded distance symbol
        mov     look_back_dist2 %+ d, [rfc_lookup + _dist_start + 4 * next_sym3]

        ;; Load distance code extra bits
        mov     next_bits, read_in

        ;; Calculate the look back distance
        BZHI    next_bits, next_bits, rcx, tmp4
        SHRX    read_in, read_in, rcx

        ;; Setup next_sym, read_in, and read_in_length for next loop
        mov     read_in_2, (1 << ISAL_DECODE_LONG_BITS) - 1
        and     read_in_2, read_in
        mov     next_sym %+ d, dword [state + _lit_huff_code + LARGE_SHORT_CODE_SIZE * read_in_2]
        sub     read_in_length, rcx

        ;; Copy distance in len/dist pair
        add     look_back_dist2, next_bits

        ;; Find beginning of copy:
        ;; copy_start = copy source; copy_start + look_back_dist2 = dest
        mov     copy_start, next_out
        sub     copy_start, repeat_length
        sub     copy_start, look_back_dist2

        ;; Check if a valid look back distance was decoded
        cmp     copy_start, [rsp + start_out_mem_offset]
        jl      invalid_look_back_distance
        MOVDQU  xmm1, [copy_start]

        ;; Set tmp2 to be the minimum of COPY_SIZE and repeat_length
        ;; This is to decrease use of small_byte_copy branch
        mov     tmp2, COPY_SIZE
        cmp     tmp2, repeat_length
        cmovg   tmp2, repeat_length

        ;; Check for overlapping memory in the copy
        cmp     look_back_dist2, tmp2
        jl      small_byte_copy_pre

large_byte_copy:
        ;; Copy length distance pair when memory overlap is not an issue.
        ;; Moves COPY_SIZE bytes per iteration and may store past the
        ;; end of the match.
        ;; NOTE(review): presumably the OUT_BUFFER_SLOP margin keeps
        ;; that overshoot inside the output buffer - confirm.
        MOVDQU  [copy_start + look_back_dist2], xmm1

        sub     repeat_length, COPY_SIZE
        jle     loop_block

        add     copy_start, COPY_SIZE
        MOVDQU  xmm1, [copy_start]
        jmp     large_byte_copy

small_byte_copy_pre:
        ;; Copy length distance pair when source and destination overlap
        add     repeat_length, look_back_dist2
small_byte_copy:
        ;; Each pass stores COPY_SIZE bytes at the destination, doubles
        ;; the distance and reloads from copy_start, until the distance
        ;; is at least COPY_SIZE; repeat_length is then reduced by the
        ;; final distance and any remainder goes to large_byte_copy.
        MOVDQU  [copy_start + look_back_dist2], xmm1

        shl     look_back_dist2, 1
        MOVDQU  xmm1, [copy_start]
        cmp     look_back_dist2, COPY_SIZE
        jl      small_byte_copy

        sub     repeat_length, look_back_dist2
        jge     large_byte_copy
        jmp     loop_block

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Finish Main Loop
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
end_loop_block_pre:
        ;; Fix up in buffer and out buffer to reflect the actual buffer end
        add     end_out, OUT_BUFFER_SLOP
        add     end_in, IN_BUFFER_SLOP

end_loop_block:
        ;; Load read in buffer and decode next lit/len symbol.
        ;; The state before the decode is snapshotted on the stack so
        ;; end_of_input can roll back a partially consumed symbol.
        inflate_in_small_load next_in, end_in, read_in, read_in_length, tmp1, tmp2
        mov     [rsp + read_in_mem_offset], read_in
        mov     [rsp + read_in_length_mem_offset], read_in_length
        mov     [rsp + next_out_mem_offset], next_out

        decode_next_lit_len_with_load state, ISAL_DECODE_LONG_BITS, _lit_huff_code, read_in, read_in_length, next_sym, next_sym_num, tmp1

        ;; Check that enough input was available to decode symbol
        cmp     read_in_length, 0
        jl      end_of_input

multi_symbol_start:
        ;; With more than one symbol left in next_sym, the low byte is
        ;; written as a literal
        cmp     next_sym_num, 1
        jg      decode_literal

        cmp     next_sym, 256
        jl      decode_literal
        je      end_symbol

decode_len_dist_2:
        lea     repeat_length, [next_sym - 254]
        ;; Decode distance code
        decode_next_dist_with_load state, ISAL_DECODE_SHORT_BITS, _dist_huff_code, read_in, read_in_length, next_sym, rcx, tmp1

        ;; Load distance code extra bits
        mov     next_bits, read_in
        mov     look_back_dist %+ d, [rfc_lookup + _dist_start + 4 * next_sym]

        ;; Calculate the look back distance and check for enough input
        BZHI    next_bits, next_bits, rcx, tmp1
        SHRX    read_in, read_in, rcx
        add     look_back_dist, next_bits
        sub     read_in_length, rcx
        jl      end_of_input

        ;; Setup code for byte copy using rep movsb
        ;; rdi = destination, rsi = source, rcx = byte count
        mov     rsi, next_out
        mov     rdi, rsi
        mov     rcx, repeat_length
        sub     rsi, look_back_dist

        ;; Check if a valid look back distance was decoded
        cmp     rsi, [rsp + start_out_mem_offset]
        jl      invalid_look_back_distance

        ;; Check for out buffer overflow
        ;; (repeat_length is reused as the would-be end of the copy)
        add     repeat_length, next_out
        cmp     repeat_length, end_out
        jg      out_buffer_overflow_repeat

        mov     next_out, repeat_length

        rep     movsb
        jmp     end_loop_block

decode_literal:
        ;; Store literal decoded from the input stream
        cmp     next_out, end_out
        jge     out_buffer_overflow_lit
        add     next_out, 1
        mov     byte [next_out - 1], next_sym %+ b
        sub     next_sym_num, 1
        jz      end_loop_block
        shr     next_sym, 8
        jmp     multi_symbol_start

;; Set exit codes
end_of_input:
        ;; Roll back to the snapshot taken before the incomplete symbol
        mov     read_in, [rsp + read_in_mem_offset]
        mov     read_in_length, [rsp + read_in_length_mem_offset]
        mov     next_out, [rsp + next_out_mem_offset]
        xor     tmp1, tmp1
        mov     dword [state + _write_overflow_lits], tmp1 %+ d
        mov     dword [state + _write_overflow_len], tmp1 %+ d
        mov     rax, END_INPUT
        jmp     end

out_buffer_overflow_repeat:
        ;; Copy what fits (end_out - next_out bytes), then record the
        ;; remaining length and the distance in the state.
        ;; repeat_length holds next_out + length on entry.
        mov     rcx, end_out
        sub     rcx, next_out
        sub     repeat_length, rcx
        sub     repeat_length, next_out
        rep     movsb

        mov     [state + _copy_overflow_len], repeat_length %+ d
        mov     [state + _copy_overflow_dist], look_back_dist %+ d

        mov     next_out, end_out

        mov     rax, OUT_OVERFLOW
        jmp     end

out_buffer_overflow_lit:
        ;; Record the unwritten symbols and their count, then inspect
        ;; the last packed symbol: a literal ends here; a length symbol
        ;; continues at decode_len_dist_2; end-of-block goes to
        ;; end_state with rax still OUT_OVERFLOW.
        ;; The "mov dword" before "jg" relies on mov leaving the flags
        ;; from "cmp next_sym, 256" intact.
        mov     dword [state + _write_overflow_lits], next_sym %+ d
        mov     dword [state + _write_overflow_len], next_sym_num %+ d
        sub     next_sym_num, 1
        shl     next_sym_num, 3
        SHRX    next_sym, next_sym, next_sym_num
        mov     rax, OUT_OVERFLOW
        shr     next_sym_num, 3
        cmp     next_sym, 256
        jl      end
        mov     dword [state + _write_overflow_len], next_sym_num %+ d
        jg      decode_len_dist_2
        jmp     end_state

invalid_look_back_distance:
        mov     rax, INVALID_LOOKBACK
        jmp     end

;; Targets of the zero-code-length checks in decode_next_dist, one per
;; register that macro is invoked with (next_sym and next_sym3). Each
;; reports end of input when read_in_length is (signed) less than the
;; value left in that register, and an invalid symbol otherwise.
invalid_dist_symbol_ %+ next_sym:
        cmp     read_in_length, next_sym
        jl      end_of_input
        jmp     invalid_symbol
invalid_dist_symbol_ %+ next_sym3:
        cmp     read_in_length, next_sym3
        jl      end_of_input
invalid_symbol:
        mov     rax, INVALID_SYMBOL
        jmp     end

end_symbol_pre:
        ;; Fix up in buffer and out buffer to reflect the actual buffer.
        ;; The fast loop advanced next_out for the end-of-block symbol
        ;; as well; back that one position out.
        sub     next_out, 1
        add     end_out, OUT_BUFFER_SLOP
        add     end_in, IN_BUFFER_SLOP
end_symbol:
        xor     rax, rax
end_state:
        ;; Set flag identifying a new block is required
        mov     byte [state + _block_state], ISAL_BLOCK_NEW_HDR
        cmp     dword [state + _bfinal], 0
        je      end
        mov     byte [state + _block_state], ISAL_BLOCK_INPUT_DONE

end:
        ;; Save current buffer states
        mov     [state + _read_in], read_in
        mov     [state + _read_in_length], read_in_length %+ d

        ;; Set avail_out
        sub     end_out, next_out
        mov     dword [state + _avail_out], end_out %+ d

        ;; Set total_out
        mov     tmp1, next_out
        sub     tmp1, [state + _next_out]
        add     [state + _total_out], tmp1 %+ d

        ;; Set next_out
        mov     [state + _next_out], next_out

        ;; Set next_in
        mov     [state + _next_in], next_in

        ;; Set avail_in
        sub     end_in, next_in
        mov     [state + _avail_in], end_in %+ d

        FUNC_RESTORE

        ret