;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%include "options.asm"
%include "lz0a_const.asm"
%include "data_struct2.asm"
%include "bitbuf2.asm"
%include "huffman.asm"
%include "igzip_compare_types.asm"

%include "stdmac.asm"
%include "reg_sizes.asm"

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Register aliases.
; Several names share one physical register; two aliases of the same
; register must never be live at the same time.

%define curr_data	rax
%define tmp1		rax

%define f_index		rbx
%define code		rbx
%define tmp4		rbx
%define tmp5		rbx
%define tmp6		rbx

%define tmp2		rcx
%define hash		rcx

%define tmp3		rdx

%define stream		rsi

%define f_i		rdi

%define code_len2	rbp
%define hmask1		rbp

%define m_out_buf	r8

%define level_buf	r9

%define dist		r10
%define hmask2		r10

; code2 and f_end_i share r12, which is why the end index is also kept
; in a stack slot (f_end_i_mem_offset) and reloaded from there.
%define code2		r12
%define f_end_i		r12

%define file_start	r13

%define len		r14

%define hufftables	r15

; Address expressions relative to level_buf (used inside [...]).
%define hash_table level_buf + _hash8k_hash_table
%define lit_len_hist level_buf + _hist_lit_len
%define dist_hist level_buf + _hist_dist

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Stack frame layout (offsets from rsp after the prologue).
f_end_i_mem_offset	equ 0	; end index of the input (8 bytes); holds
				; end - LAST_BYTES_COUNT while the main
				; loop runs, the true end afterwards
m_out_end		equ 8	; icf_buf_next + icf_buf_avail_out - 4
m_out_start		equ 16	; icf_buf_next as found on entry
dist_mask_offset	equ 24	; copy of internal_state.dist_mask
hash_mask_offset	equ 32	; copy of internal_state.hash_mask
stack_size		equ 5*8

%xdefine METHOD hash_hist

[bits 64]
default rel
section .text

; void isal_deflate_icf_finish ( isal_zstream *stream )
; arg 1: addr of stream (rcx when assembled for win64, rdi otherwise)
;
; Walks the remaining input [total_in, total_in + avail_in), looking up
; each position in the 16-bit hash table stored in level_buf. For every
; position it emits either a literal code or a length/distance code into
; the buffer at level_buf->icf_buf_next (via write_dword) and increments
; the matching lit/len and distance histogram counters. On exit it
; writes back total_in, next_in, avail_in, internal_state.block_end,
; icf_buf_next and icf_buf_avail_out.
;
; NOTE(review): compute_hash, compare, get_dist_icf_code, write_dword,
; PUSH_ALL and POP_ALL are macros defined in the included files; the
; remarks about them below are inferred from how they are used here.
global isal_deflate_icf_finish_ %+ METHOD %+ _01
isal_deflate_icf_finish_ %+ METHOD %+ _01:
	endbranch
	PUSH_ALL	rbx, rsi, rdi, rbp, r12, r13, r14, r15
	sub	rsp, stack_size

%ifidn __OUTPUT_FORMAT__, win64
	mov	stream, rcx
%else
	mov	stream, rdi
%endif

	; Fetch the masks and set up the output window:
	;   m_out_buf = level_buf->icf_buf_next
	;   m_out_end = m_out_buf + level_buf->icf_buf_avail_out - 4
	mov	tmp2 %+ d, dword [stream + _internal_state_dist_mask]
	mov	tmp3 %+ d, dword [stream + _internal_state_hash_mask]
	mov	level_buf, [stream + _level_buf]
	mov	m_out_buf, [level_buf + _icf_buf_next]
	mov	[rsp + m_out_start], m_out_buf
	mov	tmp1, [level_buf + _icf_buf_avail_out]
	add	tmp1, m_out_buf
	sub	tmp1, 4

	mov	[rsp + dist_mask_offset], tmp2
	mov	[rsp + hash_mask_offset], tmp3
	mov	[rsp + m_out_end], tmp1

	mov	hufftables, [stream + _hufftables]

	; file_start = next_in - total_in, so [file_start + f_i] is the
	; input byte at absolute index f_i.
	mov	file_start, [stream + _next_in]

	mov	f_i %+ d, dword [stream + _total_in]
	sub	file_start, f_i

	mov	f_end_i %+ d, dword [stream + _avail_in]
	add	f_end_i, f_i

	; The main loop stops LAST_BYTES_COUNT bytes before the real end;
	; the remainder is handled by .final_bytes.
	sub	f_end_i, LAST_BYTES_COUNT
	mov	[rsp + f_end_i_mem_offset], f_end_i
	; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
	; Signed compare: f_end_i is negative here when the input holds
	; fewer than LAST_BYTES_COUNT bytes in total.
	cmp	f_i, f_end_i
	jge	.end_loop_2

	mov	curr_data %+ d, [file_start + f_i]

	cmp	byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
	jne	.skip_write_first_byte

	; No history yet: there is nothing to match against, so record
	; this position in the hash table, mark history as present and
	; emit the byte as a literal.
	cmp	m_out_buf, [rsp + m_out_end]
	ja	.end_loop_2

	mov	hmask1 %+ d, [rsp + hash_mask_offset]
	compute_hash	hash, curr_data
	and	hash %+ d, hmask1 %+ d
	mov	[hash_table + 2 * hash], f_i %+ w
	mov	byte [stream + _internal_state_has_hist], IGZIP_HIST
	jmp	.encode_literal

.skip_write_first_byte:

.loop2:
	mov	tmp3 %+ d, [rsp + dist_mask_offset]
	mov	hmask1 %+ d, [rsp + hash_mask_offset]
	; Stop when m_out_buf is past m_out_end, i.e. fewer than 4 bytes
	; of output space remain.
	cmp	m_out_buf, [rsp + m_out_end]
	ja	.end_loop_2

	; hash = compute_hash(file_start + f_i) & hash_mask;
	mov	curr_data %+ d, [file_start + f_i]
	compute_hash	hash, curr_data
	and	hash %+ d, hmask1 %+ d

	; f_index = hash_table[hash];
	movzx	f_index %+ d, word [hash_table + 2 * hash]

	; hash_table[hash] = (uint16_t) f_i;
	mov	[hash_table + 2 * hash], f_i %+ w

	; dist = f_i - f_index; // mod 64k
	mov	dist %+ d, f_i %+ d
	sub	dist %+ d, f_index %+ d
	and	dist %+ d, 0xFFFF

	; if ((uint32_t)(dist - 1) >= dist_mask) goto encode_literal;
	; The unsigned test also rejects dist == 0, which wraps to
	; 0xFFFFFFFF.
	mov	tmp1 %+ d, dist %+ d
	sub	tmp1 %+ d, 1
	cmp	tmp1 %+ d, tmp3 %+ d
	jae	.encode_literal

	; len = (f_end_i + LAST_BYTES_COUNT) - f_i;  // bytes left in input
	mov	tmp4, [rsp + f_end_i_mem_offset]
	sub	tmp4, f_i
	add	tmp4, LAST_BYTES_COUNT

	; if (len > 258) len = 258;
	cmp	tmp4, 258
	cmovg	tmp4, [c258]

	; len = compare(file_start + f_i,
	;               file_start + f_i - dist, len);
	lea	tmp1, [file_start + f_i]
	mov	tmp2, tmp1
	sub	tmp2, dist
	compare	tmp4, tmp1, tmp2, len, tmp3

	; if (len < SHORTEST_MATCH) goto encode_literal;
	cmp	len, SHORTEST_MATCH
	jb	.encode_literal

	;; encode as dist/len

	; get_dist_icf_code(dist - 1, &code2);
	dec	dist
	get_dist_icf_code	dist, code2, tmp3 ;; clobbers dist, rcx

	;; get_len_code
	lea	code, [len + 254]

	; hmask2 aliases dist (r10); load it only now that dist has been
	; consumed.
	mov	hmask2 %+ d, [rsp + hash_mask_offset]

	or	code2, code
	inc	dword [lit_len_hist + HIST_ELEM_SIZE*code]

	; k = f_i + 1; f_i += len;
	; The hash updates are skipped when the match reaches the
	; (reduced) end index.
	lea	tmp3, [f_i + 1]	; tmp3 <= k
	add	f_i, len
	cmp	f_i, [rsp + f_end_i_mem_offset]
	jae	.skip_hash_update

	; only update hash twice

	; hash = compute_hash(file_start + k) & hash_mask;
	mov	tmp6 %+ d, dword [file_start + tmp3]
	compute_hash	hash, tmp6
	and	hash %+ d, hmask2 %+ d
	; hash_table[hash] = k;
	mov	[hash_table + 2 * hash], tmp3 %+ w

	add	tmp3, 1

	; hash = compute_hash(file_start + k) & hash_mask;
	mov	tmp6 %+ d, dword [file_start + tmp3]
	compute_hash	hash, tmp6
	and	hash %+ d, hmask2 %+ d
	; hash_table[hash] = k;
	mov	[hash_table + 2 * hash], tmp3 %+ w

.skip_hash_update:
	write_dword	code2, m_out_buf
	; Extract the 5-bit field at DIST_OFFSET from the emitted code
	; and count it in the distance histogram.
	shr	code2, DIST_OFFSET
	and	code2, 0x1F
	inc	dword [dist_hist + HIST_ELEM_SIZE*code2]
	; continue
	cmp	f_i, [rsp + f_end_i_mem_offset]
	jl	.loop2
	jmp	.end_loop_2

.encode_literal:
	; Count the byte in the lit/len histogram, then emit (byte | LIT).
	movzx	tmp5, byte [file_start + f_i]
	inc	dword [lit_len_hist + HIST_ELEM_SIZE*tmp5]
	or	tmp5, LIT
	write_dword	tmp5, m_out_buf
	; continue
	add	f_i, 1
	cmp	f_i, [rsp + f_end_i_mem_offset]
	jl	.loop2

.end_loop_2:
	; Restore the true end index (undo the LAST_BYTES_COUNT bias).
	mov	f_end_i, [rsp + f_end_i_mem_offset]
	add	f_end_i, LAST_BYTES_COUNT
	mov	[rsp + f_end_i_mem_offset], f_end_i
	; if (f_i >= f_end_i) goto input_end;
	cmp	f_i, f_end_i
	jge	.input_end

	; Emit the remaining bytes as literals while output space lasts.
.final_bytes:
	cmp	m_out_buf, [rsp + m_out_end]
	ja	.out_end

	movzx	tmp5, byte [file_start + f_i]
	inc	dword [lit_len_hist + HIST_ELEM_SIZE*tmp5]
	or	tmp5, LIT
	write_dword	tmp5, m_out_buf

	inc	f_i
	cmp	f_i, [rsp + f_end_i_mem_offset]
	jl	.final_bytes

.input_end:
	; All input consumed: leave the state untouched only when
	; end_of_stream == 0 and flush == _NO_FLUSH.
	cmp	word [stream + _end_of_stream], 0
	jne	.out_end
	cmp	word [stream + _flush], _NO_FLUSH
	je	.end

.out_end:
	mov	dword [stream + _internal_state_state], ZSTATE_CREATE_HDR
.end:
	;; Update input buffer
	mov	f_end_i, [rsp + f_end_i_mem_offset]
	mov	[stream + _total_in], f_i %+ d
	mov	[stream + _internal_state_block_end], f_i %+ d

	add	file_start, f_i
	mov	[stream + _next_in], file_start
	sub	f_end_i, f_i
	mov	[stream + _avail_in], f_end_i %+ d

	;; Update output buffer
	mov	[level_buf + _icf_buf_next], m_out_buf

	; used = m_out_buf - (icf_buf_next on entry);
	sub	m_out_buf, [rsp + m_out_start]

	; level_buf->icf_buf_avail_out -= used;
	sub	[level_buf + _icf_buf_avail_out], m_out_buf

	add	rsp, stack_size
	POP_ALL
	ret

section .data
	align 8		; c258 is read as a qword by cmovg
c258:	dq	258