1;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2; Copyright(c) 2011-2016 Intel Corporation All rights reserved. 3; 4; Redistribution and use in source and binary forms, with or without 5; modification, are permitted provided that the following conditions 6; are met: 7; * Redistributions of source code must retain the above copyright 8; notice, this list of conditions and the following disclaimer. 9; * Redistributions in binary form must reproduce the above copyright 10; notice, this list of conditions and the following disclaimer in 11; the documentation and/or other materials provided with the 12; distribution. 13; * Neither the name of Intel Corporation nor the names of its 14; contributors may be used to endorse or promote products derived 15; from this software without specific prior written permission. 16; 17; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 29 30%include "options.asm" 31%include "lz0a_const.asm" 32%include "stdmac.asm" 33 34; Macros for doing Huffman Encoding 35 36%ifdef LONGER_HUFFTABLE 37 %if (D > 8192) 38 %error History D is larger than 8K, cannot use %LONGER_HUFFTABLE 39 % error 40 %else 41 %define DIST_TABLE_SIZE 8192 42 %define DECODE_OFFSET 26 43 %endif 44%else 45 %define DIST_TABLE_SIZE 2 46 %define DECODE_OFFSET 0 47%endif 48 49%define LEN_TABLE_SIZE 256 50%define LIT_TABLE_SIZE 257 51 52%define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8) 53%define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1) 54%define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3) 55%define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE) 56%define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE) 57%define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2) 58%define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET) 59;; /** @brief Holds the huffman tree used to huffman encode the input stream **/ 60;; struct isal_hufftables { 61;; // deflate huffman tree header 62;; uint8_t deflate_huff_hdr[ISAL_DEF_MAX_HDR_SIZE]; 63;; 64;; //!< Number of whole bytes in deflate_huff_hdr 65;; uint32_t deflate_huff_hdr_count; 66;; 67;; //!< Number of bits in the partial byte in header 68;; uint32_t deflate_huff_hdr_extra_bits; 69;; 70;; //!< bits 7:0 are the code length, bits 31:8 are the code 71;; uint32_t dist_table[DIST_TABLE_SIZE]; 72;; 73;; //!< bits 7:0 are the code length, bits 31:8 are the code 74;; uint32_t len_table[LEN_TABLE_SIZE]; 75;; 76;; //!< bits 3:0 are the code length, bits 15:4 are the code 77;; uint16_t lit_table[LIT_TABLE_SIZE]; 78;; 79;; //!< bits 3:0 are the code length, bits 15:4 are the code 80;; uint16_t dcodes[30 - DECODE_OFFSET]; 81 82;; }; 83 84 85%ifdef LONGER_HUFFTABLE 86; Uses RCX, clobbers dist 87; get_dist_code dist, code, len 88%macro get_dist_code 4 89%define %%dist %1 ; 64-bit IN 90%define %%code %2d ; 32-bit OUT 91%define %%len %3d ; 32-bit OUT 92%define %%hufftables %4 ; address of the hufftable 93 94 mov %%len, [%%hufftables + DIST_TABLE_OFFSET + 4*(%%dist + 1) ] 95 mov %%code, %%len 96 and %%len, 0x1F; 97 shr %%code, 5 98%endm 99 100%macro get_packed_dist_code 3 101%define %%dist %1 ; 64-bit IN 102%define %%code_len %2d ; 32-bit OUT 103%define %%hufftables %3 ; address of the hufftable 104 mov %%code_len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ] 105%endm 106 107%macro unpack_dist_code 2 108%define %%code %1d ; 32-bit OUT 109%define %%len %2d ; 32-bit OUT 110 111 mov %%len, %%code 112 and %%len, 0x1F; 113 shr %%code, 5 114%endm 115 116%else 117; Assumes (dist != 0) 118; Uses RCX, clobbers dist 119; void compute_dist_code dist, code, len 120%macro compute_dist_code 4 121%define %%dist %1 ; IN, clobbered 122%define %%distq %1 123%define %%code %2 ; OUT 124%define %%len %3 ; OUT 125%define %%hufftables %4 126 127 bsr rcx, %%dist ; ecx = msb = bsr(dist) 128 dec rcx ; ecx = num_extra_bits = msb - N 129 BZHI %%code, %%dist, rcx, %%len 130 SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits 131 lea %%dist, [%%dist + 2*rcx] ; dist = sym = dist + num_extra_bits*2 132 mov %%len, rcx ; len = num_extra_bits 133 movzx rcx, byte [hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT] 134 movzx %%dist, word [hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT] 135 SHLX %%code, %%code, rcx ; code = extra_bits << (sym & 0xF) 136 or %%code, %%dist ; code = (sym >> 4) | (extra_bits << (sym & 0xF)) 137 add %%len, rcx ; len = num_extra_bits + (sym & 0xF) 138%endm 139 140; Uses RCX, clobbers dist 141; get_dist_code dist, code, len 142%macro get_dist_code 4 143%define %%dist %1 ; 32-bit IN, clobbered 144%define %%distq %1 ; 64-bit IN, clobbered 145%define %%code %2 ; 32-bit OUT 146%define %%len %3 ; 32-bit OUT 147%define %%hufftables %4 148 149 cmp %%dist, DIST_TABLE_SIZE - 1 150 jg %%do_compute 151%ifndef IACA 152 mov %%len %+ d, dword [hufftables + DIST_TABLE_OFFSET + 4*(%%distq + 1) WRT_OPT] 153 mov %%code, %%len 154 and %%len, 0x1F; 155 shr %%code, 5 156 jmp %%done 157%endif 158%%do_compute: 159 compute_dist_code %%distq, %%code, %%len, %%hufftables 160%%done: 161%endm 162 163%macro get_packed_dist_code 3 164%define %%dist %1 ; 64-bit IN 165%define %%code_len %2d ; 32-bit OUT 166%define %%hufftables %3 ; address of the hufftable 167%endm 168 169%endif 170 171 172; Macros for doing Huffman Encoding 173 174; Assumes (dist != 0) 175; Uses RCX, clobbers dist 176; void compute_dist_code dist, code, len 177%macro compute_dist_icf_code 3 178%define %%dist %1 ; IN, clobbered 179%define %%distq %1 180%define %%code %2 ; OUT 181%define %%tmp1 %3 182 183 bsr rcx, %%dist ; ecx = msb = bsr(dist) 184 dec rcx ; ecx = num_extra_bits = msb - N 185 BZHI %%code, %%dist, rcx, %%tmp1 186 SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits 187 lea %%dist, [%%dist + 2*rcx] ; code = sym = dist + num_extra_bits*2 188 shl %%code, EXTRA_BITS_OFFSET - DIST_OFFSET 189 add %%code, %%dist ; code = extra_bits | sym 190 191%endm 192 193; Uses RCX, clobbers dist 194; get_dist_code dist, code, len 195%macro get_dist_icf_code 3 196%define %%dist %1 ; 32-bit IN, clobbered 197%define %%distq %1 ; 64-bit IN, clobbered 198%define %%code %2 ; 32-bit OUT 199%define %%tmp1 %3 200 201 cmp %%dist, 1 202 jg %%do_compute 203 204%ifnidn %%code, %%dist 205 mov %%code, %%dist 206%endif 207 jmp %%done 208%%do_compute: 209 compute_dist_icf_code %%distq, %%code, %%tmp1 210%%done: 211 shl %%code, DIST_OFFSET 212%endm 213 214 215; "len" can be same register as "length" 216; get_len_code length, code, len 217%macro get_len_code 4 218%define %%length %1 ; 64-bit IN 219%define %%code %2d ; 32-bit OUT 220%define %%len %3d ; 32-bit OUT 221%define %%hufftables %4 222 223 mov %%len, [%%hufftables + LEN_TABLE_OFFSET + 4 * %%length] 224 mov %%code, %%len 225 and %%len, 0x1F 226 shr %%code, 5 227%endm 228 229 230%macro get_lit_code 4 231%define %%lit %1 ; 64-bit IN or CONST 232%define %%code %2d ; 32-bit OUT 233%define %%len %3d ; 32-bit OUT 234%define %%hufftables %4 235 236 movzx %%len, byte [%%hufftables + LIT_TABLE_SIZES_OFFSET + %%lit] 237 movzx %%code, word [%%hufftables + LIT_TABLE_OFFSET + 2 * %%lit] 238 239%endm 240 241 242;; Compute hash of first 3 bytes of data 243%macro compute_hash 2 244%define %%result %1d ; 32-bit reg 245%define %%data %2d ; 32-bit reg (low byte not clobbered) 246 247 xor %%result, %%result 248 crc32 %%result, %%data 249%endm 250