1660f49b0SGreg Tucker;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2660f49b0SGreg Tucker; Copyright(c) 2011-2016 Intel Corporation All rights reserved. 3660f49b0SGreg Tucker; 4660f49b0SGreg Tucker; Redistribution and use in source and binary forms, with or without 5660f49b0SGreg Tucker; modification, are permitted provided that the following conditions 6660f49b0SGreg Tucker; are met: 7660f49b0SGreg Tucker; * Redistributions of source code must retain the above copyright 8660f49b0SGreg Tucker; notice, this list of conditions and the following disclaimer. 9660f49b0SGreg Tucker; * Redistributions in binary form must reproduce the above copyright 10660f49b0SGreg Tucker; notice, this list of conditions and the following disclaimer in 11660f49b0SGreg Tucker; the documentation and/or other materials provided with the 12660f49b0SGreg Tucker; distribution. 13660f49b0SGreg Tucker; * Neither the name of Intel Corporation nor the names of its 14660f49b0SGreg Tucker; contributors may be used to endorse or promote products derived 15660f49b0SGreg Tucker; from this software without specific prior written permission. 16660f49b0SGreg Tucker; 17660f49b0SGreg Tucker; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18660f49b0SGreg Tucker; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19660f49b0SGreg Tucker; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20660f49b0SGreg Tucker; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21660f49b0SGreg Tucker; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22660f49b0SGreg Tucker; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23660f49b0SGreg Tucker; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24660f49b0SGreg Tucker; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25660f49b0SGreg Tucker; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26660f49b0SGreg Tucker; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27660f49b0SGreg Tucker; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28660f49b0SGreg Tucker;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 29660f49b0SGreg Tucker 30660f49b0SGreg Tucker%include "options.asm" 31660f49b0SGreg Tucker%include "lz0a_const.asm" 32*c1876a12SRoy Oursler%include "stdmac.asm" 33660f49b0SGreg Tucker 34660f49b0SGreg Tucker; Macros for doing Huffman Encoding 35660f49b0SGreg Tucker 36660f49b0SGreg Tucker%ifdef LONGER_HUFFTABLE 37660f49b0SGreg Tucker %if (D > 8192) 38660f49b0SGreg Tucker %error History D is larger than 8K, cannot use %LONGER_HUFFTABLE 39660f49b0SGreg Tucker % error 40660f49b0SGreg Tucker %else 41660f49b0SGreg Tucker %define DIST_TABLE_SIZE 8192 42660f49b0SGreg Tucker %define DECODE_OFFSET 26 43660f49b0SGreg Tucker %endif 44660f49b0SGreg Tucker%else 459d53af0cSRoy Oursler %define DIST_TABLE_SIZE 2 469d53af0cSRoy Oursler %define DECODE_OFFSET 0 47660f49b0SGreg Tucker%endif 48660f49b0SGreg Tucker 49660f49b0SGreg Tucker%define LEN_TABLE_SIZE 256 50660f49b0SGreg Tucker%define LIT_TABLE_SIZE 257 51660f49b0SGreg Tucker 5288f95d85SRoy Oursler%define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8) 53660f49b0SGreg Tucker%define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1) 54660f49b0SGreg Tucker%define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3) 55660f49b0SGreg Tucker%define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE) 56660f49b0SGreg Tucker%define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE) 57660f49b0SGreg Tucker%define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2) 58660f49b0SGreg Tucker%define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET) 59660f49b0SGreg Tucker;; /** @brief Holds the huffman tree used to huffman encode the input stream **/ 60660f49b0SGreg Tucker;; struct isal_hufftables { 61660f49b0SGreg Tucker;; // deflate huffman tree header 6288f95d85SRoy Oursler;; uint8_t deflate_huff_hdr[ISAL_DEF_MAX_HDR_SIZE]; 63660f49b0SGreg Tucker;; 64660f49b0SGreg Tucker;; //!< Number of whole bytes in deflate_huff_hdr 65660f49b0SGreg Tucker;; uint32_t deflate_huff_hdr_count; 66660f49b0SGreg Tucker;; 67660f49b0SGreg Tucker;; //!< Number of bits in the partial byte in header 68660f49b0SGreg Tucker;; uint32_t deflate_huff_hdr_extra_bits; 69660f49b0SGreg Tucker;; 70660f49b0SGreg Tucker;; //!< bits 7:0 are the code length, bits 31:8 are the code 71660f49b0SGreg Tucker;; uint32_t dist_table[DIST_TABLE_SIZE]; 72660f49b0SGreg Tucker;; 73660f49b0SGreg Tucker;; //!< bits 7:0 are the code length, bits 31:8 are the code 74660f49b0SGreg Tucker;; uint32_t len_table[LEN_TABLE_SIZE]; 75660f49b0SGreg Tucker;; 76660f49b0SGreg Tucker;; //!< bits 3:0 are the code length, bits 15:4 are the code 77660f49b0SGreg Tucker;; uint16_t lit_table[LIT_TABLE_SIZE]; 78660f49b0SGreg Tucker;; 79660f49b0SGreg Tucker;; //!< bits 3:0 are the code length, bits 15:4 are the code 80660f49b0SGreg Tucker;; uint16_t dcodes[30 - DECODE_OFFSET]; 81660f49b0SGreg Tucker 82660f49b0SGreg Tucker;; }; 83660f49b0SGreg Tucker 84660f49b0SGreg Tucker 85660f49b0SGreg Tucker%ifdef LONGER_HUFFTABLE 86660f49b0SGreg Tucker; Uses RCX, clobbers dist 87660f49b0SGreg Tucker; get_dist_code dist, code, len 88660f49b0SGreg Tucker%macro get_dist_code 4 89660f49b0SGreg Tucker%define %%dist %1 ; 64-bit IN 90660f49b0SGreg Tucker%define %%code %2d ; 32-bit OUT 91660f49b0SGreg Tucker%define %%len %3d ; 32-bit OUT 92660f49b0SGreg Tucker%define %%hufftables %4 ; address of the hufftable 93660f49b0SGreg Tucker 949d53af0cSRoy Oursler mov %%len, [%%hufftables + DIST_TABLE_OFFSET + 4*(%%dist + 1) ] 95660f49b0SGreg Tucker mov %%code, %%len 96660f49b0SGreg Tucker and %%len, 0x1F; 97660f49b0SGreg Tucker shr %%code, 5 98660f49b0SGreg Tucker%endm 99660f49b0SGreg Tucker 100660f49b0SGreg Tucker%macro get_packed_dist_code 3 101660f49b0SGreg Tucker%define %%dist %1 ; 64-bit IN 102660f49b0SGreg Tucker%define %%code_len %2d ; 32-bit OUT 103660f49b0SGreg Tucker%define %%hufftables %3 ; address of the hufftable 104660f49b0SGreg Tucker mov %%code_len, [%%hufftables + DIST_TABLE_OFFSET + 4*%%dist ] 105660f49b0SGreg Tucker%endm 106660f49b0SGreg Tucker 107660f49b0SGreg Tucker%macro unpack_dist_code 2 108660f49b0SGreg Tucker%define %%code %1d ; 32-bit OUT 109660f49b0SGreg Tucker%define %%len %2d ; 32-bit OUT 110660f49b0SGreg Tucker 111660f49b0SGreg Tucker mov %%len, %%code 112660f49b0SGreg Tucker and %%len, 0x1F; 113660f49b0SGreg Tucker shr %%code, 5 114660f49b0SGreg Tucker%endm 115660f49b0SGreg Tucker 116660f49b0SGreg Tucker%else 117660f49b0SGreg Tucker; Assumes (dist != 0) 118660f49b0SGreg Tucker; Uses RCX, clobbers dist 119660f49b0SGreg Tucker; void compute_dist_code dist, code, len 120660f49b0SGreg Tucker%macro compute_dist_code 4 1219d53af0cSRoy Oursler%define %%dist %1 ; IN, clobbered 122660f49b0SGreg Tucker%define %%distq %1 123660f49b0SGreg Tucker%define %%code %2 ; OUT 124660f49b0SGreg Tucker%define %%len %3 ; OUT 125660f49b0SGreg Tucker%define %%hufftables %4 126660f49b0SGreg Tucker 1279d53af0cSRoy Oursler bsr rcx, %%dist ; ecx = msb = bsr(dist) 1289d53af0cSRoy Oursler dec rcx ; ecx = num_extra_bits = msb - N 1299d53af0cSRoy Oursler BZHI %%code, %%dist, rcx, %%len 1309d53af0cSRoy Oursler SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits 1319d53af0cSRoy Oursler lea %%dist, [%%dist + 2*rcx] ; dist = sym = dist + num_extra_bits*2 1329d53af0cSRoy Oursler mov %%len, rcx ; len = num_extra_bits 1339d53af0cSRoy Oursler movzx rcx, byte [hufftables + DCODE_TABLE_SIZE_OFFSET + %%distq WRT_OPT] 134660f49b0SGreg Tucker movzx %%dist, word [hufftables + DCODE_TABLE_OFFSET + 2 * %%distq WRT_OPT] 1359d53af0cSRoy Oursler SHLX %%code, %%code, rcx ; code = extra_bits << (sym & 0xF) 136660f49b0SGreg Tucker or %%code, %%dist ; code = (sym >> 4) | (extra_bits << (sym & 0xF)) 1379d53af0cSRoy Oursler add %%len, rcx ; len = num_extra_bits + (sym & 0xF) 138660f49b0SGreg Tucker%endm 139660f49b0SGreg Tucker 140660f49b0SGreg Tucker; Uses RCX, clobbers dist 141660f49b0SGreg Tucker; get_dist_code dist, code, len 142660f49b0SGreg Tucker%macro get_dist_code 4 1439d53af0cSRoy Oursler%define %%dist %1 ; 32-bit IN, clobbered 144660f49b0SGreg Tucker%define %%distq %1 ; 64-bit IN, clobbered 1459d53af0cSRoy Oursler%define %%code %2 ; 32-bit OUT 1469d53af0cSRoy Oursler%define %%len %3 ; 32-bit OUT 147660f49b0SGreg Tucker%define %%hufftables %4 148660f49b0SGreg Tucker 1499d53af0cSRoy Oursler cmp %%dist, DIST_TABLE_SIZE - 1 150660f49b0SGreg Tucker jg %%do_compute 1519d53af0cSRoy Oursler%ifndef IACA 1529d53af0cSRoy Oursler mov %%len %+ d, dword [hufftables + DIST_TABLE_OFFSET + 4*(%%distq + 1) WRT_OPT] 153660f49b0SGreg Tucker mov %%code, %%len 154660f49b0SGreg Tucker and %%len, 0x1F; 155660f49b0SGreg Tucker shr %%code, 5 156660f49b0SGreg Tucker jmp %%done 1579d53af0cSRoy Oursler%endif 158660f49b0SGreg Tucker%%do_compute: 159660f49b0SGreg Tucker compute_dist_code %%distq, %%code, %%len, %%hufftables 160660f49b0SGreg Tucker%%done: 161660f49b0SGreg Tucker%endm 162660f49b0SGreg Tucker 163660f49b0SGreg Tucker%macro get_packed_dist_code 3 164660f49b0SGreg Tucker%define %%dist %1 ; 64-bit IN 165660f49b0SGreg Tucker%define %%code_len %2d ; 32-bit OUT 166660f49b0SGreg Tucker%define %%hufftables %3 ; address of the hufftable 167660f49b0SGreg Tucker%endm 168660f49b0SGreg Tucker 169660f49b0SGreg Tucker%endif 170660f49b0SGreg Tucker 171660f49b0SGreg Tucker 17201dfbcc4SRoy Oursler; Macros for doing Huffman Encoding 17301dfbcc4SRoy Oursler 17401dfbcc4SRoy Oursler; Assumes (dist != 0) 17501dfbcc4SRoy Oursler; Uses RCX, clobbers dist 17601dfbcc4SRoy Oursler; void compute_dist_code dist, code, len 17701dfbcc4SRoy Oursler%macro compute_dist_icf_code 3 17801dfbcc4SRoy Oursler%define %%dist %1 ; IN, clobbered 17901dfbcc4SRoy Oursler%define %%distq %1 18001dfbcc4SRoy Oursler%define %%code %2 ; OUT 18101dfbcc4SRoy Oursler%define %%tmp1 %3 18201dfbcc4SRoy Oursler 18301dfbcc4SRoy Oursler bsr rcx, %%dist ; ecx = msb = bsr(dist) 18401dfbcc4SRoy Oursler dec rcx ; ecx = num_extra_bits = msb - N 18501dfbcc4SRoy Oursler BZHI %%code, %%dist, rcx, %%tmp1 18601dfbcc4SRoy Oursler SHRX %%dist, %%dist, rcx ; dist >>= num_extra_bits 18701dfbcc4SRoy Oursler lea %%dist, [%%dist + 2*rcx] ; code = sym = dist + num_extra_bits*2 18801dfbcc4SRoy Oursler shl %%code, EXTRA_BITS_OFFSET - DIST_OFFSET 18901dfbcc4SRoy Oursler add %%code, %%dist ; code = extra_bits | sym 19001dfbcc4SRoy Oursler 19101dfbcc4SRoy Oursler%endm 19201dfbcc4SRoy Oursler 19301dfbcc4SRoy Oursler; Uses RCX, clobbers dist 19401dfbcc4SRoy Oursler; get_dist_code dist, code, len 19501dfbcc4SRoy Oursler%macro get_dist_icf_code 3 19601dfbcc4SRoy Oursler%define %%dist %1 ; 32-bit IN, clobbered 19701dfbcc4SRoy Oursler%define %%distq %1 ; 64-bit IN, clobbered 19801dfbcc4SRoy Oursler%define %%code %2 ; 32-bit OUT 19901dfbcc4SRoy Oursler%define %%tmp1 %3 20001dfbcc4SRoy Oursler 20101dfbcc4SRoy Oursler cmp %%dist, 1 20201dfbcc4SRoy Oursler jg %%do_compute 20301dfbcc4SRoy Oursler 20401dfbcc4SRoy Oursler%ifnidn %%code, %%dist 20501dfbcc4SRoy Oursler mov %%code, %%dist 20601dfbcc4SRoy Oursler%endif 20701dfbcc4SRoy Oursler jmp %%done 20801dfbcc4SRoy Oursler%%do_compute: 20901dfbcc4SRoy Oursler compute_dist_icf_code %%distq, %%code, %%tmp1 21001dfbcc4SRoy Oursler%%done: 21101dfbcc4SRoy Oursler shl %%code, DIST_OFFSET 21201dfbcc4SRoy Oursler%endm 21301dfbcc4SRoy Oursler 21401dfbcc4SRoy Oursler 215660f49b0SGreg Tucker; "len" can be same register as "length" 216660f49b0SGreg Tucker; get_len_code length, code, len 217660f49b0SGreg Tucker%macro get_len_code 4 218660f49b0SGreg Tucker%define %%length %1 ; 64-bit IN 219660f49b0SGreg Tucker%define %%code %2d ; 32-bit OUT 220660f49b0SGreg Tucker%define %%len %3d ; 32-bit OUT 221660f49b0SGreg Tucker%define %%hufftables %4 222660f49b0SGreg Tucker 223660f49b0SGreg Tucker mov %%len, [%%hufftables + LEN_TABLE_OFFSET + 4 * %%length] 224660f49b0SGreg Tucker mov %%code, %%len 225660f49b0SGreg Tucker and %%len, 0x1F 226660f49b0SGreg Tucker shr %%code, 5 227660f49b0SGreg Tucker%endm 228660f49b0SGreg Tucker 229660f49b0SGreg Tucker 230660f49b0SGreg Tucker%macro get_lit_code 4 231660f49b0SGreg Tucker%define %%lit %1 ; 64-bit IN or CONST 232660f49b0SGreg Tucker%define %%code %2d ; 32-bit OUT 233660f49b0SGreg Tucker%define %%len %3d ; 32-bit OUT 234660f49b0SGreg Tucker%define %%hufftables %4 235660f49b0SGreg Tucker 236660f49b0SGreg Tucker movzx %%len, byte [%%hufftables + LIT_TABLE_SIZES_OFFSET + %%lit] 237660f49b0SGreg Tucker movzx %%code, word [%%hufftables + LIT_TABLE_OFFSET + 2 * %%lit] 238660f49b0SGreg Tucker 239660f49b0SGreg Tucker%endm 240660f49b0SGreg Tucker 241660f49b0SGreg Tucker 242660f49b0SGreg Tucker;; Compute hash of first 3 bytes of data 243660f49b0SGreg Tucker%macro compute_hash 2 244660f49b0SGreg Tucker%define %%result %1d ; 32-bit reg 245660f49b0SGreg Tucker%define %%data %2d ; 32-bit reg (low byte not clobbered) 246660f49b0SGreg Tucker 247660f49b0SGreg Tucker xor %%result, %%result 248660f49b0SGreg Tucker crc32 %%result, %%data 249660f49b0SGreg Tucker%endm 250