/* Clzip - LZMA lossless data compressor Copyright (C) 2010-2017 Antonio Diaz Diaz. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "lzip.h" static void Dis_slots_init(void) { int i, size, slot; for (slot = 0; slot < 4; ++slot) dis_slots[slot] = slot; for (i = 4, size = 2, slot = 4; slot < 20; slot += 2) { memset(&dis_slots[i], slot, size); memset(&dis_slots[i+size], slot + 1, size); size <<= 1; i += size; } } static uchar get_slot(unsigned dis) { if (dis < (1 << 10)) return dis_slots[dis]; if (dis < (1 << 19)) return dis_slots[dis>> 9] + 18; if (dis < (1 << 28)) return dis_slots[dis>>18] + 36; return dis_slots[dis>>27] + 54; } static void Prob_prices_init(void) { int i, j; for (i = 0; i < bit_model_total >> price_step_bits; ++i) { unsigned val = (i * price_step) + (price_step / 2); int bits = 0; /* base 2 logarithm of val */ for (j = 0; j < price_shift_bits; ++j) { val = val * val; bits <<= 1; while (val >= (1 << 16)) { val >>= 1; ++bits; } } bits += 15; /* remaining bits in val */ prob_prices[i] = (bit_model_total_bits << price_shift_bits) - bits; } } static int price_symbol3(Bit_model bm[], int symbol) { int price; bool bit = symbol & 1; symbol |= 8; symbol >>= 1; price = price_bit(bm[symbol], bit); bit = symbol & 1; symbol >>= 1; price += price_bit(bm[symbol], bit); return price + price_bit(bm[1], symbol & 1); } static int price_symbol6(Bit_model bm[], unsigned symbol) { int price; bool bit = symbol & 1; symbol |= 64; symbol >>= 1; price = price_bit(bm[symbol], bit); bit = symbol & 1; symbol >>= 1; price += price_bit(bm[symbol], bit); bit = symbol & 1; symbol >>= 1; price += price_bit(bm[symbol], bit); bit = symbol & 1; symbol >>= 1; price += price_bit(bm[symbol], bit); bit = symbol & 1; symbol >>= 1; price += price_bit(bm[symbol], bit); return price + price_bit(bm[1], symbol & 1); } static int price_symbol8(Bit_model bm[], int symbol) { int price; bool bit = symbol & 1; symbol |= 0x100; symbol >>= 1; price = price_bit(bm[symbol], bit); bit = symbol & 1; symbol >>= 1; price += price_bit(bm[symbol], bit); bit = symbol & 1; symbol >>= 1; price += price_bit(bm[symbol], bit); bit = symbol & 1; symbol >>= 1; price += price_bit(bm[symbol], bit); bit = symbol & 1; symbol >>= 1; price += price_bit(bm[symbol], bit); bit = symbol & 1; symbol >>= 1; price += price_bit(bm[symbol], bit); bit = symbol & 1; symbol >>= 1; price += price_bit(bm[symbol], bit); return price + price_bit(bm[1], symbol & 1); } static int price_symbol_reversed(Bit_model bm[], int symbol, int num_bits) { int price = 0; int model = 1; int i; for (i = num_bits; i > 0; --i) { bool bit = symbol & 1; symbol >>= 1; price += price_bit(bm[model], bit); model = (model << 1) | bit; } return price; } static int price_matched(Bit_model bm[], unsigned symbol, unsigned match_byte) { int price = 0; unsigned mask = 0x100; symbol |= mask; for (;;) { unsigned match_bit = (match_byte <<= 1) & mask; bool bit = (symbol <<= 1) & 0x100; price += price_bit(bm[(symbol>>9) + match_bit + mask], bit); if (symbol >= 0x10000) return price; mask &= ~(match_bit ^ symbol); /* if(match_bit != bit) mask = 0; */ } } struct Matchfinder_base { uvlong partial_data_pos; uchar * buffer; /* input buffer */ int32_t * prev_positions; /* 1 + last seen position of key. else 0 */ int32_t * pos_array; /* may be tree or chain */ int before_size; /* bytes to keep in buffer before dictionary */ int buffer_size; int dict_size; /* bytes to keep in buffer before pos */ int pos; /* current pos in buffer */ int cyclic_pos; /* cycles through [0, dict_size] */ int stream_pos; /* first byte not yet read from file */ int pos_limit; /* when reached, a new block must be read */ int key4_mask; int num_prev_positions; /* size of prev_positions */ int pos_array_size; int infd; /* input file descriptor */ bool at_stream_end; /* stream_pos shows real end of file */ }; bool Mb_read_block(Matchfinder_base *mb); void Mb_normalize_pos(Matchfinder_base *mb); bool Mb_init(Matchfinder_base *mb, int before, int dict_size, int after_size, int dict_factor, int num_prev_positions23, int pos_array_factor, int ifd); static void Mb_free(Matchfinder_base *mb) { free(mb->prev_positions); free(mb->buffer); } static int Mb_avail_bytes(Matchfinder_base *mb) { return mb->stream_pos - mb->pos; } static uvlong Mb_data_position(Matchfinder_base *mb) { return mb->partial_data_pos + mb->pos; } static bool Mb_data_finished(Matchfinder_base *mb) { return mb->at_stream_end && mb->pos >= mb->stream_pos; } static int Mb_true_match_len(Matchfinder_base *mb, int index, int distance) { uchar * data = mb->buffer + mb->pos; int i = index; int len_limit = min(Mb_avail_bytes(mb), max_match_len); while (i < len_limit && data[i-distance] == data[i]) ++i; return i; } static void Mb_move_pos(Matchfinder_base *mb) { if (++mb->cyclic_pos > mb->dict_size) mb->cyclic_pos = 0; if (++mb->pos >= mb->pos_limit) Mb_normalize_pos(mb); } void Mb_reset(Matchfinder_base *mb); enum { re_buffer_size = 65536 }; typedef struct LZ_encoder_base LZ_encoder_base; typedef struct Matchfinder_base Matchfinder_base; typedef struct Range_encoder Range_encoder; struct Range_encoder { uvlong low; uvlong partial_member_pos; uchar * buffer; /* output buffer */ int pos; /* current pos in buffer */ uint32_t range; unsigned ff_count; int outfd; /* output file descriptor */ uchar cache; File_header header; }; void Re_flush_data(Range_encoder *renc); static void Re_put_byte(Range_encoder *renc, uchar b) { renc->buffer[renc->pos] = b; if (++renc->pos >= re_buffer_size) Re_flush_data(renc); } static void Re_shift_low(Range_encoder *renc) { if (renc->low >> 24 != 0xFF) { bool carry = (renc->low > 0xFFFFFFFFU); Re_put_byte(renc, renc->cache + carry); for (; renc->ff_count > 0; --renc->ff_count) Re_put_byte(renc, 0xFF + carry); renc->cache = renc->low >> 24; } else ++renc->ff_count; renc->low = (renc->low & 0x00FFFFFFU) << 8; } static void Re_reset(Range_encoder *renc) { int i; renc->low = 0; renc->partial_member_pos = 0; renc->pos = 0; renc->range = 0xFFFFFFFFU; renc->ff_count = 0; renc->cache = 0; for (i = 0; i < Fh_size; ++i) Re_put_byte(renc, renc->header[i]); } static bool Re_init(Range_encoder *renc, unsigned dict_size, int ofd) { renc->buffer = (uchar *)malloc(re_buffer_size); if (!renc->buffer) return false; renc->outfd = ofd; Fh_set_magic(renc->header); Fh_set_dict_size(renc->header, dict_size); Re_reset(renc); return true; } static void Re_free(Range_encoder *renc) { free(renc->buffer); } static uvlong Re_member_position(Range_encoder *renc) { return renc->partial_member_pos + renc->pos + renc->ff_count; } static void Re_flush(Range_encoder *renc) { int i; for (i = 0; i < 5; ++i) Re_shift_low(renc); } static void Re_encode(Range_encoder *renc, int symbol, int num_bits) { unsigned mask; for (mask = 1 << (num_bits - 1); mask > 0; mask >>= 1) { renc->range >>= 1; if (symbol & mask) renc->low += renc->range; if (renc->range <= 0x00FFFFFFU) { renc->range <<= 8; Re_shift_low(renc); } } } static void Re_encode_bit(Range_encoder *renc, Bit_model *probability, bool bit) { Bit_model prob = *probability; uint32_t bound = (renc->range >> bit_model_total_bits) * prob; if (!bit) { renc->range = bound; *probability += (bit_model_total - prob) >> bit_model_move_bits; } else { renc->low += bound; renc->range -= bound; *probability -= prob >> bit_model_move_bits; } if (renc->range <= 0x00FFFFFFU) { renc->range <<= 8; Re_shift_low(renc); } } static void Re_encode_tree3(Range_encoder *renc, Bit_model bm[], int symbol) { int model = 1; bool bit = (symbol >> 2) & 1; Re_encode_bit(renc, &bm[model], bit); model = (model << 1) | bit; bit = (symbol >> 1) & 1; Re_encode_bit(renc, &bm[model], bit); model = (model << 1) | bit; Re_encode_bit(renc, &bm[model], symbol & 1); } static void Re_encode_tree6(Range_encoder *renc, Bit_model bm[], unsigned symbol) { int model = 1; bool bit = (symbol >> 5) & 1; Re_encode_bit(renc, &bm[model], bit); model = (model << 1) | bit; bit = (symbol >> 4) & 1; Re_encode_bit(renc, &bm[model], bit); model = (model << 1) | bit; bit = (symbol >> 3) & 1; Re_encode_bit(renc, &bm[model], bit); model = (model << 1) | bit; bit = (symbol >> 2) & 1; Re_encode_bit(renc, &bm[model], bit); model = (model << 1) | bit; bit = (symbol >> 1) & 1; Re_encode_bit(renc, &bm[model], bit); model = (model << 1) | bit; Re_encode_bit(renc, &bm[model], symbol & 1); } static void Re_encode_tree8(Range_encoder *renc, Bit_model bm[], int symbol) { int model = 1; int i; for (i = 7; i >= 0; --i) { bool bit = (symbol >> i) & 1; Re_encode_bit(renc, &bm[model], bit); model = (model << 1) | bit; } } static void Re_encode_tree_reversed(Range_encoder *renc, Bit_model bm[], int symbol, int num_bits) { int model = 1; int i; for (i = num_bits; i > 0; --i) { bool bit = symbol & 1; symbol >>= 1; Re_encode_bit(renc, &bm[model], bit); model = (model << 1) | bit; } } static void Re_encode_matched(Range_encoder *renc, Bit_model bm[], unsigned symbol, unsigned match_byte) { unsigned mask = 0x100; symbol |= mask; while (true) { unsigned match_bit = (match_byte <<= 1) & mask; bool bit = (symbol <<= 1) & 0x100; Re_encode_bit(renc, &bm[(symbol>>9)+match_bit+mask], bit); if (symbol >= 0x10000) break; mask &= ~(match_bit ^ symbol); /* if(match_bit != bit) mask = 0; */ } } static void Re_encode_len(struct Range_encoder *renc, Len_model *lm, int symbol, int pos_state) { bool bit = ((symbol -= min_match_len) >= len_low_syms); Re_encode_bit(renc, &lm->choice1, bit); if (!bit) Re_encode_tree3(renc, lm->bm_low[pos_state], symbol); else { bit = ((symbol -= len_low_syms) >= len_mid_syms); Re_encode_bit(renc, &lm->choice2, bit); if (!bit) Re_encode_tree3(renc, lm->bm_mid[pos_state], symbol); else Re_encode_tree8(renc, lm->bm_high, symbol - len_mid_syms); } } enum { max_marker_size = 16, num_rep_distances = 4 /* must be 4 */ }; struct LZ_encoder_base { struct Matchfinder_base mb; uint32_t crc; Bit_model bm_literal[1<mb, before, dict_size, after_size, dict_factor, num_prev_positions23, pos_array_factor, ifd)) return false; if (!Re_init(&eb->renc, eb->mb.dict_size, outfd)) return false; LZeb_reset(eb); return true; } static void LZeb_free(LZ_encoder_base *eb) { Re_free(&eb->renc); Mb_free(&eb->mb); } static unsigned LZeb_crc(LZ_encoder_base *eb) { return eb->crc ^ 0xFFFFFFFFU; } static int LZeb_price_literal(LZ_encoder_base *eb, uchar prev_byte, uchar symbol) { return price_symbol8(eb->bm_literal[get_lit_state(prev_byte)], symbol); } static int LZeb_price_matched(LZ_encoder_base *eb, uchar prev_byte, uchar symbol, uchar match_byte) { return price_matched(eb->bm_literal[get_lit_state(prev_byte)], symbol, match_byte); } static void LZeb_encode_literal(LZ_encoder_base *eb, uchar prev_byte, uchar symbol) { Re_encode_tree8(&eb->renc, eb->bm_literal[get_lit_state(prev_byte)], symbol); } static void LZeb_encode_matched(LZ_encoder_base *eb, uchar prev_byte, uchar symbol, uchar match_byte) { Re_encode_matched(&eb->renc, eb->bm_literal[get_lit_state(prev_byte)], symbol, match_byte); } static void LZeb_encode_pair(LZ_encoder_base *eb, unsigned dis, int len, int pos_state) { unsigned dis_slot = get_slot(dis); Re_encode_len(&eb->renc, &eb->match_len_model, len, pos_state); Re_encode_tree6(&eb->renc, eb->bm_dis_slot[get_len_state(len)], dis_slot); if (dis_slot >= start_dis_model) { int direct_bits = (dis_slot >> 1) - 1; unsigned base = (2 | (dis_slot & 1)) << direct_bits; unsigned direct_dis = dis - base; if (dis_slot < end_dis_model) Re_encode_tree_reversed(&eb->renc, eb->bm_dis + (base - dis_slot), direct_dis, direct_bits); else { Re_encode(&eb->renc, direct_dis >> dis_align_bits, direct_bits - dis_align_bits); Re_encode_tree_reversed(&eb->renc, eb->bm_align, direct_dis, dis_align_bits); } } } void LZeb_full_flush(LZ_encoder_base *eb, State state);