1*13d37d77SDavid du Colombier /* Clzip - LZMA lossless data compressor
2*13d37d77SDavid du Colombier Copyright (C) 2010-2017 Antonio Diaz Diaz.
3*13d37d77SDavid du Colombier
4*13d37d77SDavid du Colombier This program is free software: you can redistribute it and/or modify
5*13d37d77SDavid du Colombier it under the terms of the GNU General Public License as published by
6*13d37d77SDavid du Colombier the Free Software Foundation, either version 2 of the License, or
7*13d37d77SDavid du Colombier (at your option) any later version.
8*13d37d77SDavid du Colombier
9*13d37d77SDavid du Colombier This program is distributed in the hope that it will be useful,
10*13d37d77SDavid du Colombier but WITHOUT ANY WARRANTY; without even the implied warranty of
11*13d37d77SDavid du Colombier MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12*13d37d77SDavid du Colombier GNU General Public License for more details.
13*13d37d77SDavid du Colombier
14*13d37d77SDavid du Colombier You should have received a copy of the GNU General Public License
15*13d37d77SDavid du Colombier along with this program. If not, see <http://www.gnu.org/licenses/>. */
16*13d37d77SDavid du Colombier
17*13d37d77SDavid du Colombier #include "lzip.h"
18*13d37d77SDavid du Colombier #include "encoder_base.h"
19*13d37d77SDavid du Colombier
20*13d37d77SDavid du Colombier Dis_slots dis_slots;
21*13d37d77SDavid du Colombier Prob_prices prob_prices;
22*13d37d77SDavid du Colombier
23*13d37d77SDavid du Colombier bool
Mb_read_block(Matchfinder_base * mb)24*13d37d77SDavid du Colombier Mb_read_block(Matchfinder_base *mb)
25*13d37d77SDavid du Colombier {
26*13d37d77SDavid du Colombier if (!mb->at_stream_end && mb->stream_pos < mb->buffer_size) {
27*13d37d77SDavid du Colombier int size = mb->buffer_size - mb->stream_pos;
28*13d37d77SDavid du Colombier int rd = readblock(mb->infd, mb->buffer + mb->stream_pos, size);
29*13d37d77SDavid du Colombier
30*13d37d77SDavid du Colombier mb->stream_pos += rd;
31*13d37d77SDavid du Colombier if (rd != size && errno) {
32*13d37d77SDavid du Colombier show_error( "Read error", errno, false );
33*13d37d77SDavid du Colombier cleanup_and_fail(1);
34*13d37d77SDavid du Colombier }
35*13d37d77SDavid du Colombier if (rd < size) {
36*13d37d77SDavid du Colombier mb->at_stream_end = true;
37*13d37d77SDavid du Colombier mb->pos_limit = mb->buffer_size;
38*13d37d77SDavid du Colombier }
39*13d37d77SDavid du Colombier }
40*13d37d77SDavid du Colombier return mb->pos < mb->stream_pos;
41*13d37d77SDavid du Colombier }
42*13d37d77SDavid du Colombier
43*13d37d77SDavid du Colombier void
Mb_normalize_pos(Matchfinder_base * mb)44*13d37d77SDavid du Colombier Mb_normalize_pos(Matchfinder_base *mb)
45*13d37d77SDavid du Colombier {
46*13d37d77SDavid du Colombier if (mb->pos > mb->stream_pos)
47*13d37d77SDavid du Colombier internal_error( "pos > stream_pos in Mb_normalize_pos." );
48*13d37d77SDavid du Colombier if (!mb->at_stream_end) {
49*13d37d77SDavid du Colombier int i, offset = mb->pos - mb->before_size - mb->dict_size;
50*13d37d77SDavid du Colombier int size = mb->stream_pos - offset;
51*13d37d77SDavid du Colombier
52*13d37d77SDavid du Colombier memmove(mb->buffer, mb->buffer + offset, size);
53*13d37d77SDavid du Colombier mb->partial_data_pos += offset;
54*13d37d77SDavid du Colombier mb->pos -= offset; /* pos = before_size + dict_size */
55*13d37d77SDavid du Colombier mb->stream_pos -= offset;
56*13d37d77SDavid du Colombier for (i = 0; i < mb->num_prev_positions; ++i)
57*13d37d77SDavid du Colombier if (mb->prev_positions[i] < offset)
58*13d37d77SDavid du Colombier mb->prev_positions[i] = 0;
59*13d37d77SDavid du Colombier else
60*13d37d77SDavid du Colombier mb->prev_positions[i] -= offset;
61*13d37d77SDavid du Colombier for (i = 0; i < mb->pos_array_size; ++i)
62*13d37d77SDavid du Colombier if (mb->pos_array[i] < offset)
63*13d37d77SDavid du Colombier mb->pos_array[i] = 0;
64*13d37d77SDavid du Colombier else
65*13d37d77SDavid du Colombier mb->pos_array[i] -= offset;
66*13d37d77SDavid du Colombier Mb_read_block(mb);
67*13d37d77SDavid du Colombier }
68*13d37d77SDavid du Colombier }
69*13d37d77SDavid du Colombier
70*13d37d77SDavid du Colombier bool
Mb_init(Matchfinder_base * mb,int before,int dict_size,int after_size,int dict_factor,int num_prev_positions23,int pos_array_factor,int ifd)71*13d37d77SDavid du Colombier Mb_init(Matchfinder_base *mb, int before, int dict_size, int after_size, int dict_factor, int num_prev_positions23, int pos_array_factor, int ifd)
72*13d37d77SDavid du Colombier {
73*13d37d77SDavid du Colombier int buffer_size_limit = (dict_factor * dict_size) + before + after_size;
74*13d37d77SDavid du Colombier unsigned size;
75*13d37d77SDavid du Colombier int i;
76*13d37d77SDavid du Colombier
77*13d37d77SDavid du Colombier mb->partial_data_pos = 0;
78*13d37d77SDavid du Colombier mb->before_size = before;
79*13d37d77SDavid du Colombier mb->pos = 0;
80*13d37d77SDavid du Colombier mb->cyclic_pos = 0;
81*13d37d77SDavid du Colombier mb->stream_pos = 0;
82*13d37d77SDavid du Colombier mb->infd = ifd;
83*13d37d77SDavid du Colombier mb->at_stream_end = false;
84*13d37d77SDavid du Colombier
85*13d37d77SDavid du Colombier mb->buffer_size = max(65536, dict_size);
86*13d37d77SDavid du Colombier mb->buffer = (uchar *)malloc(mb->buffer_size);
87*13d37d77SDavid du Colombier if (!mb->buffer)
88*13d37d77SDavid du Colombier return false;
89*13d37d77SDavid du Colombier if (Mb_read_block(mb) && !mb->at_stream_end &&
90*13d37d77SDavid du Colombier mb->buffer_size < buffer_size_limit) {
91*13d37d77SDavid du Colombier uchar * tmp;
92*13d37d77SDavid du Colombier mb->buffer_size = buffer_size_limit;
93*13d37d77SDavid du Colombier tmp = (uchar *)realloc(mb->buffer, mb->buffer_size);
94*13d37d77SDavid du Colombier if (!tmp) {
95*13d37d77SDavid du Colombier free(mb->buffer);
96*13d37d77SDavid du Colombier return false;
97*13d37d77SDavid du Colombier }
98*13d37d77SDavid du Colombier mb->buffer = tmp;
99*13d37d77SDavid du Colombier Mb_read_block(mb);
100*13d37d77SDavid du Colombier }
101*13d37d77SDavid du Colombier if (mb->at_stream_end && mb->stream_pos < dict_size)
102*13d37d77SDavid du Colombier mb->dict_size = max(min_dict_size, mb->stream_pos);
103*13d37d77SDavid du Colombier else
104*13d37d77SDavid du Colombier mb->dict_size = dict_size;
105*13d37d77SDavid du Colombier mb->pos_limit = mb->buffer_size;
106*13d37d77SDavid du Colombier if (!mb->at_stream_end)
107*13d37d77SDavid du Colombier mb->pos_limit -= after_size;
108*13d37d77SDavid du Colombier size = real_bits(mb->dict_size - 1) - 2;
109*13d37d77SDavid du Colombier if (size < 16)
110*13d37d77SDavid du Colombier size = 16;
111*13d37d77SDavid du Colombier size = 1 << size;
112*13d37d77SDavid du Colombier // if (mb->dict_size > (1 << 26)) /* 64 MiB */
113*13d37d77SDavid du Colombier // size >>= 1;
114*13d37d77SDavid du Colombier mb->key4_mask = size - 1;
115*13d37d77SDavid du Colombier size += num_prev_positions23;
116*13d37d77SDavid du Colombier
117*13d37d77SDavid du Colombier mb->num_prev_positions = size;
118*13d37d77SDavid du Colombier mb->pos_array_size = pos_array_factor * (mb->dict_size + 1);
119*13d37d77SDavid du Colombier size += mb->pos_array_size;
120*13d37d77SDavid du Colombier if (size * sizeof mb->prev_positions[0] <= size)
121*13d37d77SDavid du Colombier mb->prev_positions = 0;
122*13d37d77SDavid du Colombier else
123*13d37d77SDavid du Colombier mb->prev_positions =
124*13d37d77SDavid du Colombier (int32_t *)malloc(size * sizeof mb->prev_positions[0]);
125*13d37d77SDavid du Colombier if (!mb->prev_positions) {
126*13d37d77SDavid du Colombier free(mb->buffer);
127*13d37d77SDavid du Colombier return false;
128*13d37d77SDavid du Colombier }
129*13d37d77SDavid du Colombier mb->pos_array = mb->prev_positions + mb->num_prev_positions;
130*13d37d77SDavid du Colombier for (i = 0; i < mb->num_prev_positions; ++i)
131*13d37d77SDavid du Colombier mb->prev_positions[i] = 0;
132*13d37d77SDavid du Colombier return true;
133*13d37d77SDavid du Colombier }
134*13d37d77SDavid du Colombier
135*13d37d77SDavid du Colombier void
Mb_reset(Matchfinder_base * mb)136*13d37d77SDavid du Colombier Mb_reset(Matchfinder_base *mb)
137*13d37d77SDavid du Colombier {
138*13d37d77SDavid du Colombier int i;
139*13d37d77SDavid du Colombier
140*13d37d77SDavid du Colombier if (mb->stream_pos > mb->pos)
141*13d37d77SDavid du Colombier memmove(mb->buffer, mb->buffer + mb->pos, mb->stream_pos - mb->pos);
142*13d37d77SDavid du Colombier mb->partial_data_pos = 0;
143*13d37d77SDavid du Colombier mb->stream_pos -= mb->pos;
144*13d37d77SDavid du Colombier mb->pos = 0;
145*13d37d77SDavid du Colombier mb->cyclic_pos = 0;
146*13d37d77SDavid du Colombier for (i = 0; i < mb->num_prev_positions; ++i)
147*13d37d77SDavid du Colombier mb->prev_positions[i] = 0;
148*13d37d77SDavid du Colombier Mb_read_block(mb);
149*13d37d77SDavid du Colombier }
150*13d37d77SDavid du Colombier
151*13d37d77SDavid du Colombier void
Re_flush_data(Range_encoder * renc)152*13d37d77SDavid du Colombier Re_flush_data(Range_encoder *renc)
153*13d37d77SDavid du Colombier {
154*13d37d77SDavid du Colombier if (renc->pos > 0) {
155*13d37d77SDavid du Colombier if (renc->outfd >= 0 &&
156*13d37d77SDavid du Colombier writeblock(renc->outfd, renc->buffer, renc->pos) != renc->pos) {
157*13d37d77SDavid du Colombier show_error( "Write error", errno, false );
158*13d37d77SDavid du Colombier cleanup_and_fail(1);
159*13d37d77SDavid du Colombier }
160*13d37d77SDavid du Colombier renc->partial_member_pos += renc->pos;
161*13d37d77SDavid du Colombier renc->pos = 0;
162*13d37d77SDavid du Colombier show_progress(0, 0, 0, 0);
163*13d37d77SDavid du Colombier }
164*13d37d77SDavid du Colombier }
165*13d37d77SDavid du Colombier
166*13d37d77SDavid du Colombier /* End Of Stream mark => (dis == 0xFFFFFFFFU, len == min_match_len) */
167*13d37d77SDavid du Colombier void
LZeb_full_flush(LZ_encoder_base * eb,State state)168*13d37d77SDavid du Colombier LZeb_full_flush(LZ_encoder_base *eb, State state)
169*13d37d77SDavid du Colombier {
170*13d37d77SDavid du Colombier int i;
171*13d37d77SDavid du Colombier int pos_state = Mb_data_position(&eb->mb) & pos_state_mask;
172*13d37d77SDavid du Colombier File_trailer trailer;
173*13d37d77SDavid du Colombier Re_encode_bit(&eb->renc, &eb->bm_match[state][pos_state], 1);
174*13d37d77SDavid du Colombier Re_encode_bit(&eb->renc, &eb->bm_rep[state], 0);
175*13d37d77SDavid du Colombier LZeb_encode_pair(eb, 0xFFFFFFFFU, min_match_len, pos_state);
176*13d37d77SDavid du Colombier Re_flush(&eb->renc);
177*13d37d77SDavid du Colombier Ft_set_data_crc(trailer, LZeb_crc(eb));
178*13d37d77SDavid du Colombier Ft_set_data_size(trailer, Mb_data_position(&eb->mb));
179*13d37d77SDavid du Colombier Ft_set_member_size(trailer, Re_member_position(&eb->renc) + Ft_size);
180*13d37d77SDavid du Colombier for (i = 0; i < Ft_size; ++i)
181*13d37d77SDavid du Colombier Re_put_byte(&eb->renc, trailer[i]);
182*13d37d77SDavid du Colombier Re_flush_data(&eb->renc);
183*13d37d77SDavid du Colombier }
184*13d37d77SDavid du Colombier
185*13d37d77SDavid du Colombier void
LZeb_reset(LZ_encoder_base * eb)186*13d37d77SDavid du Colombier LZeb_reset(LZ_encoder_base *eb)
187*13d37d77SDavid du Colombier {
188*13d37d77SDavid du Colombier Mb_reset(&eb->mb);
189*13d37d77SDavid du Colombier eb->crc = 0xFFFFFFFFU;
190*13d37d77SDavid du Colombier Bm_array_init(eb->bm_literal[0], (1 << literal_context_bits) * 0x300);
191*13d37d77SDavid du Colombier Bm_array_init(eb->bm_match[0], states * pos_states);
192*13d37d77SDavid du Colombier Bm_array_init(eb->bm_rep, states);
193*13d37d77SDavid du Colombier Bm_array_init(eb->bm_rep0, states);
194*13d37d77SDavid du Colombier Bm_array_init(eb->bm_rep1, states);
195*13d37d77SDavid du Colombier Bm_array_init(eb->bm_rep2, states);
196*13d37d77SDavid du Colombier Bm_array_init(eb->bm_len[0], states * pos_states);
197*13d37d77SDavid du Colombier Bm_array_init(eb->bm_dis_slot[0], len_states * (1 << dis_slot_bits));
198*13d37d77SDavid du Colombier Bm_array_init(eb->bm_dis, modeled_distances - end_dis_model + 1);
199*13d37d77SDavid du Colombier Bm_array_init(eb->bm_align, dis_align_size);
200*13d37d77SDavid du Colombier Lm_init(&eb->match_len_model);
201*13d37d77SDavid du Colombier Lm_init(&eb->rep_len_model);
202*13d37d77SDavid du Colombier Re_reset(&eb->renc);
203*13d37d77SDavid du Colombier }
204