1*13d37d77SDavid du Colombier /* Clzip - LZMA lossless data compressor
2*13d37d77SDavid du Colombier Copyright (C) 2010-2017 Antonio Diaz Diaz.
3*13d37d77SDavid du Colombier
4*13d37d77SDavid du Colombier This program is free software: you can redistribute it and/or modify
5*13d37d77SDavid du Colombier it under the terms of the GNU General Public License as published by
6*13d37d77SDavid du Colombier the Free Software Foundation, either version 2 of the License, or
7*13d37d77SDavid du Colombier (at your option) any later version.
8*13d37d77SDavid du Colombier
9*13d37d77SDavid du Colombier This program is distributed in the hope that it will be useful,
10*13d37d77SDavid du Colombier but WITHOUT ANY WARRANTY; without even the implied warranty of
11*13d37d77SDavid du Colombier MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12*13d37d77SDavid du Colombier GNU General Public License for more details.
13*13d37d77SDavid du Colombier
14*13d37d77SDavid du Colombier You should have received a copy of the GNU General Public License
15*13d37d77SDavid du Colombier along with this program. If not, see <http://www.gnu.org/licenses/>. */
16*13d37d77SDavid du Colombier
17*13d37d77SDavid du Colombier #include "lzip.h"
18*13d37d77SDavid du Colombier #include "decoder.h"
19*13d37d77SDavid du Colombier
20*13d37d77SDavid du Colombier void
Pp_show_msg(Pretty_print * pp,char * msg)21*13d37d77SDavid du Colombier Pp_show_msg(Pretty_print *pp, char *msg)
22*13d37d77SDavid du Colombier {
23*13d37d77SDavid du Colombier if (verbosity >= 0) {
24*13d37d77SDavid du Colombier if (pp->first_post) {
25*13d37d77SDavid du Colombier unsigned i;
26*13d37d77SDavid du Colombier
27*13d37d77SDavid du Colombier pp->first_post = false;
28*13d37d77SDavid du Colombier fprintf(stderr, "%s: ", pp->name);
29*13d37d77SDavid du Colombier for (i = strlen(pp->name); i < pp->longest_name; ++i)
30*13d37d77SDavid du Colombier fputc(' ', stderr);
31*13d37d77SDavid du Colombier if (!msg)
32*13d37d77SDavid du Colombier fflush(stderr);
33*13d37d77SDavid du Colombier }
34*13d37d77SDavid du Colombier if (msg)
35*13d37d77SDavid du Colombier fprintf(stderr, "%s\n", msg);
36*13d37d77SDavid du Colombier }
37*13d37d77SDavid du Colombier }
38*13d37d77SDavid du Colombier
39*13d37d77SDavid du Colombier /* Returns the number of bytes really read.
40*13d37d77SDavid du Colombier If returned value < size and no read error, means EOF was reached.
41*13d37d77SDavid du Colombier */
42*13d37d77SDavid du Colombier int
readblock(int fd,uchar * buf,int size)43*13d37d77SDavid du Colombier readblock(int fd, uchar *buf, int size)
44*13d37d77SDavid du Colombier {
45*13d37d77SDavid du Colombier int n, sz;
46*13d37d77SDavid du Colombier
47*13d37d77SDavid du Colombier for (sz = 0; sz < size; sz += n) {
48*13d37d77SDavid du Colombier n = read(fd, buf + sz, size - sz);
49*13d37d77SDavid du Colombier if (n <= 0)
50*13d37d77SDavid du Colombier break;
51*13d37d77SDavid du Colombier }
52*13d37d77SDavid du Colombier return sz;
53*13d37d77SDavid du Colombier }
54*13d37d77SDavid du Colombier
55*13d37d77SDavid du Colombier /* Returns the number of bytes really written.
56*13d37d77SDavid du Colombier If (returned value < size), it is always an error.
57*13d37d77SDavid du Colombier */
58*13d37d77SDavid du Colombier int
writeblock(int fd,uchar * buf,int size)59*13d37d77SDavid du Colombier writeblock(int fd, uchar *buf, int size)
60*13d37d77SDavid du Colombier {
61*13d37d77SDavid du Colombier int n, sz;
62*13d37d77SDavid du Colombier
63*13d37d77SDavid du Colombier for (sz = 0; sz < size; sz += n) {
64*13d37d77SDavid du Colombier n = write(fd, buf + sz, size - sz);
65*13d37d77SDavid du Colombier if (n != size - sz)
66*13d37d77SDavid du Colombier break;
67*13d37d77SDavid du Colombier }
68*13d37d77SDavid du Colombier return sz;
69*13d37d77SDavid du Colombier }
70*13d37d77SDavid du Colombier
71*13d37d77SDavid du Colombier bool
Rd_read_block(Range_decoder * rdec)72*13d37d77SDavid du Colombier Rd_read_block(Range_decoder *rdec)
73*13d37d77SDavid du Colombier {
74*13d37d77SDavid du Colombier if (!rdec->at_stream_end) {
75*13d37d77SDavid du Colombier rdec->stream_pos = readblock(rdec->infd, rdec->buffer, rd_buffer_size);
76*13d37d77SDavid du Colombier if (rdec->stream_pos != rd_buffer_size && errno) {
77*13d37d77SDavid du Colombier show_error( "Read error", errno, false );
78*13d37d77SDavid du Colombier cleanup_and_fail(1);
79*13d37d77SDavid du Colombier }
80*13d37d77SDavid du Colombier rdec->at_stream_end = (rdec->stream_pos < rd_buffer_size);
81*13d37d77SDavid du Colombier rdec->partial_member_pos += rdec->pos;
82*13d37d77SDavid du Colombier rdec->pos = 0;
83*13d37d77SDavid du Colombier }
84*13d37d77SDavid du Colombier return rdec->pos < rdec->stream_pos;
85*13d37d77SDavid du Colombier }
86*13d37d77SDavid du Colombier
87*13d37d77SDavid du Colombier void
LZd_flush_data(LZ_decoder * d)88*13d37d77SDavid du Colombier LZd_flush_data(LZ_decoder *d)
89*13d37d77SDavid du Colombier {
90*13d37d77SDavid du Colombier if (d->pos > d->stream_pos) {
91*13d37d77SDavid du Colombier int size = d->pos - d->stream_pos;
92*13d37d77SDavid du Colombier CRC32_update_buf(&d->crc, d->buffer + d->stream_pos, size);
93*13d37d77SDavid du Colombier if (d->outfd >= 0 &&
94*13d37d77SDavid du Colombier writeblock(d->outfd, d->buffer + d->stream_pos, size) != size) {
95*13d37d77SDavid du Colombier show_error( "Write error", errno, false );
96*13d37d77SDavid du Colombier cleanup_and_fail(1);
97*13d37d77SDavid du Colombier }
98*13d37d77SDavid du Colombier if (d->pos >= d->dict_size) {
99*13d37d77SDavid du Colombier d->partial_data_pos += d->pos;
100*13d37d77SDavid du Colombier d->pos = 0;
101*13d37d77SDavid du Colombier d->pos_wrapped = true;
102*13d37d77SDavid du Colombier }
103*13d37d77SDavid du Colombier d->stream_pos = d->pos;
104*13d37d77SDavid du Colombier }
105*13d37d77SDavid du Colombier }
106*13d37d77SDavid du Colombier
107*13d37d77SDavid du Colombier static bool
LZd_verify_trailer(LZ_decoder * d,Pretty_print * pp)108*13d37d77SDavid du Colombier LZd_verify_trailer(LZ_decoder *d, Pretty_print *pp)
109*13d37d77SDavid du Colombier {
110*13d37d77SDavid du Colombier File_trailer trailer;
111*13d37d77SDavid du Colombier int size = Rd_read_data(d->rdec, trailer, Ft_size);
112*13d37d77SDavid du Colombier uvlong data_size = LZd_data_position(d);
113*13d37d77SDavid du Colombier uvlong member_size = Rd_member_position(d->rdec);
114*13d37d77SDavid du Colombier bool error = false;
115*13d37d77SDavid du Colombier
116*13d37d77SDavid du Colombier if (size < Ft_size) {
117*13d37d77SDavid du Colombier error = true;
118*13d37d77SDavid du Colombier if (verbosity >= 0) {
119*13d37d77SDavid du Colombier Pp_show_msg(pp, 0);
120*13d37d77SDavid du Colombier fprintf( stderr, "Trailer truncated at trailer position %d;"
121*13d37d77SDavid du Colombier " some checks may fail.\n", size );
122*13d37d77SDavid du Colombier }
123*13d37d77SDavid du Colombier while (size < Ft_size)
124*13d37d77SDavid du Colombier trailer[size++] = 0;
125*13d37d77SDavid du Colombier }
126*13d37d77SDavid du Colombier
127*13d37d77SDavid du Colombier if (Ft_get_data_crc(trailer) != LZd_crc(d)) {
128*13d37d77SDavid du Colombier error = true;
129*13d37d77SDavid du Colombier if (verbosity >= 0) {
130*13d37d77SDavid du Colombier Pp_show_msg(pp, 0);
131*13d37d77SDavid du Colombier fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n",
132*13d37d77SDavid du Colombier Ft_get_data_crc(trailer), LZd_crc(d));
133*13d37d77SDavid du Colombier }
134*13d37d77SDavid du Colombier }
135*13d37d77SDavid du Colombier if (Ft_get_data_size(trailer) != data_size) {
136*13d37d77SDavid du Colombier error = true;
137*13d37d77SDavid du Colombier if (verbosity >= 0) {
138*13d37d77SDavid du Colombier Pp_show_msg(pp, 0);
139*13d37d77SDavid du Colombier fprintf( stderr, "Data size mismatch; trailer says %llud, data size is %llud (0x%lluX)\n",
140*13d37d77SDavid du Colombier Ft_get_data_size(trailer), data_size, data_size);
141*13d37d77SDavid du Colombier }
142*13d37d77SDavid du Colombier }
143*13d37d77SDavid du Colombier if (Ft_get_member_size(trailer) != member_size) {
144*13d37d77SDavid du Colombier error = true;
145*13d37d77SDavid du Colombier if (verbosity >= 0) {
146*13d37d77SDavid du Colombier Pp_show_msg(pp, 0);
147*13d37d77SDavid du Colombier fprintf(stderr, "Member size mismatch; trailer says %llud, member size is %llud (0x%lluX)\n",
148*13d37d77SDavid du Colombier Ft_get_member_size(trailer), member_size, member_size);
149*13d37d77SDavid du Colombier }
150*13d37d77SDavid du Colombier }
151*13d37d77SDavid du Colombier if (0 && !error && verbosity >= 2 && data_size > 0 && member_size > 0)
152*13d37d77SDavid du Colombier fprintf(stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
153*13d37d77SDavid du Colombier (double)data_size / member_size,
154*13d37d77SDavid du Colombier (8.0 * member_size) / data_size,
155*13d37d77SDavid du Colombier 100.0 * (1.0 - (double)member_size / data_size));
156*13d37d77SDavid du Colombier if (!error && verbosity >= 4)
157*13d37d77SDavid du Colombier fprintf( stderr, "CRC %08X, decompressed %9llud, compressed %8llud. ",
158*13d37d77SDavid du Colombier LZd_crc(d), data_size, member_size);
159*13d37d77SDavid du Colombier return !error;
160*13d37d77SDavid du Colombier }
161*13d37d77SDavid du Colombier
162*13d37d77SDavid du Colombier /* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
163*13d37d77SDavid du Colombier 3 = trailer error, 4 = unknown marker found. */
164*13d37d77SDavid du Colombier int
LZd_decode_member(LZ_decoder * d,Pretty_print * pp)165*13d37d77SDavid du Colombier LZd_decode_member(LZ_decoder *d, Pretty_print *pp)
166*13d37d77SDavid du Colombier {
167*13d37d77SDavid du Colombier Range_decoder *rdec = d->rdec;
168*13d37d77SDavid du Colombier Bit_model bm_literal[1<<literal_context_bits][0x300];
169*13d37d77SDavid du Colombier Bit_model bm_match[states][pos_states];
170*13d37d77SDavid du Colombier Bit_model bm_rep[states];
171*13d37d77SDavid du Colombier Bit_model bm_rep0[states];
172*13d37d77SDavid du Colombier Bit_model bm_rep1[states];
173*13d37d77SDavid du Colombier Bit_model bm_rep2[states];
174*13d37d77SDavid du Colombier Bit_model bm_len[states][pos_states];
175*13d37d77SDavid du Colombier Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
176*13d37d77SDavid du Colombier Bit_model bm_dis[modeled_distances-end_dis_model+1];
177*13d37d77SDavid du Colombier Bit_model bm_align[dis_align_size];
178*13d37d77SDavid du Colombier Len_model match_len_model;
179*13d37d77SDavid du Colombier Len_model rep_len_model;
180*13d37d77SDavid du Colombier unsigned rep0 = 0; /* rep[0-3] latest four distances */
181*13d37d77SDavid du Colombier unsigned rep1 = 0; /* used for efficient coding of */
182*13d37d77SDavid du Colombier unsigned rep2 = 0; /* repeated distances */
183*13d37d77SDavid du Colombier unsigned rep3 = 0;
184*13d37d77SDavid du Colombier State state = 0;
185*13d37d77SDavid du Colombier
186*13d37d77SDavid du Colombier Bm_array_init(bm_literal[0], (1 << literal_context_bits) * 0x300);
187*13d37d77SDavid du Colombier Bm_array_init(bm_match[0], states * pos_states);
188*13d37d77SDavid du Colombier Bm_array_init(bm_rep, states);
189*13d37d77SDavid du Colombier Bm_array_init(bm_rep0, states);
190*13d37d77SDavid du Colombier Bm_array_init(bm_rep1, states);
191*13d37d77SDavid du Colombier Bm_array_init(bm_rep2, states);
192*13d37d77SDavid du Colombier Bm_array_init(bm_len[0], states * pos_states);
193*13d37d77SDavid du Colombier Bm_array_init(bm_dis_slot[0], len_states * (1 << dis_slot_bits));
194*13d37d77SDavid du Colombier Bm_array_init(bm_dis, modeled_distances - end_dis_model + 1);
195*13d37d77SDavid du Colombier Bm_array_init(bm_align, dis_align_size);
196*13d37d77SDavid du Colombier Lm_init(&match_len_model);
197*13d37d77SDavid du Colombier Lm_init(&rep_len_model);
198*13d37d77SDavid du Colombier
199*13d37d77SDavid du Colombier Rd_load(rdec);
200*13d37d77SDavid du Colombier while (!Rd_finished(rdec)) {
201*13d37d77SDavid du Colombier int pos_state = LZd_data_position(d) & pos_state_mask;
202*13d37d77SDavid du Colombier if (Rd_decode_bit(rdec, &bm_match[state][pos_state]) == 0) /* 1st bit */ {
203*13d37d77SDavid du Colombier Bit_model * bm = bm_literal[get_lit_state(LZd_peek_prev(d))];
204*13d37d77SDavid du Colombier if (St_is_char(state)) {
205*13d37d77SDavid du Colombier state -= (state < 4) ? state : 3;
206*13d37d77SDavid du Colombier LZd_put_byte(d, Rd_decode_tree8(rdec, bm));
207*13d37d77SDavid du Colombier } else {
208*13d37d77SDavid du Colombier state -= (state < 10) ? 3 : 6;
209*13d37d77SDavid du Colombier LZd_put_byte(d, Rd_decode_matched(rdec, bm, LZd_peek(d, rep0)));
210*13d37d77SDavid du Colombier }
211*13d37d77SDavid du Colombier } else /* match or repeated match */ {
212*13d37d77SDavid du Colombier int len;
213*13d37d77SDavid du Colombier if (Rd_decode_bit(rdec, &bm_rep[state]) != 0) /* 2nd bit */ {
214*13d37d77SDavid du Colombier if (Rd_decode_bit(rdec, &bm_rep0[state]) == 0) /* 3rd bit */ {
215*13d37d77SDavid du Colombier if (Rd_decode_bit(rdec, &bm_len[state][pos_state]) == 0) /* 4th bit */ {
216*13d37d77SDavid du Colombier state = St_set_short_rep(state);
217*13d37d77SDavid du Colombier LZd_put_byte(d, LZd_peek(d, rep0));
218*13d37d77SDavid du Colombier continue;
219*13d37d77SDavid du Colombier }
220*13d37d77SDavid du Colombier } else {
221*13d37d77SDavid du Colombier unsigned distance;
222*13d37d77SDavid du Colombier if (Rd_decode_bit(rdec, &bm_rep1[state]) == 0) /* 4th bit */
223*13d37d77SDavid du Colombier distance = rep1;
224*13d37d77SDavid du Colombier else {
225*13d37d77SDavid du Colombier if (Rd_decode_bit(rdec, &bm_rep2[state]) == 0) /* 5th bit */
226*13d37d77SDavid du Colombier distance = rep2;
227*13d37d77SDavid du Colombier else {
228*13d37d77SDavid du Colombier distance = rep3;
229*13d37d77SDavid du Colombier rep3 = rep2;
230*13d37d77SDavid du Colombier }
231*13d37d77SDavid du Colombier rep2 = rep1;
232*13d37d77SDavid du Colombier }
233*13d37d77SDavid du Colombier rep1 = rep0;
234*13d37d77SDavid du Colombier rep0 = distance;
235*13d37d77SDavid du Colombier }
236*13d37d77SDavid du Colombier state = St_set_rep(state);
237*13d37d77SDavid du Colombier len = min_match_len + Rd_decode_len(rdec, &rep_len_model, pos_state);
238*13d37d77SDavid du Colombier } else /* match */ {
239*13d37d77SDavid du Colombier unsigned distance;
240*13d37d77SDavid du Colombier len = min_match_len + Rd_decode_len(rdec, &match_len_model, pos_state);
241*13d37d77SDavid du Colombier distance = Rd_decode_tree6(rdec, bm_dis_slot[get_len_state(len)]);
242*13d37d77SDavid du Colombier if (distance >= start_dis_model) {
243*13d37d77SDavid du Colombier unsigned dis_slot = distance;
244*13d37d77SDavid du Colombier int direct_bits = (dis_slot >> 1) - 1;
245*13d37d77SDavid du Colombier distance = (2 | (dis_slot & 1)) << direct_bits;
246*13d37d77SDavid du Colombier if (dis_slot < end_dis_model)
247*13d37d77SDavid du Colombier distance += Rd_decode_tree_reversed(rdec,
248*13d37d77SDavid du Colombier bm_dis + (distance - dis_slot), direct_bits);
249*13d37d77SDavid du Colombier else {
250*13d37d77SDavid du Colombier distance +=
251*13d37d77SDavid du Colombier Rd_decode(rdec, direct_bits - dis_align_bits) << dis_align_bits;
252*13d37d77SDavid du Colombier distance += Rd_decode_tree_reversed4(rdec, bm_align);
253*13d37d77SDavid du Colombier if (distance == 0xFFFFFFFFU) /* marker found */ {
254*13d37d77SDavid du Colombier Rd_normalize(rdec);
255*13d37d77SDavid du Colombier LZd_flush_data(d);
256*13d37d77SDavid du Colombier if (len == min_match_len) /* End Of Stream marker */ {
257*13d37d77SDavid du Colombier if (LZd_verify_trailer(d, pp))
258*13d37d77SDavid du Colombier /* code folded from here */
259*13d37d77SDavid du Colombier return 0;
260*13d37d77SDavid du Colombier /* unfolding */
261*13d37d77SDavid du Colombier else
262*13d37d77SDavid du Colombier /* code folded from here */
263*13d37d77SDavid du Colombier return 3;
264*13d37d77SDavid du Colombier /* unfolding */
265*13d37d77SDavid du Colombier }
266*13d37d77SDavid du Colombier if (len == min_match_len + 1) /* Sync Flush marker */ {
267*13d37d77SDavid du Colombier Rd_load(rdec);
268*13d37d77SDavid du Colombier continue;
269*13d37d77SDavid du Colombier }
270*13d37d77SDavid du Colombier if (verbosity >= 0) {
271*13d37d77SDavid du Colombier Pp_show_msg(pp, 0);
272*13d37d77SDavid du Colombier fprintf( stderr, "Unsupported marker code '%d'\n", len );
273*13d37d77SDavid du Colombier }
274*13d37d77SDavid du Colombier return 4;
275*13d37d77SDavid du Colombier }
276*13d37d77SDavid du Colombier }
277*13d37d77SDavid du Colombier }
278*13d37d77SDavid du Colombier rep3 = rep2;
279*13d37d77SDavid du Colombier rep2 = rep1;
280*13d37d77SDavid du Colombier rep1 = rep0;
281*13d37d77SDavid du Colombier rep0 = distance;
282*13d37d77SDavid du Colombier state = St_set_match(state);
283*13d37d77SDavid du Colombier if (rep0 >= d->dict_size || (rep0 >= d->pos && !d->pos_wrapped)) {
284*13d37d77SDavid du Colombier LZd_flush_data(d);
285*13d37d77SDavid du Colombier return 1;
286*13d37d77SDavid du Colombier }
287*13d37d77SDavid du Colombier }
288*13d37d77SDavid du Colombier LZd_copy_block(d, rep0, len);
289*13d37d77SDavid du Colombier }
290*13d37d77SDavid du Colombier }
291*13d37d77SDavid du Colombier LZd_flush_data(d);
292*13d37d77SDavid du Colombier return 2;
293*13d37d77SDavid du Colombier }
294*13d37d77SDavid du Colombier
295