1 /* Clzip - LZMA lossless data compressor
2 Copyright (C) 2010-2017 Antonio Diaz Diaz.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 #include "lzip.h"
18 #include "decoder.h"
19
20 void
Pp_show_msg(Pretty_print * pp,char * msg)21 Pp_show_msg(Pretty_print *pp, char *msg)
22 {
23 if (verbosity >= 0) {
24 if (pp->first_post) {
25 unsigned i;
26
27 pp->first_post = false;
28 fprintf(stderr, "%s: ", pp->name);
29 for (i = strlen(pp->name); i < pp->longest_name; ++i)
30 fputc(' ', stderr);
31 if (!msg)
32 fflush(stderr);
33 }
34 if (msg)
35 fprintf(stderr, "%s\n", msg);
36 }
37 }
38
39 /* Returns the number of bytes really read.
40 If returned value < size and no read error, means EOF was reached.
41 */
42 int
readblock(int fd,uchar * buf,int size)43 readblock(int fd, uchar *buf, int size)
44 {
45 int n, sz;
46
47 for (sz = 0; sz < size; sz += n) {
48 n = read(fd, buf + sz, size - sz);
49 if (n <= 0)
50 break;
51 }
52 return sz;
53 }
54
55 /* Returns the number of bytes really written.
56 If (returned value < size), it is always an error.
57 */
58 int
writeblock(int fd,uchar * buf,int size)59 writeblock(int fd, uchar *buf, int size)
60 {
61 int n, sz;
62
63 for (sz = 0; sz < size; sz += n) {
64 n = write(fd, buf + sz, size - sz);
65 if (n != size - sz)
66 break;
67 }
68 return sz;
69 }
70
71 bool
Rd_read_block(Range_decoder * rdec)72 Rd_read_block(Range_decoder *rdec)
73 {
74 if (!rdec->at_stream_end) {
75 rdec->stream_pos = readblock(rdec->infd, rdec->buffer, rd_buffer_size);
76 if (rdec->stream_pos != rd_buffer_size && errno) {
77 show_error( "Read error", errno, false );
78 cleanup_and_fail(1);
79 }
80 rdec->at_stream_end = (rdec->stream_pos < rd_buffer_size);
81 rdec->partial_member_pos += rdec->pos;
82 rdec->pos = 0;
83 }
84 return rdec->pos < rdec->stream_pos;
85 }
86
87 void
LZd_flush_data(LZ_decoder * d)88 LZd_flush_data(LZ_decoder *d)
89 {
90 if (d->pos > d->stream_pos) {
91 int size = d->pos - d->stream_pos;
92 CRC32_update_buf(&d->crc, d->buffer + d->stream_pos, size);
93 if (d->outfd >= 0 &&
94 writeblock(d->outfd, d->buffer + d->stream_pos, size) != size) {
95 show_error( "Write error", errno, false );
96 cleanup_and_fail(1);
97 }
98 if (d->pos >= d->dict_size) {
99 d->partial_data_pos += d->pos;
100 d->pos = 0;
101 d->pos_wrapped = true;
102 }
103 d->stream_pos = d->pos;
104 }
105 }
106
107 static bool
LZd_verify_trailer(LZ_decoder * d,Pretty_print * pp)108 LZd_verify_trailer(LZ_decoder *d, Pretty_print *pp)
109 {
110 File_trailer trailer;
111 int size = Rd_read_data(d->rdec, trailer, Ft_size);
112 uvlong data_size = LZd_data_position(d);
113 uvlong member_size = Rd_member_position(d->rdec);
114 bool error = false;
115
116 if (size < Ft_size) {
117 error = true;
118 if (verbosity >= 0) {
119 Pp_show_msg(pp, 0);
120 fprintf( stderr, "Trailer truncated at trailer position %d;"
121 " some checks may fail.\n", size );
122 }
123 while (size < Ft_size)
124 trailer[size++] = 0;
125 }
126
127 if (Ft_get_data_crc(trailer) != LZd_crc(d)) {
128 error = true;
129 if (verbosity >= 0) {
130 Pp_show_msg(pp, 0);
131 fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n",
132 Ft_get_data_crc(trailer), LZd_crc(d));
133 }
134 }
135 if (Ft_get_data_size(trailer) != data_size) {
136 error = true;
137 if (verbosity >= 0) {
138 Pp_show_msg(pp, 0);
139 fprintf( stderr, "Data size mismatch; trailer says %llud, data size is %llud (0x%lluX)\n",
140 Ft_get_data_size(trailer), data_size, data_size);
141 }
142 }
143 if (Ft_get_member_size(trailer) != member_size) {
144 error = true;
145 if (verbosity >= 0) {
146 Pp_show_msg(pp, 0);
147 fprintf(stderr, "Member size mismatch; trailer says %llud, member size is %llud (0x%lluX)\n",
148 Ft_get_member_size(trailer), member_size, member_size);
149 }
150 }
151 if (0 && !error && verbosity >= 2 && data_size > 0 && member_size > 0)
152 fprintf(stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved. ",
153 (double)data_size / member_size,
154 (8.0 * member_size) / data_size,
155 100.0 * (1.0 - (double)member_size / data_size));
156 if (!error && verbosity >= 4)
157 fprintf( stderr, "CRC %08X, decompressed %9llud, compressed %8llud. ",
158 LZd_crc(d), data_size, member_size);
159 return !error;
160 }
161
/* Decodes one member, reading coded input through d->rdec and producing
   output through LZd_put_byte / LZd_copy_block, until a marker is found
   or the range decoder reports that its input is finished.
   All probability models are local to the call and are initialized on
   entry. 'pp' is only used for diagnostics.
   Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
   3 = trailer error, 4 = unknown marker found. */
int
LZd_decode_member(LZ_decoder *d, Pretty_print *pp)
{
	Range_decoder *rdec = d->rdec;
	/* bit models, one array per kind of coded decision */
	Bit_model bm_literal[1<<literal_context_bits][0x300];
	Bit_model bm_match[states][pos_states];
	Bit_model bm_rep[states];
	Bit_model bm_rep0[states];
	Bit_model bm_rep1[states];
	Bit_model bm_rep2[states];
	Bit_model bm_len[states][pos_states];
	Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
	Bit_model bm_dis[modeled_distances-end_dis_model+1];
	Bit_model bm_align[dis_align_size];
	Len_model match_len_model;
	Len_model rep_len_model;
	unsigned rep0 = 0;		/* rep[0-3] latest four distances */
	unsigned rep1 = 0;		/* used for efficient coding of */
	unsigned rep2 = 0;		/* repeated distances */
	unsigned rep3 = 0;
	State state = 0;

	/* the two-dimensional arrays are initialized as flat arrays
	   through the address of their first row */
	Bm_array_init(bm_literal[0], (1 << literal_context_bits) * 0x300);
	Bm_array_init(bm_match[0], states * pos_states);
	Bm_array_init(bm_rep, states);
	Bm_array_init(bm_rep0, states);
	Bm_array_init(bm_rep1, states);
	Bm_array_init(bm_rep2, states);
	Bm_array_init(bm_len[0], states * pos_states);
	Bm_array_init(bm_dis_slot[0], len_states * (1 << dis_slot_bits));
	Bm_array_init(bm_dis, modeled_distances - end_dis_model + 1);
	Bm_array_init(bm_align, dis_align_size);
	Lm_init(&match_len_model);
	Lm_init(&rep_len_model);

	Rd_load(rdec);
	while (!Rd_finished(rdec)) {
		int pos_state = LZd_data_position(d) & pos_state_mask;
		if (Rd_decode_bit(rdec, &bm_match[state][pos_state]) == 0) /* 1st bit */ {
			/* literal byte; the model row is chosen from the previous byte */
			Bit_model * bm = bm_literal[get_lit_state(LZd_peek_prev(d))];
			if (St_is_char(state)) {
				state -= (state < 4) ? state : 3;
				LZd_put_byte(d, Rd_decode_tree8(rdec, bm));
			} else {
				/* decoded with the help of the byte found at distance rep0 */
				state -= (state < 10) ? 3 : 6;
				LZd_put_byte(d, Rd_decode_matched(rdec, bm, LZd_peek(d, rep0)));
			}
		} else /* match or repeated match */ {
			int len;
			if (Rd_decode_bit(rdec, &bm_rep[state]) != 0) /* 2nd bit */ {
				if (Rd_decode_bit(rdec, &bm_rep0[state]) == 0) /* 3rd bit */ {
					if (Rd_decode_bit(rdec, &bm_len[state][pos_state]) == 0) /* 4th bit */ {
						/* short rep: a single byte copied from distance rep0 */
						state = St_set_short_rep(state);
						LZd_put_byte(d, LZd_peek(d, rep0));
						continue;
					}
				} else {
					/* reuse rep1, rep2 or rep3 and move it to the front,
					   shifting the more recent distances down */
					unsigned distance;
					if (Rd_decode_bit(rdec, &bm_rep1[state]) == 0) /* 4th bit */
						distance = rep1;
					else {
						if (Rd_decode_bit(rdec, &bm_rep2[state]) == 0) /* 5th bit */
							distance = rep2;
						else {
							distance = rep3;
							rep3 = rep2;
						}
						rep2 = rep1;
					}
					rep1 = rep0;
					rep0 = distance;
				}
				state = St_set_rep(state);
				len = min_match_len + Rd_decode_len(rdec, &rep_len_model, pos_state);
			} else /* match */ {
				unsigned distance;
				len = min_match_len + Rd_decode_len(rdec, &match_len_model, pos_state);
				/* the 6-bit tree yields a distance slot; slots below
				   start_dis_model are the distance itself */
				distance = Rd_decode_tree6(rdec, bm_dis_slot[get_len_state(len)]);
				if (distance >= start_dis_model) {
					unsigned dis_slot = distance;
					int direct_bits = (dis_slot >> 1) - 1;
					distance = (2 | (dis_slot & 1)) << direct_bits;
					if (dis_slot < end_dis_model)
						distance += Rd_decode_tree_reversed(rdec,
						    bm_dis + (distance - dis_slot), direct_bits);
					else {
						/* high bits are read with Rd_decode, the low
						   dis_align_bits through the bm_align models */
						distance +=
						    Rd_decode(rdec, direct_bits - dis_align_bits) << dis_align_bits;
						distance += Rd_decode_tree_reversed4(rdec, bm_align);
						if (distance == 0xFFFFFFFFU) /* marker found */ {
							Rd_normalize(rdec);
							LZd_flush_data(d);
							if (len == min_match_len) /* End Of Stream marker */ {
								if (LZd_verify_trailer(d, pp))
									return 0;
								else
									return 3;
							}
							if (len == min_match_len + 1) /* Sync Flush marker */ {
								Rd_load(rdec);
								continue;
							}
							if (verbosity >= 0) {
								Pp_show_msg(pp, 0);
								fprintf( stderr, "Unsupported marker code '%d'\n", len );
							}
							return 4;
						}
					}
				}
				rep3 = rep2;
				rep2 = rep1;
				rep1 = rep0;
				rep0 = distance;
				state = St_set_match(state);
				/* reject a distance that reaches outside the dictionary, or
				   before the start of the data while the buffer has not
				   wrapped yet */
				if (rep0 >= d->dict_size || (rep0 >= d->pos && !d->pos_wrapped)) {
					LZd_flush_data(d);
					return 1;
				}
			}
			LZd_copy_block(d, rep0, len);
		}
	}
	/* input finished without an End Of Stream marker */
	LZd_flush_data(d);
	return 2;
}
294
295