xref: /plan9-contrib/sys/src/cmd/lzip/decoder.c (revision 13d37d7716a3e781f408392d7869dff5927c6669)
1 /*  Clzip - LZMA lossless data compressor
2     Copyright (C) 2010-2017 Antonio Diaz Diaz.
3 
4     This program is free software: you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation, either version 2 of the License, or
7     (at your option) any later version.
8 
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13 
14     You should have received a copy of the GNU General Public License
15     along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16 
17 #include "lzip.h"
18 #include "decoder.h"
19 
20 void
Pp_show_msg(Pretty_print * pp,char * msg)21 Pp_show_msg(Pretty_print *pp, char *msg)
22 {
23 	if (verbosity >= 0) {
24 		if (pp->first_post) {
25 			unsigned i;
26 
27 			pp->first_post = false;
28 			fprintf(stderr, "%s: ", pp->name);
29 			for (i = strlen(pp->name); i < pp->longest_name; ++i)
30 				fputc(' ', stderr);
31 			if (!msg)
32 				fflush(stderr);
33 		}
34 		if (msg)
35 			fprintf(stderr, "%s\n", msg);
36 	}
37 }
38 
39 /* Returns the number of bytes really read.
40    If returned value < size and no read error, means EOF was reached.
41  */
42 int
readblock(int fd,uchar * buf,int size)43 readblock(int fd, uchar *buf, int size)
44 {
45 	int n, sz;
46 
47 	for (sz = 0; sz < size; sz += n) {
48 		n = read(fd, buf + sz, size - sz);
49 		if (n <= 0)
50 			break;
51 	}
52 	return sz;
53 }
54 
55 /* Returns the number of bytes really written.
56    If (returned value < size), it is always an error.
57  */
58 int
writeblock(int fd,uchar * buf,int size)59 writeblock(int fd, uchar *buf, int size)
60 {
61 	int n, sz;
62 
63 	for (sz = 0; sz < size; sz += n) {
64 		n = write(fd, buf + sz, size - sz);
65 		if (n != size - sz)
66 			break;
67 	}
68 	return sz;
69 }
70 
71 bool
Rd_read_block(Range_decoder * rdec)72 Rd_read_block(Range_decoder *rdec)
73 {
74 	if (!rdec->at_stream_end) {
75 		rdec->stream_pos = readblock(rdec->infd, rdec->buffer, rd_buffer_size);
76 		if (rdec->stream_pos != rd_buffer_size && errno) {
77 			show_error( "Read error", errno, false );
78 			cleanup_and_fail(1);
79 		}
80 		rdec->at_stream_end = (rdec->stream_pos < rd_buffer_size);
81 		rdec->partial_member_pos += rdec->pos;
82 		rdec->pos = 0;
83 	}
84 	return rdec->pos < rdec->stream_pos;
85 }
86 
87 void
LZd_flush_data(LZ_decoder * d)88 LZd_flush_data(LZ_decoder *d)
89 {
90 	if (d->pos > d->stream_pos) {
91 		int size = d->pos - d->stream_pos;
92 		CRC32_update_buf(&d->crc, d->buffer + d->stream_pos, size);
93 		if (d->outfd >= 0 &&
94 		    writeblock(d->outfd, d->buffer + d->stream_pos, size) != size) {
95 			show_error( "Write error", errno, false );
96 			cleanup_and_fail(1);
97 		}
98 		if (d->pos >= d->dict_size) {
99 			d->partial_data_pos += d->pos;
100 			d->pos = 0;
101 			d->pos_wrapped = true;
102 		}
103 		d->stream_pos = d->pos;
104 	}
105 }
106 
107 static bool
LZd_verify_trailer(LZ_decoder * d,Pretty_print * pp)108 LZd_verify_trailer(LZ_decoder *d, Pretty_print *pp)
109 {
110 	File_trailer trailer;
111 	int	size = Rd_read_data(d->rdec, trailer, Ft_size);
112 	uvlong data_size = LZd_data_position(d);
113 	uvlong member_size = Rd_member_position(d->rdec);
114 	bool error = false;
115 
116 	if (size < Ft_size) {
117 		error = true;
118 		if (verbosity >= 0) {
119 			Pp_show_msg(pp, 0);
120 			fprintf( stderr, "Trailer truncated at trailer position %d;"
121 			    " some checks may fail.\n", size );
122 		}
123 		while (size < Ft_size)
124 			trailer[size++] = 0;
125 	}
126 
127 	if (Ft_get_data_crc(trailer) != LZd_crc(d)) {
128 		error = true;
129 		if (verbosity >= 0) {
130 			Pp_show_msg(pp, 0);
131 			fprintf( stderr, "CRC mismatch; trailer says %08X, data CRC is %08X\n",
132 			    Ft_get_data_crc(trailer), LZd_crc(d));
133 		}
134 	}
135 	if (Ft_get_data_size(trailer) != data_size) {
136 		error = true;
137 		if (verbosity >= 0) {
138 			Pp_show_msg(pp, 0);
139 			fprintf( stderr, "Data size mismatch; trailer says %llud, data size is %llud (0x%lluX)\n",
140 			    Ft_get_data_size(trailer), data_size, data_size);
141 		}
142 	}
143 	if (Ft_get_member_size(trailer) != member_size) {
144 		error = true;
145 		if (verbosity >= 0) {
146 			Pp_show_msg(pp, 0);
147 			fprintf(stderr, "Member size mismatch; trailer says %llud, member size is %llud (0x%lluX)\n",
148 			    Ft_get_member_size(trailer), member_size, member_size);
149 		}
150 	}
151 	if (0 && !error && verbosity >= 2 && data_size > 0 && member_size > 0)
152 		fprintf(stderr, "%6.3f:1, %6.3f bits/byte, %5.2f%% saved.  ",
153 		    (double)data_size / member_size,
154 		    (8.0 * member_size) / data_size,
155 		    100.0 * (1.0 - (double)member_size / data_size));
156 	if (!error && verbosity >= 4)
157 		fprintf( stderr, "CRC %08X, decompressed %9llud, compressed %8llud.  ",
158 		    LZd_crc(d), data_size, member_size);
159 	return !error;
160 }
161 
162 /* Return value: 0 = OK, 1 = decoder error, 2 = unexpected EOF,
163                  3 = trailer error, 4 = unknown marker found. */
164 int
LZd_decode_member(LZ_decoder * d,Pretty_print * pp)165 LZd_decode_member(LZ_decoder *d, Pretty_print *pp)
166 {
167 	Range_decoder *rdec = d->rdec;
168 	Bit_model bm_literal[1<<literal_context_bits][0x300];
169 	Bit_model bm_match[states][pos_states];
170 	Bit_model bm_rep[states];
171 	Bit_model bm_rep0[states];
172 	Bit_model bm_rep1[states];
173 	Bit_model bm_rep2[states];
174 	Bit_model bm_len[states][pos_states];
175 	Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
176 	Bit_model bm_dis[modeled_distances-end_dis_model+1];
177 	Bit_model bm_align[dis_align_size];
178 	Len_model match_len_model;
179 	Len_model rep_len_model;
180 	unsigned	rep0 = 0;		/* rep[0-3] latest four distances */
181 	unsigned	rep1 = 0;		/* used for efficient coding of */
182 	unsigned	rep2 = 0;		/* repeated distances */
183 	unsigned	rep3 = 0;
184 	State state = 0;
185 
186 	Bm_array_init(bm_literal[0], (1 << literal_context_bits) * 0x300);
187 	Bm_array_init(bm_match[0], states * pos_states);
188 	Bm_array_init(bm_rep, states);
189 	Bm_array_init(bm_rep0, states);
190 	Bm_array_init(bm_rep1, states);
191 	Bm_array_init(bm_rep2, states);
192 	Bm_array_init(bm_len[0], states * pos_states);
193 	Bm_array_init(bm_dis_slot[0], len_states * (1 << dis_slot_bits));
194 	Bm_array_init(bm_dis, modeled_distances - end_dis_model + 1);
195 	Bm_array_init(bm_align, dis_align_size);
196 	Lm_init(&match_len_model);
197 	Lm_init(&rep_len_model);
198 
199 	Rd_load(rdec);
200 	while (!Rd_finished(rdec)) {
201 		int pos_state = LZd_data_position(d) & pos_state_mask;
202 		if (Rd_decode_bit(rdec, &bm_match[state][pos_state]) == 0)	/* 1st bit */ {
203 			Bit_model * bm = bm_literal[get_lit_state(LZd_peek_prev(d))];
204 			if (St_is_char(state)) {
205 				state -= (state < 4) ? state : 3;
206 				LZd_put_byte(d, Rd_decode_tree8(rdec, bm));
207 			} else {
208 				state -= (state < 10) ? 3 : 6;
209 				LZd_put_byte(d, Rd_decode_matched(rdec, bm, LZd_peek(d, rep0)));
210 			}
211 		} else	/* match or repeated match */      {
212 			int	len;
213 			if (Rd_decode_bit(rdec, &bm_rep[state]) != 0)		/* 2nd bit */ {
214 				if (Rd_decode_bit(rdec, &bm_rep0[state]) == 0)	/* 3rd bit */ {
215 					if (Rd_decode_bit(rdec, &bm_len[state][pos_state]) == 0) /* 4th bit */ {
216 						state = St_set_short_rep(state);
217 						LZd_put_byte(d, LZd_peek(d, rep0));
218 						continue;
219 					}
220 				} else {
221 					unsigned	distance;
222 					if (Rd_decode_bit(rdec, &bm_rep1[state]) == 0)	/* 4th bit */
223 						distance = rep1;
224 					else {
225 						if (Rd_decode_bit(rdec, &bm_rep2[state]) == 0)	/* 5th bit */
226 							distance = rep2;
227 						else {
228 							distance = rep3;
229 							rep3 = rep2;
230 						}
231 						rep2 = rep1;
232 					}
233 					rep1 = rep0;
234 					rep0 = distance;
235 				}
236 				state = St_set_rep(state);
237 				len = min_match_len + Rd_decode_len(rdec, &rep_len_model, pos_state);
238 			} else	/* match */        {
239 				unsigned	distance;
240 				len = min_match_len + Rd_decode_len(rdec, &match_len_model, pos_state);
241 				distance = Rd_decode_tree6(rdec, bm_dis_slot[get_len_state(len)]);
242 				if (distance >= start_dis_model) {
243 					unsigned dis_slot = distance;
244 					int direct_bits = (dis_slot >> 1) - 1;
245 					distance = (2 | (dis_slot & 1)) << direct_bits;
246 					if (dis_slot < end_dis_model)
247 						distance += Rd_decode_tree_reversed(rdec,
248 						    bm_dis + (distance - dis_slot), direct_bits);
249 					else {
250 						distance +=
251 						    Rd_decode(rdec, direct_bits - dis_align_bits) << dis_align_bits;
252 						distance += Rd_decode_tree_reversed4(rdec, bm_align);
253 						if (distance == 0xFFFFFFFFU)		/* marker found */ {
254 							Rd_normalize(rdec);
255 							LZd_flush_data(d);
256 							if (len == min_match_len)	/* End Of Stream marker */ {
257 								if (LZd_verify_trailer(d, pp))
258 /* code folded from here */
259 	return 0;
260 /* unfolding */
261 								else
262 /* code folded from here */
263 	return 3;
264 /* unfolding */
265 							}
266 							if (len == min_match_len + 1)	/* Sync Flush marker */ {
267 								Rd_load(rdec);
268 								continue;
269 							}
270 							if (verbosity >= 0) {
271 								Pp_show_msg(pp, 0);
272 								fprintf( stderr, "Unsupported marker code '%d'\n", len );
273 							}
274 							return 4;
275 						}
276 					}
277 				}
278 				rep3 = rep2;
279 				rep2 = rep1;
280 				rep1 = rep0;
281 				rep0 = distance;
282 				state = St_set_match(state);
283 				if (rep0 >= d->dict_size || (rep0 >= d->pos && !d->pos_wrapped)) {
284 					LZd_flush_data(d);
285 					return 1;
286 				}
287 			}
288 			LZd_copy_block(d, rep0, len);
289 		}
290 	}
291 	LZd_flush_data(d);
292 	return 2;
293 }
294 
295