xref: /plan9-contrib/sys/src/cmd/lzip/decoder.h (revision 13d37d7716a3e781f408392d7869dff5927c6669)
1 /*  Clzip - LZMA lossless data compressor
2     Copyright (C) 2010-2017 Antonio Diaz Diaz.
3 
4     This program is free software: you can redistribute it and/or modify
5     it under the terms of the GNU General Public License as published by
6     the Free Software Foundation, either version 2 of the License, or
7     (at your option) any later version.
8 
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13 
14     You should have received a copy of the GNU General Public License
15     along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16 
17 enum { rd_buffer_size = 16384 };
18 
19 typedef struct LZ_decoder LZ_decoder;
20 typedef struct Range_decoder Range_decoder;
21 
22 struct Range_decoder {
23 	uvlong partial_member_pos;
24 	uchar * buffer;	/* input buffer */
25 	int	pos;		/* current pos in buffer */
26 	int	stream_pos;	/* when reached, a new block must be read */
27 	uint32_t code;
28 	uint32_t range;
29 	int	infd;		/* input file descriptor */
30 	bool at_stream_end;
31 };
32 
33 bool	Rd_read_block(Range_decoder *rdec);
34 
35 static bool
Rd_init(Range_decoder * rdec,int ifd)36 Rd_init(Range_decoder *rdec, int ifd)
37 {
38 	rdec->partial_member_pos = 0;
39 	rdec->buffer = (uchar *)malloc(rd_buffer_size);
40 	if (!rdec->buffer)
41 		return false;
42 	rdec->pos = 0;
43 	rdec->stream_pos = 0;
44 	rdec->code = 0;
45 	rdec->range = 0xFFFFFFFFU;
46 	rdec->infd = ifd;
47 	rdec->at_stream_end = false;
48 	return true;
49 }
50 
51 static void
Rd_free(Range_decoder * rdec)52 Rd_free(Range_decoder *rdec)
53 {
54 	free(rdec->buffer);
55 }
56 
57 static bool
Rd_finished(Range_decoder * rdec)58 Rd_finished(Range_decoder *rdec)
59 {
60 	return rdec->pos >= rdec->stream_pos && !Rd_read_block(rdec);
61 }
62 
63 static uvlong
Rd_member_position(Range_decoder * rdec)64 Rd_member_position(Range_decoder *rdec)
65 {
66 	return rdec->partial_member_pos + rdec->pos;
67 }
68 
69 static void
Rd_reset_member_position(Range_decoder * rdec)70 Rd_reset_member_position(Range_decoder *rdec)
71 {
72 	rdec->partial_member_pos = 0;
73 	rdec->partial_member_pos -= rdec->pos;
74 }
75 
76 static uchar
Rd_get_byte(Range_decoder * rdec)77 Rd_get_byte(Range_decoder *rdec)
78 {
79 	/* 0xFF avoids decoder error if member is truncated at EOS marker */
80 	if (Rd_finished(rdec))
81 		return 0xFF;
82 	return rdec->buffer[rdec->pos++];
83 }
84 
Rd_read_data(Range_decoder * rdec,uchar * outbuf,int size)85 static int Rd_read_data(Range_decoder *rdec, uchar *outbuf, int size)
86 {
87 	int sz = 0;
88 
89 	while (sz < size && !Rd_finished(rdec)) {
90 		int rd, rsz = size - sz;
91 		int rpos = rdec->stream_pos - rdec->pos;
92 
93 		if (rsz < rpos)
94 			rd = rsz;
95 		else
96 			rd = rpos;
97 		memcpy(outbuf + sz, rdec->buffer + rdec->pos, rd);
98 		rdec->pos += rd;
99 		sz += rd;
100 	}
101 	return sz;
102 }
103 
104 static void
Rd_load(Range_decoder * rdec)105 Rd_load(Range_decoder *rdec)
106 {
107 	int	i;
108 	rdec->code = 0;
109 	for (i = 0; i < 5; ++i)
110 		rdec->code = (rdec->code << 8) | Rd_get_byte(rdec);
111 	rdec->range = 0xFFFFFFFFU;
112 	rdec->code &= rdec->range;	/* make sure that first byte is discarded */
113 }
114 
115 static void
Rd_normalize(Range_decoder * rdec)116 Rd_normalize(Range_decoder *rdec)
117 {
118 	if (rdec->range <= 0x00FFFFFFU) {
119 		rdec->range <<= 8;
120 		rdec->code = (rdec->code << 8) | Rd_get_byte(rdec);
121 	}
122 }
123 
124 static unsigned
Rd_decode(Range_decoder * rdec,int num_bits)125 Rd_decode(Range_decoder *rdec, int num_bits)
126 {
127 	unsigned	symbol = 0;
128 	int	i;
129 	for (i = num_bits; i > 0; --i) {
130 		bool bit;
131 		Rd_normalize(rdec);
132 		rdec->range >>= 1;
133 		/*    symbol <<= 1; */
134 		/*    if(rdec->code >= rdec->range) { rdec->code -= rdec->range; symbol |= 1; } */
135 		bit = (rdec->code >= rdec->range);
136 		symbol = (symbol << 1) + bit;
137 		rdec->code -= rdec->range & (0U - bit);
138 	}
139 	return symbol;
140 }
141 
142 static unsigned
Rd_decode_bit(Range_decoder * rdec,Bit_model * probability)143 Rd_decode_bit(Range_decoder *rdec, Bit_model *probability)
144 {
145 	uint32_t bound;
146 	Rd_normalize(rdec);
147 	bound = (rdec->range >> bit_model_total_bits) * *probability;
148 	if (rdec->code < bound) {
149 		rdec->range = bound;
150 		*probability += (bit_model_total - *probability) >> bit_model_move_bits;
151 		return 0;
152 	} else {
153 		rdec->range -= bound;
154 		rdec->code -= bound;
155 		*probability -= *probability >> bit_model_move_bits;
156 		return 1;
157 	}
158 }
159 
160 static unsigned
Rd_decode_tree3(Range_decoder * rdec,Bit_model bm[])161 Rd_decode_tree3(Range_decoder *rdec, Bit_model bm[])
162 {
163 	unsigned	symbol = 1;
164 	symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
165 	symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
166 	symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
167 	return symbol & 7;
168 }
169 
170 static unsigned
Rd_decode_tree6(Range_decoder * rdec,Bit_model bm[])171 Rd_decode_tree6(Range_decoder *rdec, Bit_model bm[])
172 {
173 	unsigned	symbol = 1;
174 	symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
175 	symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
176 	symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
177 	symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
178 	symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
179 	symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
180 	return symbol & 0x3F;
181 }
182 
183 static unsigned
Rd_decode_tree8(Range_decoder * rdec,Bit_model bm[])184 Rd_decode_tree8(Range_decoder *rdec, Bit_model bm[])
185 {
186 	unsigned	symbol = 1;
187 	int	i;
188 	for (i = 0; i < 8; ++i)
189 		symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
190 	return symbol & 0xFF;
191 }
192 
193 static unsigned
Rd_decode_tree_reversed(Range_decoder * rdec,Bit_model bm[],int num_bits)194 Rd_decode_tree_reversed(Range_decoder *rdec, Bit_model bm[], int num_bits)
195 {
196 	unsigned model = 1;
197 	unsigned symbol = 0;
198 	int	i;
199 	for (i = 0; i < num_bits; ++i) {
200 		unsigned bit = Rd_decode_bit(rdec, &bm[model]);
201 		model = (model << 1) + bit;
202 		symbol |= (bit << i);
203 	}
204 	return symbol;
205 }
206 
207 static unsigned
Rd_decode_tree_reversed4(Range_decoder * rdec,Bit_model bm[])208 Rd_decode_tree_reversed4(Range_decoder *rdec, Bit_model bm[])
209 {
210 	unsigned	symbol = Rd_decode_bit(rdec, &bm[1]);
211 	unsigned	model = 2 + symbol;
212 	unsigned	bit = Rd_decode_bit(rdec, &bm[model]);
213 	model = (model << 1) + bit;
214 	symbol |= (bit << 1);
215 	bit = Rd_decode_bit(rdec, &bm[model]);
216 	model = (model << 1) + bit;
217 	symbol |= (bit << 2);
218 	symbol |= (Rd_decode_bit(rdec, &bm[model]) << 3);
219 	return symbol;
220 }
221 
222 static unsigned
Rd_decode_matched(Range_decoder * rdec,Bit_model bm[],unsigned match_byte)223 Rd_decode_matched(Range_decoder *rdec, Bit_model bm[], unsigned match_byte)
224 {
225 	unsigned	symbol = 1;
226 	unsigned	mask = 0x100;
227 	while (true) {
228 		unsigned match_bit = (match_byte <<= 1) & mask;
229 		unsigned bit = Rd_decode_bit(rdec, &bm[symbol+match_bit+mask]);
230 		symbol = (symbol << 1) + bit;
231 		if (symbol > 0xFF)
232 			return symbol & 0xFF;
233 		mask &= ~(match_bit ^ (bit << 8));	/* if(match_bit != bit) mask = 0; */
234 	}
235 }
236 
237 static unsigned
Rd_decode_len(struct Range_decoder * rdec,Len_model * lm,int pos_state)238 Rd_decode_len(struct Range_decoder *rdec, Len_model *lm, int pos_state)
239 {
240 	if (Rd_decode_bit(rdec, &lm->choice1) == 0)
241 		return Rd_decode_tree3(rdec, lm->bm_low[pos_state]);
242 	if (Rd_decode_bit(rdec, &lm->choice2) == 0)
243 		return len_low_syms + Rd_decode_tree3(rdec, lm->bm_mid[pos_state]);
244 	return len_low_syms + len_mid_syms + Rd_decode_tree8(rdec, lm->bm_high);
245 }
246 
247 struct LZ_decoder {
248 	uvlong	partial_data_pos;
249 	struct Range_decoder *rdec;
250 	unsigned dict_size;
251 	uchar * buffer;		/* output buffer */
252 	unsigned pos;			/* current pos in buffer */
253 	unsigned stream_pos;		/* first byte not yet written to file */
254 	uint32_t crc;
255 	int	outfd;			/* output file descriptor */
256 	bool pos_wrapped;
257 };
258 
259 void	LZd_flush_data(LZ_decoder *d);
260 
261 static uchar
LZd_peek_prev(LZ_decoder * d)262 LZd_peek_prev(LZ_decoder *d)
263 {
264 	if (d->pos > 0)
265 		return d->buffer[d->pos-1];
266 	if (d->pos_wrapped)
267 		return d->buffer[d->dict_size-1];
268 	return 0;			/* prev_byte of first byte */
269 }
270 
271 static uchar
LZd_peek(LZ_decoder * d,unsigned distance)272 LZd_peek(LZ_decoder *d,
273 unsigned distance)
274 {
275 	unsigned i = ((d->pos > distance) ? 0 : d->dict_size) +
276 	    d->pos - distance - 1;
277 	return d->buffer[i];
278 }
279 
280 static void
LZd_put_byte(LZ_decoder * d,uchar b)281 LZd_put_byte(LZ_decoder *d, uchar b)
282 {
283 	d->buffer[d->pos] = b;
284 	if (++d->pos >= d->dict_size)
285 		LZd_flush_data(d);
286 }
287 
288 static void
LZd_copy_block(LZ_decoder * d,unsigned distance,unsigned len)289 LZd_copy_block(LZ_decoder *d, unsigned distance, unsigned len)
290 {
291 	unsigned	lpos = d->pos, i = lpos -distance -1;
292 	bool fast, fast2;
293 
294 	if (lpos > distance) {
295 		fast = (len < d->dict_size - lpos);
296 		fast2 = (fast && len <= lpos - i);
297 	} else {
298 		i += d->dict_size;
299 		fast = (len < d->dict_size - i); /* (i == pos) may happen */
300 		fast2 = (fast && len <= i - lpos);
301 	}
302 	if (fast)				/* no wrap */ {
303 		d->pos += len;
304 		if (fast2)			/* no wrap, no overlap */
305 			memcpy(d->buffer + lpos, d->buffer + i, len);
306 		else
307 			for (; len > 0; --len)
308 				d->buffer[lpos++] = d->buffer[i++];
309 	} else
310 		for (; len > 0; --len) {
311 			d->buffer[d->pos] = d->buffer[i];
312 			if (++d->pos >= d->dict_size)
313 				LZd_flush_data(d);
314 			if (++i >= d->dict_size)
315 				i = 0;
316 		}
317 }
318 
319 static bool
LZd_init(struct LZ_decoder * d,Range_decoder * rde,unsigned dict_size,int ofd)320 LZd_init(struct LZ_decoder *d, Range_decoder *rde, unsigned dict_size, int ofd)
321 {
322 	d->partial_data_pos = 0;
323 	d->rdec = rde;
324 	d->dict_size = dict_size;
325 	d->buffer = (uchar *)malloc(d->dict_size);
326 	if (!d->buffer)
327 		return false;
328 	d->pos = 0;
329 	d->stream_pos = 0;
330 	d->crc = 0xFFFFFFFFU;
331 	d->outfd = ofd;
332 	d->pos_wrapped = false;
333 	return true;
334 }
335 
336 static void
LZd_free(LZ_decoder * d)337 LZd_free(LZ_decoder *d)
338 {
339 	free(d->buffer);
340 }
341 
342 static unsigned
LZd_crc(LZ_decoder * d)343 LZd_crc(LZ_decoder *d)
344 {
345 	return d->crc ^ 0xFFFFFFFFU;
346 }
347 
348 static uvlong
LZd_data_position(LZ_decoder * d)349 LZd_data_position(LZ_decoder *d)
350 {
351 	return d->partial_data_pos + d->pos;
352 }
353 
354 int	LZd_decode_member(struct LZ_decoder *d, Pretty_print *pp);
355