1 /* Clzip - LZMA lossless data compressor
2 Copyright (C) 2010-2017 Antonio Diaz Diaz.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
16
17 enum { rd_buffer_size = 16384 };
18
19 typedef struct LZ_decoder LZ_decoder;
20 typedef struct Range_decoder Range_decoder;
21
22 struct Range_decoder {
23 uvlong partial_member_pos;
24 uchar * buffer; /* input buffer */
25 int pos; /* current pos in buffer */
26 int stream_pos; /* when reached, a new block must be read */
27 uint32_t code;
28 uint32_t range;
29 int infd; /* input file descriptor */
30 bool at_stream_end;
31 };
32
33 bool Rd_read_block(Range_decoder *rdec);
34
35 static bool
Rd_init(Range_decoder * rdec,int ifd)36 Rd_init(Range_decoder *rdec, int ifd)
37 {
38 rdec->partial_member_pos = 0;
39 rdec->buffer = (uchar *)malloc(rd_buffer_size);
40 if (!rdec->buffer)
41 return false;
42 rdec->pos = 0;
43 rdec->stream_pos = 0;
44 rdec->code = 0;
45 rdec->range = 0xFFFFFFFFU;
46 rdec->infd = ifd;
47 rdec->at_stream_end = false;
48 return true;
49 }
50
51 static void
Rd_free(Range_decoder * rdec)52 Rd_free(Range_decoder *rdec)
53 {
54 free(rdec->buffer);
55 }
56
57 static bool
Rd_finished(Range_decoder * rdec)58 Rd_finished(Range_decoder *rdec)
59 {
60 return rdec->pos >= rdec->stream_pos && !Rd_read_block(rdec);
61 }
62
63 static uvlong
Rd_member_position(Range_decoder * rdec)64 Rd_member_position(Range_decoder *rdec)
65 {
66 return rdec->partial_member_pos + rdec->pos;
67 }
68
69 static void
Rd_reset_member_position(Range_decoder * rdec)70 Rd_reset_member_position(Range_decoder *rdec)
71 {
72 rdec->partial_member_pos = 0;
73 rdec->partial_member_pos -= rdec->pos;
74 }
75
76 static uchar
Rd_get_byte(Range_decoder * rdec)77 Rd_get_byte(Range_decoder *rdec)
78 {
79 /* 0xFF avoids decoder error if member is truncated at EOS marker */
80 if (Rd_finished(rdec))
81 return 0xFF;
82 return rdec->buffer[rdec->pos++];
83 }
84
Rd_read_data(Range_decoder * rdec,uchar * outbuf,int size)85 static int Rd_read_data(Range_decoder *rdec, uchar *outbuf, int size)
86 {
87 int sz = 0;
88
89 while (sz < size && !Rd_finished(rdec)) {
90 int rd, rsz = size - sz;
91 int rpos = rdec->stream_pos - rdec->pos;
92
93 if (rsz < rpos)
94 rd = rsz;
95 else
96 rd = rpos;
97 memcpy(outbuf + sz, rdec->buffer + rdec->pos, rd);
98 rdec->pos += rd;
99 sz += rd;
100 }
101 return sz;
102 }
103
104 static void
Rd_load(Range_decoder * rdec)105 Rd_load(Range_decoder *rdec)
106 {
107 int i;
108 rdec->code = 0;
109 for (i = 0; i < 5; ++i)
110 rdec->code = (rdec->code << 8) | Rd_get_byte(rdec);
111 rdec->range = 0xFFFFFFFFU;
112 rdec->code &= rdec->range; /* make sure that first byte is discarded */
113 }
114
115 static void
Rd_normalize(Range_decoder * rdec)116 Rd_normalize(Range_decoder *rdec)
117 {
118 if (rdec->range <= 0x00FFFFFFU) {
119 rdec->range <<= 8;
120 rdec->code = (rdec->code << 8) | Rd_get_byte(rdec);
121 }
122 }
123
124 static unsigned
Rd_decode(Range_decoder * rdec,int num_bits)125 Rd_decode(Range_decoder *rdec, int num_bits)
126 {
127 unsigned symbol = 0;
128 int i;
129 for (i = num_bits; i > 0; --i) {
130 bool bit;
131 Rd_normalize(rdec);
132 rdec->range >>= 1;
133 /* symbol <<= 1; */
134 /* if(rdec->code >= rdec->range) { rdec->code -= rdec->range; symbol |= 1; } */
135 bit = (rdec->code >= rdec->range);
136 symbol = (symbol << 1) + bit;
137 rdec->code -= rdec->range & (0U - bit);
138 }
139 return symbol;
140 }
141
142 static unsigned
Rd_decode_bit(Range_decoder * rdec,Bit_model * probability)143 Rd_decode_bit(Range_decoder *rdec, Bit_model *probability)
144 {
145 uint32_t bound;
146 Rd_normalize(rdec);
147 bound = (rdec->range >> bit_model_total_bits) * *probability;
148 if (rdec->code < bound) {
149 rdec->range = bound;
150 *probability += (bit_model_total - *probability) >> bit_model_move_bits;
151 return 0;
152 } else {
153 rdec->range -= bound;
154 rdec->code -= bound;
155 *probability -= *probability >> bit_model_move_bits;
156 return 1;
157 }
158 }
159
160 static unsigned
Rd_decode_tree3(Range_decoder * rdec,Bit_model bm[])161 Rd_decode_tree3(Range_decoder *rdec, Bit_model bm[])
162 {
163 unsigned symbol = 1;
164 symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
165 symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
166 symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
167 return symbol & 7;
168 }
169
170 static unsigned
Rd_decode_tree6(Range_decoder * rdec,Bit_model bm[])171 Rd_decode_tree6(Range_decoder *rdec, Bit_model bm[])
172 {
173 unsigned symbol = 1;
174 symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
175 symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
176 symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
177 symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
178 symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
179 symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
180 return symbol & 0x3F;
181 }
182
183 static unsigned
Rd_decode_tree8(Range_decoder * rdec,Bit_model bm[])184 Rd_decode_tree8(Range_decoder *rdec, Bit_model bm[])
185 {
186 unsigned symbol = 1;
187 int i;
188 for (i = 0; i < 8; ++i)
189 symbol = (symbol << 1) | Rd_decode_bit(rdec, &bm[symbol]);
190 return symbol & 0xFF;
191 }
192
193 static unsigned
Rd_decode_tree_reversed(Range_decoder * rdec,Bit_model bm[],int num_bits)194 Rd_decode_tree_reversed(Range_decoder *rdec, Bit_model bm[], int num_bits)
195 {
196 unsigned model = 1;
197 unsigned symbol = 0;
198 int i;
199 for (i = 0; i < num_bits; ++i) {
200 unsigned bit = Rd_decode_bit(rdec, &bm[model]);
201 model = (model << 1) + bit;
202 symbol |= (bit << i);
203 }
204 return symbol;
205 }
206
207 static unsigned
Rd_decode_tree_reversed4(Range_decoder * rdec,Bit_model bm[])208 Rd_decode_tree_reversed4(Range_decoder *rdec, Bit_model bm[])
209 {
210 unsigned symbol = Rd_decode_bit(rdec, &bm[1]);
211 unsigned model = 2 + symbol;
212 unsigned bit = Rd_decode_bit(rdec, &bm[model]);
213 model = (model << 1) + bit;
214 symbol |= (bit << 1);
215 bit = Rd_decode_bit(rdec, &bm[model]);
216 model = (model << 1) + bit;
217 symbol |= (bit << 2);
218 symbol |= (Rd_decode_bit(rdec, &bm[model]) << 3);
219 return symbol;
220 }
221
222 static unsigned
Rd_decode_matched(Range_decoder * rdec,Bit_model bm[],unsigned match_byte)223 Rd_decode_matched(Range_decoder *rdec, Bit_model bm[], unsigned match_byte)
224 {
225 unsigned symbol = 1;
226 unsigned mask = 0x100;
227 while (true) {
228 unsigned match_bit = (match_byte <<= 1) & mask;
229 unsigned bit = Rd_decode_bit(rdec, &bm[symbol+match_bit+mask]);
230 symbol = (symbol << 1) + bit;
231 if (symbol > 0xFF)
232 return symbol & 0xFF;
233 mask &= ~(match_bit ^ (bit << 8)); /* if(match_bit != bit) mask = 0; */
234 }
235 }
236
237 static unsigned
Rd_decode_len(struct Range_decoder * rdec,Len_model * lm,int pos_state)238 Rd_decode_len(struct Range_decoder *rdec, Len_model *lm, int pos_state)
239 {
240 if (Rd_decode_bit(rdec, &lm->choice1) == 0)
241 return Rd_decode_tree3(rdec, lm->bm_low[pos_state]);
242 if (Rd_decode_bit(rdec, &lm->choice2) == 0)
243 return len_low_syms + Rd_decode_tree3(rdec, lm->bm_mid[pos_state]);
244 return len_low_syms + len_mid_syms + Rd_decode_tree8(rdec, lm->bm_high);
245 }
246
247 struct LZ_decoder {
248 uvlong partial_data_pos;
249 struct Range_decoder *rdec;
250 unsigned dict_size;
251 uchar * buffer; /* output buffer */
252 unsigned pos; /* current pos in buffer */
253 unsigned stream_pos; /* first byte not yet written to file */
254 uint32_t crc;
255 int outfd; /* output file descriptor */
256 bool pos_wrapped;
257 };
258
259 void LZd_flush_data(LZ_decoder *d);
260
261 static uchar
LZd_peek_prev(LZ_decoder * d)262 LZd_peek_prev(LZ_decoder *d)
263 {
264 if (d->pos > 0)
265 return d->buffer[d->pos-1];
266 if (d->pos_wrapped)
267 return d->buffer[d->dict_size-1];
268 return 0; /* prev_byte of first byte */
269 }
270
271 static uchar
LZd_peek(LZ_decoder * d,unsigned distance)272 LZd_peek(LZ_decoder *d,
273 unsigned distance)
274 {
275 unsigned i = ((d->pos > distance) ? 0 : d->dict_size) +
276 d->pos - distance - 1;
277 return d->buffer[i];
278 }
279
280 static void
LZd_put_byte(LZ_decoder * d,uchar b)281 LZd_put_byte(LZ_decoder *d, uchar b)
282 {
283 d->buffer[d->pos] = b;
284 if (++d->pos >= d->dict_size)
285 LZd_flush_data(d);
286 }
287
288 static void
LZd_copy_block(LZ_decoder * d,unsigned distance,unsigned len)289 LZd_copy_block(LZ_decoder *d, unsigned distance, unsigned len)
290 {
291 unsigned lpos = d->pos, i = lpos -distance -1;
292 bool fast, fast2;
293
294 if (lpos > distance) {
295 fast = (len < d->dict_size - lpos);
296 fast2 = (fast && len <= lpos - i);
297 } else {
298 i += d->dict_size;
299 fast = (len < d->dict_size - i); /* (i == pos) may happen */
300 fast2 = (fast && len <= i - lpos);
301 }
302 if (fast) /* no wrap */ {
303 d->pos += len;
304 if (fast2) /* no wrap, no overlap */
305 memcpy(d->buffer + lpos, d->buffer + i, len);
306 else
307 for (; len > 0; --len)
308 d->buffer[lpos++] = d->buffer[i++];
309 } else
310 for (; len > 0; --len) {
311 d->buffer[d->pos] = d->buffer[i];
312 if (++d->pos >= d->dict_size)
313 LZd_flush_data(d);
314 if (++i >= d->dict_size)
315 i = 0;
316 }
317 }
318
319 static bool
LZd_init(struct LZ_decoder * d,Range_decoder * rde,unsigned dict_size,int ofd)320 LZd_init(struct LZ_decoder *d, Range_decoder *rde, unsigned dict_size, int ofd)
321 {
322 d->partial_data_pos = 0;
323 d->rdec = rde;
324 d->dict_size = dict_size;
325 d->buffer = (uchar *)malloc(d->dict_size);
326 if (!d->buffer)
327 return false;
328 d->pos = 0;
329 d->stream_pos = 0;
330 d->crc = 0xFFFFFFFFU;
331 d->outfd = ofd;
332 d->pos_wrapped = false;
333 return true;
334 }
335
336 static void
LZd_free(LZ_decoder * d)337 LZd_free(LZ_decoder *d)
338 {
339 free(d->buffer);
340 }
341
342 static unsigned
LZd_crc(LZ_decoder * d)343 LZd_crc(LZ_decoder *d)
344 {
345 return d->crc ^ 0xFFFFFFFFU;
346 }
347
348 static uvlong
LZd_data_position(LZ_decoder * d)349 LZd_data_position(LZ_decoder *d)
350 {
351 return d->partial_data_pos + d->pos;
352 }
353
354 int LZd_decode_member(struct LZ_decoder *d, Pretty_print *pp);
355