1 /* Clzip - LZMA lossless data compressor
2 Copyright (C) 2010-2017 Antonio Diaz Diaz.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #ifndef _LZIP_H
19 #define _LZIP_H
20
21 #include <u.h>
22 #include <libc.h>
23 #include <stdio.h>
24 #include <ctype.h>
25
26 #define exit(n) exits((n) == 0? 0: "err")
27 #define isatty(fd) 0
28 #define lseek seek
29
30 #ifndef max
31 #define max(x,y) ((x) >= (y) ? (x) : (y))
32 #endif
33 #ifndef min
34 #define min(x,y) ((x) <= (y) ? (x) : (y))
35 #endif
36
37 typedef int State;
38 typedef long int32_t;
39 typedef ulong uint32_t;
40 typedef int bool;
41
42 enum { false, true };
43
44 enum { states = 12 };
45 enum {
46 min_dict_bits = 12,
47 min_dict_size = 1 << min_dict_bits, /* >= modeled_distances */
48 max_dict_bits = 29,
49 max_dict_size = 1 << max_dict_bits,
50 min_member_size = 36,
51 literal_context_bits = 3,
52 literal_pos_state_bits = 0, /* not used */
53 pos_state_bits = 2,
54 pos_states = 1 << pos_state_bits,
55 pos_state_mask = pos_states -1,
56
57 len_states = 4,
58 dis_slot_bits = 6,
59 start_dis_model = 4,
60 end_dis_model = 14,
61 modeled_distances = 1 << (end_dis_model / 2), /* 128 */
62 dis_align_bits = 4,
63 dis_align_size = 1 << dis_align_bits,
64
65 len_low_bits = 3,
66 len_mid_bits = 3,
67 len_high_bits = 8,
68 len_low_syms = 1 << len_low_bits,
69 len_mid_syms = 1 << len_mid_bits,
70 len_high_syms = 1 << len_high_bits,
71 max_len_syms = len_low_syms + len_mid_syms + len_high_syms,
72
73 min_match_len = 2, /* must be 2 */
74 max_match_len = min_match_len + max_len_syms - 1, /* 273 */
75 min_match_len_limit = 5,
76
77 bit_model_move_bits = 5,
78 bit_model_total_bits = 11,
79 bit_model_total = 1 << bit_model_total_bits,
80 };
81
82 typedef struct Len_model Len_model;
83 typedef struct Pretty_print Pretty_print;
84 typedef struct Matchfinder_base Matchfinder_base;
85 typedef int Bit_model;
86
87 struct Len_model {
88 Bit_model choice1;
89 Bit_model choice2;
90 Bit_model bm_low[pos_states][len_low_syms];
91 Bit_model bm_mid[pos_states][len_mid_syms];
92 Bit_model bm_high[len_high_syms];
93 };
94 struct Pretty_print {
95 char *name;
96 char *stdin_name;
97 ulong longest_name;
98 bool first_post;
99 };
100
101 typedef ulong CRC32[256]; /* Table of CRCs of all 8-bit messages. */
102
103 extern CRC32 crc32;
104
105 #define errno 0
106
107 static uchar magic_string[4] = { "LZIP" };
108
109 typedef uchar File_header[6]; /* 0-3 magic bytes */
110 /* 4 version */
111 /* 5 coded_dict_size */
112 enum { Fh_size = 6 };
113
114 typedef uchar File_trailer[20];
115 /* 0-3 CRC32 of the uncompressed data */
116 /* 4-11 size of the uncompressed data */
117 /* 12-19 member size including header and trailer */
118
119 enum { Ft_size = 20 };
120
121 enum {
122 price_shift_bits = 6,
123 price_step_bits = 2,
124 price_step = 1 << price_step_bits,
125 };
126
127 typedef uchar Dis_slots[1<<10];
128 typedef short Prob_prices[bit_model_total >> price_step_bits];
129
130 extern Dis_slots dis_slots;
131 extern Prob_prices prob_prices;
132
133 #define get_price(prob) prob_prices[(prob) >> price_step_bits]
134 #define price0(prob) get_price(prob)
135 #define price1(prob) get_price(bit_model_total - (prob))
136 #define price_bit(bm, bit) ((bit)? price1(bm): price0(bm))
137
138 #define Mb_ptr_to_current_pos(mb) ((mb)->buffer + (mb)->pos)
139 #define Mb_peek(mb, distance) (mb)->buffer[(mb)->pos - (distance)]
140
141 #define Lp_price(lp, len, pos_state) \
142 (lp)->prices[pos_state][(len) - min_match_len]
143
144 #define Tr_update(trial, pr, distance4, p_i) \
145 { \
146 if ((pr) < (trial)->price) { \
147 (trial)->price = pr; \
148 (trial)->dis4 = distance4; \
149 (trial)->prev_index = p_i; \
150 (trial)->prev_index2 = single_step_trial; \
151 } else { \
152 } \
153 }
154
155 /* these functions are now extern and must be defined exactly once */
156 #ifdef _DEFINE_INLINES
157 #define _INLINES_DEFINED
158
159 int
get_len_state(int len)160 get_len_state(int len)
161 {
162 int lenstm1, lenmmm;
163
164 lenmmm = len - min_match_len;
165 lenstm1 = len_states - 1;
166 if (lenmmm < lenstm1)
167 return lenmmm;
168 else
169 return lenstm1;
170 }
171
172 State
St_set_char(State st)173 St_set_char(State st)
174 {
175 static State next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
176
177 assert((unsigned)st < nelem(next));
178 return next[st];
179 }
180
181 int
get_lit_state(uchar prev_byte)182 get_lit_state(uchar prev_byte)
183 {
184 return prev_byte >> (8 - literal_context_bits);
185 }
186
187 void
Bm_init(Bit_model * probability)188 Bm_init(Bit_model *probability)
189 {
190 *probability = bit_model_total / 2;
191 }
192
193 void
Bm_array_init(Bit_model bm[],int size)194 Bm_array_init(Bit_model bm[], int size)
195 {
196 int i;
197
198 for (i = 0; i < size; ++i)
199 Bm_init(&bm[i]);
200 }
201
202 void
Lm_init(Len_model * lm)203 Lm_init(Len_model *lm)
204 {
205 Bm_init(&lm->choice1);
206 Bm_init(&lm->choice2);
207 Bm_array_init(lm->bm_low[0], pos_states * len_low_syms);
208 Bm_array_init(lm->bm_mid[0], pos_states * len_mid_syms);
209 Bm_array_init(lm->bm_high, len_high_syms);
210 }
211
212 void
Pp_init(Pretty_print * pp,char * filenames[],int num_filenames,int verbosity)213 Pp_init(Pretty_print *pp, char *filenames[], int num_filenames, int verbosity)
214 {
215 unsigned stdin_name_len;
216 int i;
217
218 pp->name = 0;
219 pp->stdin_name = "(stdin)";
220 pp->longest_name = 0;
221 pp->first_post = false;
222
223 if (verbosity <= 0)
224 return;
225 stdin_name_len = strlen(pp->stdin_name);
226 for (i = 0; i < num_filenames; ++i) {
227 char *s = filenames[i];
228 unsigned len = strcmp(s, "-") == 0? stdin_name_len: strlen(s);
229
230 if (len > pp->longest_name)
231 pp->longest_name = len;
232 }
233 if (pp->longest_name == 0)
234 pp->longest_name = stdin_name_len;
235 }
236
237 void
Pp_set_name(Pretty_print * pp,char * filename)238 Pp_set_name(Pretty_print *pp, char *filename)
239 {
240 if ( filename && filename[0] && strcmp( filename, "-" ) != 0 )
241 pp->name = filename;
242 else
243 pp->name = pp->stdin_name;
244 pp->first_post = true;
245 }
246
247 void
Pp_reset(Pretty_print * pp)248 Pp_reset(Pretty_print *pp)
249 {
250 if (pp->name && pp->name[0])
251 pp->first_post = true;
252 }
253
254 void
255 Pp_show_msg(Pretty_print *pp, char *msg);
256
257 void
CRC32_init(void)258 CRC32_init(void)
259 {
260 unsigned n;
261
262 for (n = 0; n < 256; ++n) {
263 unsigned c = n;
264 int k;
265 for (k = 0; k < 8; ++k) {
266 if (c & 1)
267 c = 0xEDB88320U ^ (c >> 1);
268 else
269 c >>= 1;
270 }
271 crc32[n] = c;
272 }
273 }
274
275 void
CRC32_update_byte(uint32_t * crc,uchar byte)276 CRC32_update_byte(uint32_t *crc, uchar byte)
277 {
278 *crc = crc32[(*crc^byte)&0xFF] ^ (*crc >> 8);
279 }
280
281 void
CRC32_update_buf(uint32_t * crc,uchar * buffer,int size)282 CRC32_update_buf(uint32_t *crc, uchar *buffer, int size)
283 {
284 int i;
285 uint32_t c = *crc;
286 for (i = 0; i < size; ++i)
287 c = crc32[(c^buffer[i])&0xFF] ^ (c >> 8);
288 *crc = c;
289 }
290
291 bool
isvalid_ds(unsigned dict_size)292 isvalid_ds(unsigned dict_size)
293 {
294 return (dict_size >= min_dict_size &&
295 dict_size <= max_dict_size);
296 }
297
298 int
real_bits(unsigned value)299 real_bits(unsigned value)
300 {
301 int bits = 0;
302
303 while (value > 0) {
304 value >>= 1;
305 ++bits;
306 }
307 return bits;
308 }
309
310 void
Fh_set_magic(File_header data)311 Fh_set_magic(File_header data)
312 {
313 memcpy(data, magic_string, 4);
314 data[4] = 1;
315 }
316
317 bool
Fh_verify_magic(File_header data)318 Fh_verify_magic(File_header data)
319 {
320 return (memcmp(data, magic_string, 4) == 0);
321 }
322
323 /* detect truncated header */
324 bool
Fh_verify_prefix(File_header data,int size)325 Fh_verify_prefix(File_header data, int size)
326 {
327 int i;
328 for (i = 0; i < size && i < 4; ++i)
329 if (data[i] != magic_string[i])
330 return false;
331 return (size > 0);
332 }
333
334 uchar
Fh_version(File_header data)335 Fh_version(File_header data)
336 {
337 return data[4];
338 }
339
340 bool
Fh_verify_version(File_header data)341 Fh_verify_version(File_header data)
342 {
343 return (data[4] == 1);
344 }
345
346 unsigned
Fh_get_dict_size(File_header data)347 Fh_get_dict_size(File_header data)
348 {
349 unsigned sz = (1 << (data[5] &0x1F));
350 if (sz > min_dict_size)
351 sz -= (sz / 16) * ((data[5] >> 5) & 7);
352 return sz;
353 }
354
355 bool
Fh_set_dict_size(File_header data,unsigned sz)356 Fh_set_dict_size(File_header data, unsigned sz)
357 {
358 if (!isvalid_ds(sz))
359 return false;
360 data[5] = real_bits(sz - 1);
361 if (sz > min_dict_size) {
362 unsigned base_size = 1 << data[5];
363 unsigned fraction = base_size / 16;
364 unsigned i;
365 for (i = 7; i >= 1; --i)
366 if (base_size - (i * fraction) >= sz) {
367 data[5] |= (i << 5);
368 break;
369 }
370 }
371 return true;
372 }
373
374 unsigned
Ft_get_data_crc(File_trailer data)375 Ft_get_data_crc(File_trailer data)
376 {
377 unsigned tmp = 0;
378 int i;
379 for (i = 3; i >= 0; --i) {
380 tmp <<= 8;
381 tmp += data[i];
382 }
383 return tmp;
384 }
385
386 void
Ft_set_data_crc(File_trailer data,unsigned crc)387 Ft_set_data_crc(File_trailer data, unsigned crc)
388 {
389 int i;
390 for (i = 0; i <= 3; ++i) {
391 data[i] = (uchar)crc;
392 crc >>= 8;
393 }
394 }
395
396 uvlong
Ft_get_data_size(File_trailer data)397 Ft_get_data_size(File_trailer data)
398 {
399 uvlong tmp = 0;
400 int i;
401 for (i = 11; i >= 4; --i) {
402 tmp <<= 8;
403 tmp += data[i];
404 }
405 return tmp;
406 }
407
408 void
Ft_set_data_size(File_trailer data,uvlong sz)409 Ft_set_data_size(File_trailer data, uvlong sz)
410 {
411 int i;
412 for (i = 4; i <= 11; ++i) {
413 data[i] = (uchar)sz;
414 sz >>= 8;
415 }
416 }
417
418 uvlong
Ft_get_member_size(File_trailer data)419 Ft_get_member_size(File_trailer data)
420 {
421 uvlong tmp = 0;
422 int i;
423 for (i = 19; i >= 12; --i) {
424 tmp <<= 8;
425 tmp += data[i];
426 }
427 return tmp;
428 }
429
430 void
Ft_set_member_size(File_trailer data,uvlong sz)431 Ft_set_member_size(File_trailer data, uvlong sz)
432 {
433 int i;
434 for (i = 12; i <= 19; ++i) {
435 data[i] = (uchar)sz;
436 sz >>= 8;
437 }
438 }
439 #else /* _DEFINE_INLINES */
440 void Bm_array_init(Bit_model bm[], int size);
441 void Bm_init(Bit_model *probability);
442 void CRC32_init(void);
443 void CRC32_update_buf(uint32_t *crc, uchar *buffer, int size);
444 void CRC32_update_byte(uint32_t *crc, uchar byte);
445 unsigned Fh_get_dict_size(File_header data);
446 bool Fh_set_dict_size(File_header data, unsigned sz);
447 void Fh_set_magic(File_header data);
448 bool Fh_verify_magic(File_header data);
449 bool Fh_verify_prefix(File_header data, int size);
450 bool Fh_verify_version(File_header data);
451 uchar Fh_version(File_header data);
452 unsigned Ft_get_data_crc(File_trailer data);
453 uvlong Ft_get_data_size(File_trailer data);
454 uvlong Ft_get_member_size(File_trailer data);
455 void Ft_set_data_crc(File_trailer data, unsigned crc);
456 void Ft_set_data_size(File_trailer data, uvlong sz);
457 void Ft_set_member_size(File_trailer data, uvlong sz);
458 void Lm_init(Len_model *lm);
459 void Pp_init(Pretty_print *pp, char *filenames[], int num_filenames, int verbosity);
460 void Pp_reset(Pretty_print *pp);
461 void Pp_set_name(Pretty_print *pp, char *filename);
462 void Pp_show_msg(Pretty_print *pp, char *msg);
463 State St_set_char(State st);
464 int get_lit_state(uchar prev_byte);
465 int get_len_state(int len);
466 bool isvalid_ds(unsigned dict_size);
467 int real_bits(unsigned value);
468 #endif /* _DEFINE_INLINES */
469
470 #define St_is_char(state) ((state) < 7)
471 #define St_set_match(state) ((state) < 7? 7: 10)
472 #define St_set_rep(state) ((state) < 7? 8: 11)
473 #define St_set_short_rep(state) ((state) < 7? 9: 11)
474
475 static char *bad_magic_msg = "Bad magic number (file not in lzip format).";
476 static char *bad_dict_msg = "Invalid dictionary size in member header.";
477 static char *trailing_msg = "Trailing data not allowed.";
478
479 /* defined in decoder.c */
480 int readblock(int fd, uchar *buf, int size);
481 int writeblock(int fd, uchar *buf, int size);
482
483 /* defined in main.c */
484 extern int verbosity;
485 Dir;
486 char *bad_version(unsigned version);
487 char *format_ds(unsigned dict_size);
488 int open_instream(char *name, Dir *in_statsp, bool no_ofile, bool reg_only);
489 void *resize_buffer(void *buf, unsigned min_size);
490 void cleanup_and_fail(int retval);
491 void show_error(char *msg, int errcode, bool help);
492 void show_file_error(char *filename, char *msg, int errcode);
493 void internal_error(char *msg);
494 struct Matchfinder_base;
495 void show_progress(uvlong partial_size, Matchfinder_base *m, Pretty_print *p,
496 uvlong cfile_size);
497 #endif
498