xref: /netbsd-src/usr.bin/gzip/unlz.c (revision 5b4135cd6fe5844aa564bd507dd8f56d25be8b42)
1*5b4135cdSchristos /*	$NetBSD: unlz.c,v 1.10 2024/05/04 13:18:06 christos Exp $	*/
241c9b009Schristos 
341c9b009Schristos /*-
441c9b009Schristos  * Copyright (c) 2018 The NetBSD Foundation, Inc.
541c9b009Schristos  * All rights reserved.
641c9b009Schristos  *
741c9b009Schristos  * This code is derived from software contributed to The NetBSD Foundation
841c9b009Schristos  * by Christos Zoulas.
941c9b009Schristos  *
1041c9b009Schristos  * Redistribution and use in source and binary forms, with or without
1141c9b009Schristos  * modification, are permitted provided that the following conditions
1241c9b009Schristos  * are met:
1341c9b009Schristos  * 1. Redistributions of source code must retain the above copyright
1441c9b009Schristos  *    notice, this list of conditions and the following disclaimer.
1541c9b009Schristos  * 2. Redistributions in binary form must reproduce the above copyright
1641c9b009Schristos  *    notice, this list of conditions and the following disclaimer in the
1741c9b009Schristos  *    documentation and/or other materials provided with the distribution.
1841c9b009Schristos  *
1941c9b009Schristos  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
2041c9b009Schristos  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
2141c9b009Schristos  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
2241c9b009Schristos  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
2341c9b009Schristos  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2441c9b009Schristos  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2541c9b009Schristos  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2641c9b009Schristos  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2741c9b009Schristos  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2841c9b009Schristos  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2941c9b009Schristos  * POSSIBILITY OF SUCH DAMAGE.
3041c9b009Schristos  */
3141c9b009Schristos 
3241c9b009Schristos /*  Lzd - Educational decompressor for the lzip format
3341c9b009Schristos     Copyright (C) 2013-2018 Antonio Diaz Diaz.
3441c9b009Schristos 
3541c9b009Schristos     This program is free software. Redistribution and use in source and
3641c9b009Schristos     binary forms, with or without modification, are permitted provided
3741c9b009Schristos     that the following conditions are met:
3841c9b009Schristos 
3941c9b009Schristos     1. Redistributions of source code must retain the above copyright
4041c9b009Schristos     notice, this list of conditions and the following disclaimer.
4141c9b009Schristos 
4241c9b009Schristos     2. Redistributions in binary form must reproduce the above copyright
4341c9b009Schristos     notice, this list of conditions and the following disclaimer in the
4441c9b009Schristos     documentation and/or other materials provided with the distribution.
4541c9b009Schristos 
4641c9b009Schristos     This program is distributed in the hope that it will be useful,
4741c9b009Schristos     but WITHOUT ANY WARRANTY; without even the implied warranty of
4841c9b009Schristos     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
4941c9b009Schristos */
5041c9b009Schristos 
5141c9b009Schristos #include <sys/param.h>
5241c9b009Schristos #include <stdio.h>
5341c9b009Schristos #include <string.h>
5441c9b009Schristos #include <stdlib.h>
5541c9b009Schristos #include <stdint.h>
5641c9b009Schristos #include <stdbool.h>
5741c9b009Schristos #include <errno.h>
5841c9b009Schristos #include <unistd.h>
5941c9b009Schristos 
6041c9b009Schristos #define LZ_STATES		12
6141c9b009Schristos 
6241c9b009Schristos #define LITERAL_CONTEXT_BITS	3
6341c9b009Schristos #define POS_STATE_BITS		2
6441c9b009Schristos #define POS_STATES		(1 << POS_STATE_BITS)
6541c9b009Schristos #define POS_STATE_MASK 		(POS_STATES - 1)
6641c9b009Schristos 
6741c9b009Schristos #define STATES			4
6841c9b009Schristos #define DIS_SLOT_BITS		6
6941c9b009Schristos 
7041c9b009Schristos #define DIS_MODEL_START		4
7141c9b009Schristos #define DIS_MODEL_END		14
7241c9b009Schristos 
7341c9b009Schristos #define MODELED_DISTANCES	(1 << (DIS_MODEL_END / 2))
7441c9b009Schristos #define DIS_ALIGN_BITS		4
7541c9b009Schristos #define DIS_ALIGN_SIZE		(1 << DIS_ALIGN_BITS)
7641c9b009Schristos 
7741c9b009Schristos #define LOW_BITS		3
7841c9b009Schristos #define MID_BITS		3
7941c9b009Schristos #define HIGH_BITS		8
8041c9b009Schristos 
8141c9b009Schristos #define LOW_SYMBOLS		(1 << LOW_BITS)
8241c9b009Schristos #define MID_SYMBOLS		(1 << MID_BITS)
8341c9b009Schristos #define HIGH_SYMBOLS		(1 << HIGH_BITS)
8441c9b009Schristos 
8541c9b009Schristos #define MAX_SYMBOLS 		(LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS)
8641c9b009Schristos 
8741c9b009Schristos #define MIN_MATCH_LEN		2
8841c9b009Schristos 
8941c9b009Schristos #define BIT_MODEL_MOVE_BITS	5
9041c9b009Schristos #define BIT_MODEL_TOTAL_BITS 	11
9141c9b009Schristos #define BIT_MODEL_TOTAL 	(1 << BIT_MODEL_TOTAL_BITS)
9241c9b009Schristos #define BIT_MODEL_INIT		(BIT_MODEL_TOTAL / 2)
9341c9b009Schristos 
9441c9b009Schristos static const int lz_st_next[] = {
9541c9b009Schristos 	0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
9641c9b009Schristos };
9741c9b009Schristos 
9841c9b009Schristos static bool
lz_st_is_char(int st)9941c9b009Schristos lz_st_is_char(int st) {
10041c9b009Schristos 	return st < 7;
10141c9b009Schristos }
10241c9b009Schristos 
10341c9b009Schristos static int
lz_st_get_char(int st)10441c9b009Schristos lz_st_get_char(int st) {
10541c9b009Schristos 	return lz_st_next[st];
10641c9b009Schristos }
10741c9b009Schristos 
10841c9b009Schristos static int
lz_st_get_match(int st)10941c9b009Schristos lz_st_get_match(int st) {
11041c9b009Schristos 	return st < 7 ? 7 : 10;
11141c9b009Schristos }
11241c9b009Schristos 
11341c9b009Schristos static int
lz_st_get_rep(int st)11441c9b009Schristos lz_st_get_rep(int st) {
11541c9b009Schristos 	return st < 7 ? 8 : 11;
11641c9b009Schristos }
11741c9b009Schristos 
11841c9b009Schristos static int
lz_st_get_short_rep(int st)11941c9b009Schristos lz_st_get_short_rep(int st) {
12041c9b009Schristos 	return st < 7 ? 9 : 11;
12141c9b009Schristos }
12241c9b009Schristos 
12341c9b009Schristos struct lz_len_model {
12441c9b009Schristos 	int choice1;
12541c9b009Schristos 	int choice2;
12641c9b009Schristos 	int bm_low[POS_STATES][LOW_SYMBOLS];
12741c9b009Schristos 	int bm_mid[POS_STATES][MID_SYMBOLS];
12841c9b009Schristos 	int bm_high[HIGH_SYMBOLS];
12941c9b009Schristos };
13041c9b009Schristos 
13141c9b009Schristos static uint32_t lz_crc[256];
13241c9b009Schristos 
13341c9b009Schristos static void
lz_crc_init(void)13441c9b009Schristos lz_crc_init(void)
13541c9b009Schristos {
13641c9b009Schristos 	for (unsigned i = 0; i < __arraycount(lz_crc); i++) {
13741c9b009Schristos 		unsigned c = i;
13841c9b009Schristos 		for (unsigned j = 0; j < 8; j++) {
13941c9b009Schristos 			if (c & 1)
14041c9b009Schristos 				c = 0xEDB88320U ^ (c >> 1);
14141c9b009Schristos 			else
14241c9b009Schristos 				c >>= 1;
14341c9b009Schristos 		}
14441c9b009Schristos 		lz_crc[i] = c;
14541c9b009Schristos       }
14641c9b009Schristos }
14741c9b009Schristos 
14841c9b009Schristos static void
lz_crc_update(uint32_t * crc,const uint8_t * buf,size_t len)14941c9b009Schristos lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len)
15041c9b009Schristos {
15141c9b009Schristos 	for (size_t i = 0; i < len; i++)
15241c9b009Schristos 		*crc = lz_crc[(*crc ^ buf[i]) & 0xFF] ^ (*crc >> 8);
15341c9b009Schristos }
15441c9b009Schristos 
15541c9b009Schristos struct lz_range_decoder {
15641c9b009Schristos 	FILE *fp;
15741c9b009Schristos 	uint32_t code;
15841c9b009Schristos 	uint32_t range;
15941c9b009Schristos };
16041c9b009Schristos 
16141c9b009Schristos static int
lz_rd_create(struct lz_range_decoder * rd,FILE * fp)16241c9b009Schristos lz_rd_create(struct lz_range_decoder *rd, FILE *fp)
16341c9b009Schristos {
16441c9b009Schristos 	rd->fp = fp;
16541c9b009Schristos 	rd->code = 0;
16641c9b009Schristos 	rd->range = ~0;
16741c9b009Schristos 	for (int i = 0; i < 5; i++)
16841c9b009Schristos 		rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
16941c9b009Schristos 	return ferror(rd->fp) ? -1 : 0;
17041c9b009Schristos }
17141c9b009Schristos 
17241c9b009Schristos static unsigned
lz_rd_decode(struct lz_range_decoder * rd,int num_bits)17341c9b009Schristos lz_rd_decode(struct lz_range_decoder *rd, int num_bits)
17441c9b009Schristos {
17541c9b009Schristos 	unsigned symbol = 0;
17641c9b009Schristos 
17741c9b009Schristos 	for (int i = num_bits; i > 0; i--) {
17841c9b009Schristos 		rd->range >>= 1;
17941c9b009Schristos 		symbol <<= 1;
18041c9b009Schristos 		if (rd->code >= rd->range) {
18141c9b009Schristos 			rd->code -= rd->range;
18241c9b009Schristos 			symbol |= 1;
18341c9b009Schristos 		}
18441c9b009Schristos 		if (rd->range <= 0x00FFFFFFU) {
18541c9b009Schristos 			rd->range <<= 8;
18641c9b009Schristos 			rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
18741c9b009Schristos 		}
18841c9b009Schristos 	}
18941c9b009Schristos 
19041c9b009Schristos 	return symbol;
19141c9b009Schristos }
19241c9b009Schristos 
19341c9b009Schristos static unsigned
lz_rd_decode_bit(struct lz_range_decoder * rd,int * bm)19441c9b009Schristos lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm)
19541c9b009Schristos {
19641c9b009Schristos 	unsigned symbol;
19741c9b009Schristos 	const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm;
19841c9b009Schristos 
19941c9b009Schristos 	if(rd->code < bound) {
20041c9b009Schristos 		rd->range = bound;
20141c9b009Schristos 		*bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS;
20241c9b009Schristos 		symbol = 0;
20341c9b009Schristos 	}
20441c9b009Schristos 	else {
20541c9b009Schristos 		rd->range -= bound;
20641c9b009Schristos 		rd->code -= bound;
20741c9b009Schristos 		*bm -= *bm >> BIT_MODEL_MOVE_BITS;
20841c9b009Schristos 		symbol = 1;
20941c9b009Schristos 	}
21041c9b009Schristos 
21141c9b009Schristos 	if (rd->range <= 0x00FFFFFFU) {
21241c9b009Schristos 		rd->range <<= 8;
21341c9b009Schristos 		rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
21441c9b009Schristos 	}
21541c9b009Schristos 	return symbol;
21641c9b009Schristos }
21741c9b009Schristos 
21841c9b009Schristos static unsigned
lz_rd_decode_tree(struct lz_range_decoder * rd,int * bm,int num_bits)21941c9b009Schristos lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits)
22041c9b009Schristos {
22141c9b009Schristos 	unsigned symbol = 1;
22241c9b009Schristos 
22341c9b009Schristos 	for (int i = 0; i < num_bits; i++)
22441c9b009Schristos 		symbol = (symbol << 1) | lz_rd_decode_bit(rd, &bm[symbol]);
22541c9b009Schristos 
22641c9b009Schristos 	return symbol - (1 << num_bits);
22741c9b009Schristos }
22841c9b009Schristos 
22941c9b009Schristos static unsigned
lz_rd_decode_tree_reversed(struct lz_range_decoder * rd,int * bm,int num_bits)23041c9b009Schristos lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits)
23141c9b009Schristos {
23241c9b009Schristos 	unsigned symbol = lz_rd_decode_tree(rd, bm, num_bits);
23341c9b009Schristos 	unsigned reversed_symbol = 0;
23441c9b009Schristos 
23541c9b009Schristos 	for (int i = 0; i < num_bits; i++) {
23641c9b009Schristos 		reversed_symbol = (reversed_symbol << 1) | (symbol & 1);
23741c9b009Schristos 		symbol >>= 1;
23841c9b009Schristos 	}
23941c9b009Schristos 
24041c9b009Schristos 	return reversed_symbol;
24141c9b009Schristos }
24241c9b009Schristos 
24341c9b009Schristos static unsigned
lz_rd_decode_matched(struct lz_range_decoder * rd,int * bm,int match_byte)24441c9b009Schristos lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte)
24541c9b009Schristos {
24641c9b009Schristos 	unsigned symbol = 1;
24741c9b009Schristos 
24841c9b009Schristos 	for (int i = 7; i >= 0; i--) {
24941c9b009Schristos 		const unsigned match_bit = (match_byte >> i) & 1;
25041c9b009Schristos 		const unsigned bit = lz_rd_decode_bit(rd,
25141c9b009Schristos 		    &bm[symbol + (match_bit << 8) + 0x100]);
25241c9b009Schristos 		symbol = (symbol << 1) | bit;
25341c9b009Schristos 		if (match_bit != bit) {
25441c9b009Schristos 			while (symbol < 0x100) {
25541c9b009Schristos 				symbol = (symbol << 1) |
25641c9b009Schristos 				    lz_rd_decode_bit(rd, &bm[symbol]);
25741c9b009Schristos 			}
25841c9b009Schristos 			break;
25941c9b009Schristos 		}
26041c9b009Schristos 	}
26141c9b009Schristos 	return symbol & 0xFF;
26241c9b009Schristos }
26341c9b009Schristos 
26441c9b009Schristos static unsigned
lz_rd_decode_len(struct lz_range_decoder * rd,struct lz_len_model * lm,int pos_state)26541c9b009Schristos lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm,
26641c9b009Schristos     int pos_state)
26741c9b009Schristos {
26841c9b009Schristos 	if (lz_rd_decode_bit(rd, &lm->choice1) == 0)
26941c9b009Schristos 		return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS);
27041c9b009Schristos 
27141c9b009Schristos 	if (lz_rd_decode_bit(rd, &lm->choice2) == 0) {
27241c9b009Schristos 		return LOW_SYMBOLS +
27341c9b009Schristos 		    lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS);
27441c9b009Schristos 	}
27541c9b009Schristos 
27641c9b009Schristos 	return LOW_SYMBOLS + MID_SYMBOLS +
27741c9b009Schristos            lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS);
27841c9b009Schristos }
27941c9b009Schristos 
28041c9b009Schristos struct lz_decoder {
28141c9b009Schristos 	FILE *fin, *fout;
28241c9b009Schristos 	off_t pos, ppos, spos, dict_size;
28341c9b009Schristos 	bool wrapped;
28441c9b009Schristos 	uint32_t crc;
28541c9b009Schristos 	uint8_t *obuf;
28641c9b009Schristos 	struct lz_range_decoder rdec;
28741c9b009Schristos };
28841c9b009Schristos 
28941c9b009Schristos static int
lz_flush(struct lz_decoder * lz)29041c9b009Schristos lz_flush(struct lz_decoder *lz)
29141c9b009Schristos {
29241c9b009Schristos 	off_t offs = lz->pos - lz->spos;
29341c9b009Schristos 	if (offs <= 0)
29441c9b009Schristos 		return -1;
29541c9b009Schristos 
29641c9b009Schristos 	size_t size = (size_t)offs;
29741c9b009Schristos 	lz_crc_update(&lz->crc, lz->obuf + lz->spos, size);
298f57e9384Schristos 	if (!tflag && fwrite(lz->obuf + lz->spos, 1, size, lz->fout) != size)
29941c9b009Schristos 		return -1;
30041c9b009Schristos 
30141c9b009Schristos 	lz->wrapped = lz->pos >= lz->dict_size;
30241c9b009Schristos 	if (lz->wrapped) {
30341c9b009Schristos 		lz->ppos += lz->pos;
30441c9b009Schristos 		lz->pos = 0;
30541c9b009Schristos 	}
30641c9b009Schristos 	lz->spos = lz->pos;
30741c9b009Schristos 	return 0;
30841c9b009Schristos }
30941c9b009Schristos 
31041c9b009Schristos static void
lz_destroy(struct lz_decoder * lz)31141c9b009Schristos lz_destroy(struct lz_decoder *lz)
31241c9b009Schristos {
31341c9b009Schristos 	if (lz->fin)
31441c9b009Schristos 		fclose(lz->fin);
31541c9b009Schristos 	if (lz->fout)
31641c9b009Schristos 		fclose(lz->fout);
31741c9b009Schristos 	free(lz->obuf);
31841c9b009Schristos }
31941c9b009Schristos 
32041c9b009Schristos static int
lz_create(struct lz_decoder * lz,int fin,int fdout,int dict_size)32141c9b009Schristos lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size)
32241c9b009Schristos {
32341c9b009Schristos 	memset(lz, 0, sizeof(*lz));
32441c9b009Schristos 
32541c9b009Schristos 	lz->fin = fdopen(dup(fin), "r");
32641c9b009Schristos 	if (lz->fin == NULL)
32741c9b009Schristos 		goto out;
32841c9b009Schristos 
32941c9b009Schristos 	lz->fout = fdopen(dup(fdout), "w");
33041c9b009Schristos 	if (lz->fout == NULL)
33141c9b009Schristos 		goto out;
33241c9b009Schristos 
33341c9b009Schristos 	lz->pos = lz->ppos = lz->spos = 0;
33441c9b009Schristos 	lz->crc = ~0;
33541c9b009Schristos 	lz->dict_size = dict_size;
33641c9b009Schristos 	lz->wrapped = false;
33741c9b009Schristos 
33841c9b009Schristos 	lz->obuf = malloc(dict_size);
33941c9b009Schristos 	if (lz->obuf == NULL)
34041c9b009Schristos 		goto out;
34141c9b009Schristos 
34241c9b009Schristos 	if (lz_rd_create(&lz->rdec, lz->fin) == -1)
34341c9b009Schristos 		goto out;
34441c9b009Schristos 	return 0;
34541c9b009Schristos out:
34641c9b009Schristos 	lz_destroy(lz);
34741c9b009Schristos 	return -1;
34841c9b009Schristos }
34941c9b009Schristos 
35041c9b009Schristos static uint8_t
lz_peek(const struct lz_decoder * lz,unsigned ahead)35141c9b009Schristos lz_peek(const struct lz_decoder *lz, unsigned ahead)
35241c9b009Schristos {
35341c9b009Schristos 	off_t diff = lz->pos - ahead - 1;
35441c9b009Schristos 
35541c9b009Schristos 	if (diff >= 0)
35641c9b009Schristos 		return lz->obuf[diff];
35741c9b009Schristos 
35841c9b009Schristos 	if (lz->wrapped)
35941c9b009Schristos 		return lz->obuf[lz->dict_size + diff];
36041c9b009Schristos 
36141c9b009Schristos 	return 0;
36241c9b009Schristos }
36341c9b009Schristos 
36441c9b009Schristos static void
lz_put(struct lz_decoder * lz,uint8_t b)36541c9b009Schristos lz_put(struct lz_decoder *lz, uint8_t b)
36641c9b009Schristos {
36741c9b009Schristos 	lz->obuf[lz->pos++] = b;
36841c9b009Schristos 	if (lz->dict_size == lz->pos)
36941c9b009Schristos 		lz_flush(lz);
37041c9b009Schristos }
37141c9b009Schristos 
37241c9b009Schristos static off_t
lz_get_data_position(const struct lz_decoder * lz)37341c9b009Schristos lz_get_data_position(const struct lz_decoder *lz)
37441c9b009Schristos {
37541c9b009Schristos 	return lz->ppos + lz->pos;
37641c9b009Schristos }
37741c9b009Schristos 
37841c9b009Schristos static unsigned
lz_get_crc(const struct lz_decoder * lz)37941c9b009Schristos lz_get_crc(const struct lz_decoder *lz)
38041c9b009Schristos {
38141c9b009Schristos 	return lz->crc ^ 0xffffffffU;
38241c9b009Schristos }
38341c9b009Schristos 
38441c9b009Schristos static void
lz_bm_init(int * a,size_t l)38541c9b009Schristos lz_bm_init(int *a, size_t l)
38641c9b009Schristos {
38741c9b009Schristos 	for (size_t i = 0; i < l; i++)
38841c9b009Schristos 		a[i] = BIT_MODEL_INIT;
38941c9b009Schristos }
39041c9b009Schristos 
39141c9b009Schristos #define LZ_BM_INIT(a)	lz_bm_init(a, __arraycount(a))
39241c9b009Schristos #define LZ_BM_INIT2(a)	do { \
39341c9b009Schristos 	size_t l = __arraycount(a[0]); \
39441c9b009Schristos 	for (size_t i = 0; i < __arraycount(a); i++) \
39541c9b009Schristos 		lz_bm_init(a[i], l); \
396133e932aSrillig } while (0)
39741c9b009Schristos 
39841c9b009Schristos #define LZ_MODEL_INIT(a) do { \
39941c9b009Schristos 	a.choice1 = BIT_MODEL_INIT; \
40041c9b009Schristos 	a.choice2 = BIT_MODEL_INIT; \
40141c9b009Schristos 	LZ_BM_INIT2(a.bm_low); \
40241c9b009Schristos 	LZ_BM_INIT2(a.bm_mid); \
40341c9b009Schristos 	LZ_BM_INIT(a.bm_high); \
404133e932aSrillig } while (0)
40541c9b009Schristos 
40641c9b009Schristos static bool
lz_decode_member(struct lz_decoder * lz)40741c9b009Schristos lz_decode_member(struct lz_decoder *lz)
40841c9b009Schristos {
40941c9b009Schristos 	int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300];
41041c9b009Schristos 	int bm_match[LZ_STATES][POS_STATES];
41141c9b009Schristos 	int bm_rep[4][LZ_STATES];
41241c9b009Schristos 	int bm_len[LZ_STATES][POS_STATES];
41341c9b009Schristos 	int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS];
41441c9b009Schristos 	int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1];
41541c9b009Schristos 	int bm_align[DIS_ALIGN_SIZE];
41641c9b009Schristos 
41741c9b009Schristos 	LZ_BM_INIT2(bm_literal);
41841c9b009Schristos 	LZ_BM_INIT2(bm_match);
41941c9b009Schristos 	LZ_BM_INIT2(bm_rep);
42041c9b009Schristos 	LZ_BM_INIT2(bm_len);
42141c9b009Schristos 	LZ_BM_INIT2(bm_dis_slot);
42241c9b009Schristos 	LZ_BM_INIT(bm_dis);
42341c9b009Schristos 	LZ_BM_INIT(bm_align);
42441c9b009Schristos 
42541c9b009Schristos 	struct lz_len_model match_len_model;
42641c9b009Schristos 	struct lz_len_model rep_len_model;
42741c9b009Schristos 
42841c9b009Schristos 	LZ_MODEL_INIT(match_len_model);
42941c9b009Schristos 	LZ_MODEL_INIT(rep_len_model);
43041c9b009Schristos 
43141c9b009Schristos 	struct lz_range_decoder *rd = &lz->rdec;
43241c9b009Schristos 	unsigned rep[4] = { 0 };
43341c9b009Schristos 
43441c9b009Schristos 
43541c9b009Schristos 	int state = 0;
43641c9b009Schristos 
43741c9b009Schristos 	while (!feof(lz->fin) && !ferror(lz->fin)) {
43841c9b009Schristos 		const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK;
43941c9b009Schristos 		// bit 1
44041c9b009Schristos 		if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) {
44141c9b009Schristos 			const uint8_t prev_byte = lz_peek(lz, 0);
44241c9b009Schristos 			const int literal_state =
44341c9b009Schristos 			    prev_byte >> (8 - LITERAL_CONTEXT_BITS);
44441c9b009Schristos 			int *bm = bm_literal[literal_state];
44541c9b009Schristos 			if (lz_st_is_char(state))
44641c9b009Schristos 				lz_put(lz, lz_rd_decode_tree(rd, bm, 8));
44741c9b009Schristos 			else {
44841c9b009Schristos 				int peek = lz_peek(lz, rep[0]);
44941c9b009Schristos 				lz_put(lz, lz_rd_decode_matched(rd, bm, peek));
45041c9b009Schristos 			}
45141c9b009Schristos 			state = lz_st_get_char(state);
45241c9b009Schristos 			continue;
45341c9b009Schristos 		}
45441c9b009Schristos 		int len;
45541c9b009Schristos 		// bit 2
45641c9b009Schristos 		if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) {
45741c9b009Schristos 			// bit 3
45841c9b009Schristos 			if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) {
45941c9b009Schristos 				// bit 4
46041c9b009Schristos 				if (lz_rd_decode_bit(rd,
46141c9b009Schristos 				    &bm_len[state][pos_state]) == 0)
46241c9b009Schristos 				{
46341c9b009Schristos 					state = lz_st_get_short_rep(state);
46441c9b009Schristos 					lz_put(lz, lz_peek(lz, rep[0]));
46541c9b009Schristos 					continue;
46641c9b009Schristos 				}
46741c9b009Schristos 			} else {
46841c9b009Schristos 				unsigned distance;
46941c9b009Schristos 				// bit 4
47041c9b009Schristos 				if (lz_rd_decode_bit(rd, &bm_rep[2][state])
47141c9b009Schristos 				    == 0)
47241c9b009Schristos 					distance = rep[1];
47341c9b009Schristos 				else {
47441c9b009Schristos 					// bit 5
47541c9b009Schristos 					if (lz_rd_decode_bit(rd,
47641c9b009Schristos 					    &bm_rep[3][state]) == 0)
47741c9b009Schristos 						distance = rep[2];
47841c9b009Schristos 					else {
47941c9b009Schristos 						distance = rep[3];
48041c9b009Schristos 						rep[3] = rep[2];
48141c9b009Schristos 					}
48241c9b009Schristos 					rep[2] = rep[1];
48341c9b009Schristos 				}
48441c9b009Schristos 				rep[1] = rep[0];
48541c9b009Schristos 				rep[0] = distance;
48641c9b009Schristos 			}
48741c9b009Schristos 			state = lz_st_get_rep(state);
48841c9b009Schristos 			len = MIN_MATCH_LEN +
48941c9b009Schristos 			    lz_rd_decode_len(rd, &rep_len_model, pos_state);
49041c9b009Schristos 		} else {
49141c9b009Schristos 			rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0];
49241c9b009Schristos 			len = MIN_MATCH_LEN +
49341c9b009Schristos 			    lz_rd_decode_len(rd, &match_len_model, pos_state);
49441c9b009Schristos 			const int len_state =
49541c9b009Schristos 			    MIN(len - MIN_MATCH_LEN, STATES - 1);
49641c9b009Schristos 			rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state],
49741c9b009Schristos 			    DIS_SLOT_BITS);
49841c9b009Schristos 			if (rep[0] >= DIS_MODEL_START) {
49941c9b009Schristos 				const unsigned dis_slot = rep[0];
50041c9b009Schristos 				const int direct_bits = (dis_slot >> 1) - 1;
50141c9b009Schristos 			        rep[0] = (2 | (dis_slot & 1)) << direct_bits;
50241c9b009Schristos 				if (dis_slot < DIS_MODEL_END)
50341c9b009Schristos 					rep[0] += lz_rd_decode_tree_reversed(rd,
50441c9b009Schristos 					    &bm_dis[rep[0] - dis_slot],
50541c9b009Schristos                                             direct_bits);
50641c9b009Schristos 				else {
50741c9b009Schristos 					rep[0] += lz_rd_decode(rd, direct_bits
50841c9b009Schristos 					    - DIS_ALIGN_BITS) << DIS_ALIGN_BITS;
50941c9b009Schristos 					rep[0] += lz_rd_decode_tree_reversed(rd,
51041c9b009Schristos 					    bm_align, DIS_ALIGN_BITS);
51141c9b009Schristos 					if (rep[0] == 0xFFFFFFFFU) {
51241c9b009Schristos 						lz_flush(lz);
51341c9b009Schristos 						return len == MIN_MATCH_LEN;
51441c9b009Schristos 					}
51541c9b009Schristos 				}
51641c9b009Schristos 			}
51741c9b009Schristos 			state = lz_st_get_match(state);
51841c9b009Schristos 			if (rep[0] >= lz->dict_size ||
51941c9b009Schristos 			    (rep[0] >= lz->pos && !lz->wrapped)) {
52041c9b009Schristos 				lz_flush(lz);
52141c9b009Schristos 				return false;
52241c9b009Schristos 			}
52341c9b009Schristos 		}
52441c9b009Schristos 		for (int i = 0; i < len; i++)
52541c9b009Schristos 			lz_put(lz, lz_peek(lz, rep[0]));
52641c9b009Schristos     	}
52741c9b009Schristos 	lz_flush(lz);
52841c9b009Schristos 	return false;
52941c9b009Schristos }
53041c9b009Schristos 
53141c9b009Schristos /*
53241c9b009Schristos  * 0-3	CRC32 of the uncompressed data
53341c9b009Schristos  * 4-11 size of the uncompressed data
53441c9b009Schristos  * 12-19 member size including header and trailer
53541c9b009Schristos  */
53641c9b009Schristos #define TRAILER_SIZE 20
53741c9b009Schristos 
53841c9b009Schristos 
53941c9b009Schristos static off_t
lz_decode(int fin,int fdout,unsigned dict_size,off_t * insize)540bc30e841Schristos lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize)
54141c9b009Schristos {
54241c9b009Schristos 	struct lz_decoder lz;
54341c9b009Schristos 	off_t rv = -1;
54441c9b009Schristos 
54541c9b009Schristos 	if (lz_create(&lz, fin, fdout, dict_size) == -1)
54641c9b009Schristos 		return -1;
54741c9b009Schristos 
54841c9b009Schristos 	if (!lz_decode_member(&lz))
54941c9b009Schristos 		goto out;
55041c9b009Schristos 
55141c9b009Schristos 	uint8_t trailer[TRAILER_SIZE];
55241c9b009Schristos 
55341c9b009Schristos 	for(size_t i = 0; i < __arraycount(trailer); i++)
55441c9b009Schristos 		trailer[i] = (uint8_t)getc(lz.fin);
55541c9b009Schristos 
55641c9b009Schristos 	unsigned crc = 0;
55741c9b009Schristos 	for (int i = 3; i >= 0; --i) {
55841c9b009Schristos 		crc <<= 8;
55941c9b009Schristos 		crc += trailer[i];
56041c9b009Schristos 	}
56141c9b009Schristos 
56241c9b009Schristos 	int64_t data_size = 0;
56341c9b009Schristos 	for (int i = 11; i >= 4; --i) {
56441c9b009Schristos 		data_size <<= 8;
56541c9b009Schristos 		data_size += trailer[i];
56641c9b009Schristos 	}
56741c9b009Schristos 
56841c9b009Schristos 	if (crc != lz_get_crc(&lz) || data_size != lz_get_data_position(&lz))
56941c9b009Schristos 		goto out;
57041c9b009Schristos 
57141c9b009Schristos 	rv = 0;
57241c9b009Schristos 	for (int i = 19; i >= 12; --i) {
57341c9b009Schristos 		rv <<= 8;
57441c9b009Schristos 		rv += trailer[i];
57541c9b009Schristos 	}
576bc30e841Schristos 	if (insize)
577bc30e841Schristos 		*insize = rv;
5783b9b3869Schristos #if 0
5793b9b3869Schristos 	/* Does not work with pipes */
58041c9b009Schristos 	rv = ftello(lz.fout);
5813b9b3869Schristos #else
5823b9b3869Schristos 	rv = data_size;
5833b9b3869Schristos #endif
58441c9b009Schristos out:
58541c9b009Schristos 	lz_destroy(&lz);
58641c9b009Schristos 	return rv;
58741c9b009Schristos }
58841c9b009Schristos 
58941c9b009Schristos 
59041c9b009Schristos /*
59141c9b009Schristos  * 0-3 magic
59241c9b009Schristos  * 4 version
59341c9b009Schristos  * 5 coded dict_size
59441c9b009Schristos  */
59541c9b009Schristos #define HDR_SIZE 6
59641c9b009Schristos #define MIN_DICTIONARY_SIZE (1 << 12)
59741c9b009Schristos #define MAX_DICTIONARY_SIZE (1 << 29)
59841c9b009Schristos 
59941c9b009Schristos static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 };
60041c9b009Schristos 
60141c9b009Schristos static unsigned
lz_get_dict_size(unsigned char c)60241c9b009Schristos lz_get_dict_size(unsigned char c)
60341c9b009Schristos {
60441c9b009Schristos 	unsigned dict_size = 1 << (c & 0x1f);
605*5b4135cdSchristos 	dict_size -= (dict_size >> 4) * ( (c >> 5) & 0x7);
60641c9b009Schristos 	if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE)
60741c9b009Schristos 		return 0;
60841c9b009Schristos 	return dict_size;
60941c9b009Schristos }
61041c9b009Schristos 
61141c9b009Schristos static off_t
unlz(int fin,int fout,char * pre,size_t prelen,off_t * bytes_in)61241c9b009Schristos unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in)
61341c9b009Schristos {
61441c9b009Schristos 	if (lz_crc[0] == 0)
61541c9b009Schristos 		lz_crc_init();
61641c9b009Schristos 
61741c9b009Schristos 	char header[HDR_SIZE];
61841c9b009Schristos 
619bc30e841Schristos 	if (pre && prelen)
620bc30e841Schristos 		memcpy(header, pre, prelen);
621bc30e841Schristos 
622bc30e841Schristos 	ssize_t nr = read(fin, header + prelen, sizeof(header) - prelen);
623bc30e841Schristos 	switch (nr) {
624bc30e841Schristos 	case -1:
625bc30e841Schristos 		return -1;
626bc30e841Schristos 	case 0:
627bc30e841Schristos 		return prelen ? -1 : 0;
628bc30e841Schristos 	default:
629bc30e841Schristos 		if ((size_t)nr != sizeof(header) - prelen)
630bc30e841Schristos 			return -1;
631bc30e841Schristos 		break;
63241c9b009Schristos 	}
63341c9b009Schristos 
63441c9b009Schristos 	if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0)
63541c9b009Schristos 		return -1;
63641c9b009Schristos 
63741c9b009Schristos 	unsigned dict_size = lz_get_dict_size(header[5]);
63841c9b009Schristos 	if (dict_size == 0)
63941c9b009Schristos 		return -1;
64041c9b009Schristos 
641bc30e841Schristos 	return lz_decode(fin, fout, dict_size, bytes_in);
64241c9b009Schristos }
643