1*5b4135cdSchristos /* $NetBSD: unlz.c,v 1.10 2024/05/04 13:18:06 christos Exp $ */
241c9b009Schristos
341c9b009Schristos /*-
441c9b009Schristos * Copyright (c) 2018 The NetBSD Foundation, Inc.
541c9b009Schristos * All rights reserved.
641c9b009Schristos *
741c9b009Schristos * This code is derived from software contributed to The NetBSD Foundation
841c9b009Schristos * by Christos Zoulas.
941c9b009Schristos *
1041c9b009Schristos * Redistribution and use in source and binary forms, with or without
1141c9b009Schristos * modification, are permitted provided that the following conditions
1241c9b009Schristos * are met:
1341c9b009Schristos * 1. Redistributions of source code must retain the above copyright
1441c9b009Schristos * notice, this list of conditions and the following disclaimer.
1541c9b009Schristos * 2. Redistributions in binary form must reproduce the above copyright
1641c9b009Schristos * notice, this list of conditions and the following disclaimer in the
1741c9b009Schristos * documentation and/or other materials provided with the distribution.
1841c9b009Schristos *
1941c9b009Schristos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
2041c9b009Schristos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
2141c9b009Schristos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
2241c9b009Schristos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
2341c9b009Schristos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2441c9b009Schristos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2541c9b009Schristos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2641c9b009Schristos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2741c9b009Schristos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2841c9b009Schristos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2941c9b009Schristos * POSSIBILITY OF SUCH DAMAGE.
3041c9b009Schristos */
3141c9b009Schristos
3241c9b009Schristos /* Lzd - Educational decompressor for the lzip format
3341c9b009Schristos Copyright (C) 2013-2018 Antonio Diaz Diaz.
3441c9b009Schristos
3541c9b009Schristos This program is free software. Redistribution and use in source and
3641c9b009Schristos binary forms, with or without modification, are permitted provided
3741c9b009Schristos that the following conditions are met:
3841c9b009Schristos
3941c9b009Schristos 1. Redistributions of source code must retain the above copyright
4041c9b009Schristos notice, this list of conditions and the following disclaimer.
4141c9b009Schristos
4241c9b009Schristos 2. Redistributions in binary form must reproduce the above copyright
4341c9b009Schristos notice, this list of conditions and the following disclaimer in the
4441c9b009Schristos documentation and/or other materials provided with the distribution.
4541c9b009Schristos
4641c9b009Schristos This program is distributed in the hope that it will be useful,
4741c9b009Schristos but WITHOUT ANY WARRANTY; without even the implied warranty of
4841c9b009Schristos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
4941c9b009Schristos */
5041c9b009Schristos
5141c9b009Schristos #include <sys/param.h>
5241c9b009Schristos #include <stdio.h>
5341c9b009Schristos #include <string.h>
5441c9b009Schristos #include <stdlib.h>
5541c9b009Schristos #include <stdint.h>
5641c9b009Schristos #include <stdbool.h>
5741c9b009Schristos #include <errno.h>
5841c9b009Schristos #include <unistd.h>
5941c9b009Schristos
6041c9b009Schristos #define LZ_STATES 12
6141c9b009Schristos
6241c9b009Schristos #define LITERAL_CONTEXT_BITS 3
6341c9b009Schristos #define POS_STATE_BITS 2
6441c9b009Schristos #define POS_STATES (1 << POS_STATE_BITS)
6541c9b009Schristos #define POS_STATE_MASK (POS_STATES - 1)
6641c9b009Schristos
6741c9b009Schristos #define STATES 4
6841c9b009Schristos #define DIS_SLOT_BITS 6
6941c9b009Schristos
7041c9b009Schristos #define DIS_MODEL_START 4
7141c9b009Schristos #define DIS_MODEL_END 14
7241c9b009Schristos
7341c9b009Schristos #define MODELED_DISTANCES (1 << (DIS_MODEL_END / 2))
7441c9b009Schristos #define DIS_ALIGN_BITS 4
7541c9b009Schristos #define DIS_ALIGN_SIZE (1 << DIS_ALIGN_BITS)
7641c9b009Schristos
7741c9b009Schristos #define LOW_BITS 3
7841c9b009Schristos #define MID_BITS 3
7941c9b009Schristos #define HIGH_BITS 8
8041c9b009Schristos
8141c9b009Schristos #define LOW_SYMBOLS (1 << LOW_BITS)
8241c9b009Schristos #define MID_SYMBOLS (1 << MID_BITS)
8341c9b009Schristos #define HIGH_SYMBOLS (1 << HIGH_BITS)
8441c9b009Schristos
8541c9b009Schristos #define MAX_SYMBOLS (LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS)
8641c9b009Schristos
8741c9b009Schristos #define MIN_MATCH_LEN 2
8841c9b009Schristos
8941c9b009Schristos #define BIT_MODEL_MOVE_BITS 5
9041c9b009Schristos #define BIT_MODEL_TOTAL_BITS 11
9141c9b009Schristos #define BIT_MODEL_TOTAL (1 << BIT_MODEL_TOTAL_BITS)
9241c9b009Schristos #define BIT_MODEL_INIT (BIT_MODEL_TOTAL / 2)
9341c9b009Schristos
9441c9b009Schristos static const int lz_st_next[] = {
9541c9b009Schristos 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
9641c9b009Schristos };
9741c9b009Schristos
9841c9b009Schristos static bool
lz_st_is_char(int st)9941c9b009Schristos lz_st_is_char(int st) {
10041c9b009Schristos return st < 7;
10141c9b009Schristos }
10241c9b009Schristos
10341c9b009Schristos static int
lz_st_get_char(int st)10441c9b009Schristos lz_st_get_char(int st) {
10541c9b009Schristos return lz_st_next[st];
10641c9b009Schristos }
10741c9b009Schristos
10841c9b009Schristos static int
lz_st_get_match(int st)10941c9b009Schristos lz_st_get_match(int st) {
11041c9b009Schristos return st < 7 ? 7 : 10;
11141c9b009Schristos }
11241c9b009Schristos
11341c9b009Schristos static int
lz_st_get_rep(int st)11441c9b009Schristos lz_st_get_rep(int st) {
11541c9b009Schristos return st < 7 ? 8 : 11;
11641c9b009Schristos }
11741c9b009Schristos
11841c9b009Schristos static int
lz_st_get_short_rep(int st)11941c9b009Schristos lz_st_get_short_rep(int st) {
12041c9b009Schristos return st < 7 ? 9 : 11;
12141c9b009Schristos }
12241c9b009Schristos
12341c9b009Schristos struct lz_len_model {
12441c9b009Schristos int choice1;
12541c9b009Schristos int choice2;
12641c9b009Schristos int bm_low[POS_STATES][LOW_SYMBOLS];
12741c9b009Schristos int bm_mid[POS_STATES][MID_SYMBOLS];
12841c9b009Schristos int bm_high[HIGH_SYMBOLS];
12941c9b009Schristos };
13041c9b009Schristos
13141c9b009Schristos static uint32_t lz_crc[256];
13241c9b009Schristos
13341c9b009Schristos static void
lz_crc_init(void)13441c9b009Schristos lz_crc_init(void)
13541c9b009Schristos {
13641c9b009Schristos for (unsigned i = 0; i < __arraycount(lz_crc); i++) {
13741c9b009Schristos unsigned c = i;
13841c9b009Schristos for (unsigned j = 0; j < 8; j++) {
13941c9b009Schristos if (c & 1)
14041c9b009Schristos c = 0xEDB88320U ^ (c >> 1);
14141c9b009Schristos else
14241c9b009Schristos c >>= 1;
14341c9b009Schristos }
14441c9b009Schristos lz_crc[i] = c;
14541c9b009Schristos }
14641c9b009Schristos }
14741c9b009Schristos
14841c9b009Schristos static void
lz_crc_update(uint32_t * crc,const uint8_t * buf,size_t len)14941c9b009Schristos lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len)
15041c9b009Schristos {
15141c9b009Schristos for (size_t i = 0; i < len; i++)
15241c9b009Schristos *crc = lz_crc[(*crc ^ buf[i]) & 0xFF] ^ (*crc >> 8);
15341c9b009Schristos }
15441c9b009Schristos
15541c9b009Schristos struct lz_range_decoder {
15641c9b009Schristos FILE *fp;
15741c9b009Schristos uint32_t code;
15841c9b009Schristos uint32_t range;
15941c9b009Schristos };
16041c9b009Schristos
16141c9b009Schristos static int
lz_rd_create(struct lz_range_decoder * rd,FILE * fp)16241c9b009Schristos lz_rd_create(struct lz_range_decoder *rd, FILE *fp)
16341c9b009Schristos {
16441c9b009Schristos rd->fp = fp;
16541c9b009Schristos rd->code = 0;
16641c9b009Schristos rd->range = ~0;
16741c9b009Schristos for (int i = 0; i < 5; i++)
16841c9b009Schristos rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
16941c9b009Schristos return ferror(rd->fp) ? -1 : 0;
17041c9b009Schristos }
17141c9b009Schristos
17241c9b009Schristos static unsigned
lz_rd_decode(struct lz_range_decoder * rd,int num_bits)17341c9b009Schristos lz_rd_decode(struct lz_range_decoder *rd, int num_bits)
17441c9b009Schristos {
17541c9b009Schristos unsigned symbol = 0;
17641c9b009Schristos
17741c9b009Schristos for (int i = num_bits; i > 0; i--) {
17841c9b009Schristos rd->range >>= 1;
17941c9b009Schristos symbol <<= 1;
18041c9b009Schristos if (rd->code >= rd->range) {
18141c9b009Schristos rd->code -= rd->range;
18241c9b009Schristos symbol |= 1;
18341c9b009Schristos }
18441c9b009Schristos if (rd->range <= 0x00FFFFFFU) {
18541c9b009Schristos rd->range <<= 8;
18641c9b009Schristos rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
18741c9b009Schristos }
18841c9b009Schristos }
18941c9b009Schristos
19041c9b009Schristos return symbol;
19141c9b009Schristos }
19241c9b009Schristos
19341c9b009Schristos static unsigned
lz_rd_decode_bit(struct lz_range_decoder * rd,int * bm)19441c9b009Schristos lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm)
19541c9b009Schristos {
19641c9b009Schristos unsigned symbol;
19741c9b009Schristos const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm;
19841c9b009Schristos
19941c9b009Schristos if(rd->code < bound) {
20041c9b009Schristos rd->range = bound;
20141c9b009Schristos *bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS;
20241c9b009Schristos symbol = 0;
20341c9b009Schristos }
20441c9b009Schristos else {
20541c9b009Schristos rd->range -= bound;
20641c9b009Schristos rd->code -= bound;
20741c9b009Schristos *bm -= *bm >> BIT_MODEL_MOVE_BITS;
20841c9b009Schristos symbol = 1;
20941c9b009Schristos }
21041c9b009Schristos
21141c9b009Schristos if (rd->range <= 0x00FFFFFFU) {
21241c9b009Schristos rd->range <<= 8;
21341c9b009Schristos rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
21441c9b009Schristos }
21541c9b009Schristos return symbol;
21641c9b009Schristos }
21741c9b009Schristos
21841c9b009Schristos static unsigned
lz_rd_decode_tree(struct lz_range_decoder * rd,int * bm,int num_bits)21941c9b009Schristos lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits)
22041c9b009Schristos {
22141c9b009Schristos unsigned symbol = 1;
22241c9b009Schristos
22341c9b009Schristos for (int i = 0; i < num_bits; i++)
22441c9b009Schristos symbol = (symbol << 1) | lz_rd_decode_bit(rd, &bm[symbol]);
22541c9b009Schristos
22641c9b009Schristos return symbol - (1 << num_bits);
22741c9b009Schristos }
22841c9b009Schristos
22941c9b009Schristos static unsigned
lz_rd_decode_tree_reversed(struct lz_range_decoder * rd,int * bm,int num_bits)23041c9b009Schristos lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits)
23141c9b009Schristos {
23241c9b009Schristos unsigned symbol = lz_rd_decode_tree(rd, bm, num_bits);
23341c9b009Schristos unsigned reversed_symbol = 0;
23441c9b009Schristos
23541c9b009Schristos for (int i = 0; i < num_bits; i++) {
23641c9b009Schristos reversed_symbol = (reversed_symbol << 1) | (symbol & 1);
23741c9b009Schristos symbol >>= 1;
23841c9b009Schristos }
23941c9b009Schristos
24041c9b009Schristos return reversed_symbol;
24141c9b009Schristos }
24241c9b009Schristos
24341c9b009Schristos static unsigned
lz_rd_decode_matched(struct lz_range_decoder * rd,int * bm,int match_byte)24441c9b009Schristos lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte)
24541c9b009Schristos {
24641c9b009Schristos unsigned symbol = 1;
24741c9b009Schristos
24841c9b009Schristos for (int i = 7; i >= 0; i--) {
24941c9b009Schristos const unsigned match_bit = (match_byte >> i) & 1;
25041c9b009Schristos const unsigned bit = lz_rd_decode_bit(rd,
25141c9b009Schristos &bm[symbol + (match_bit << 8) + 0x100]);
25241c9b009Schristos symbol = (symbol << 1) | bit;
25341c9b009Schristos if (match_bit != bit) {
25441c9b009Schristos while (symbol < 0x100) {
25541c9b009Schristos symbol = (symbol << 1) |
25641c9b009Schristos lz_rd_decode_bit(rd, &bm[symbol]);
25741c9b009Schristos }
25841c9b009Schristos break;
25941c9b009Schristos }
26041c9b009Schristos }
26141c9b009Schristos return symbol & 0xFF;
26241c9b009Schristos }
26341c9b009Schristos
26441c9b009Schristos static unsigned
lz_rd_decode_len(struct lz_range_decoder * rd,struct lz_len_model * lm,int pos_state)26541c9b009Schristos lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm,
26641c9b009Schristos int pos_state)
26741c9b009Schristos {
26841c9b009Schristos if (lz_rd_decode_bit(rd, &lm->choice1) == 0)
26941c9b009Schristos return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS);
27041c9b009Schristos
27141c9b009Schristos if (lz_rd_decode_bit(rd, &lm->choice2) == 0) {
27241c9b009Schristos return LOW_SYMBOLS +
27341c9b009Schristos lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS);
27441c9b009Schristos }
27541c9b009Schristos
27641c9b009Schristos return LOW_SYMBOLS + MID_SYMBOLS +
27741c9b009Schristos lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS);
27841c9b009Schristos }
27941c9b009Schristos
28041c9b009Schristos struct lz_decoder {
28141c9b009Schristos FILE *fin, *fout;
28241c9b009Schristos off_t pos, ppos, spos, dict_size;
28341c9b009Schristos bool wrapped;
28441c9b009Schristos uint32_t crc;
28541c9b009Schristos uint8_t *obuf;
28641c9b009Schristos struct lz_range_decoder rdec;
28741c9b009Schristos };
28841c9b009Schristos
28941c9b009Schristos static int
lz_flush(struct lz_decoder * lz)29041c9b009Schristos lz_flush(struct lz_decoder *lz)
29141c9b009Schristos {
29241c9b009Schristos off_t offs = lz->pos - lz->spos;
29341c9b009Schristos if (offs <= 0)
29441c9b009Schristos return -1;
29541c9b009Schristos
29641c9b009Schristos size_t size = (size_t)offs;
29741c9b009Schristos lz_crc_update(&lz->crc, lz->obuf + lz->spos, size);
298f57e9384Schristos if (!tflag && fwrite(lz->obuf + lz->spos, 1, size, lz->fout) != size)
29941c9b009Schristos return -1;
30041c9b009Schristos
30141c9b009Schristos lz->wrapped = lz->pos >= lz->dict_size;
30241c9b009Schristos if (lz->wrapped) {
30341c9b009Schristos lz->ppos += lz->pos;
30441c9b009Schristos lz->pos = 0;
30541c9b009Schristos }
30641c9b009Schristos lz->spos = lz->pos;
30741c9b009Schristos return 0;
30841c9b009Schristos }
30941c9b009Schristos
31041c9b009Schristos static void
lz_destroy(struct lz_decoder * lz)31141c9b009Schristos lz_destroy(struct lz_decoder *lz)
31241c9b009Schristos {
31341c9b009Schristos if (lz->fin)
31441c9b009Schristos fclose(lz->fin);
31541c9b009Schristos if (lz->fout)
31641c9b009Schristos fclose(lz->fout);
31741c9b009Schristos free(lz->obuf);
31841c9b009Schristos }
31941c9b009Schristos
32041c9b009Schristos static int
lz_create(struct lz_decoder * lz,int fin,int fdout,int dict_size)32141c9b009Schristos lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size)
32241c9b009Schristos {
32341c9b009Schristos memset(lz, 0, sizeof(*lz));
32441c9b009Schristos
32541c9b009Schristos lz->fin = fdopen(dup(fin), "r");
32641c9b009Schristos if (lz->fin == NULL)
32741c9b009Schristos goto out;
32841c9b009Schristos
32941c9b009Schristos lz->fout = fdopen(dup(fdout), "w");
33041c9b009Schristos if (lz->fout == NULL)
33141c9b009Schristos goto out;
33241c9b009Schristos
33341c9b009Schristos lz->pos = lz->ppos = lz->spos = 0;
33441c9b009Schristos lz->crc = ~0;
33541c9b009Schristos lz->dict_size = dict_size;
33641c9b009Schristos lz->wrapped = false;
33741c9b009Schristos
33841c9b009Schristos lz->obuf = malloc(dict_size);
33941c9b009Schristos if (lz->obuf == NULL)
34041c9b009Schristos goto out;
34141c9b009Schristos
34241c9b009Schristos if (lz_rd_create(&lz->rdec, lz->fin) == -1)
34341c9b009Schristos goto out;
34441c9b009Schristos return 0;
34541c9b009Schristos out:
34641c9b009Schristos lz_destroy(lz);
34741c9b009Schristos return -1;
34841c9b009Schristos }
34941c9b009Schristos
35041c9b009Schristos static uint8_t
lz_peek(const struct lz_decoder * lz,unsigned ahead)35141c9b009Schristos lz_peek(const struct lz_decoder *lz, unsigned ahead)
35241c9b009Schristos {
35341c9b009Schristos off_t diff = lz->pos - ahead - 1;
35441c9b009Schristos
35541c9b009Schristos if (diff >= 0)
35641c9b009Schristos return lz->obuf[diff];
35741c9b009Schristos
35841c9b009Schristos if (lz->wrapped)
35941c9b009Schristos return lz->obuf[lz->dict_size + diff];
36041c9b009Schristos
36141c9b009Schristos return 0;
36241c9b009Schristos }
36341c9b009Schristos
36441c9b009Schristos static void
lz_put(struct lz_decoder * lz,uint8_t b)36541c9b009Schristos lz_put(struct lz_decoder *lz, uint8_t b)
36641c9b009Schristos {
36741c9b009Schristos lz->obuf[lz->pos++] = b;
36841c9b009Schristos if (lz->dict_size == lz->pos)
36941c9b009Schristos lz_flush(lz);
37041c9b009Schristos }
37141c9b009Schristos
37241c9b009Schristos static off_t
lz_get_data_position(const struct lz_decoder * lz)37341c9b009Schristos lz_get_data_position(const struct lz_decoder *lz)
37441c9b009Schristos {
37541c9b009Schristos return lz->ppos + lz->pos;
37641c9b009Schristos }
37741c9b009Schristos
37841c9b009Schristos static unsigned
lz_get_crc(const struct lz_decoder * lz)37941c9b009Schristos lz_get_crc(const struct lz_decoder *lz)
38041c9b009Schristos {
38141c9b009Schristos return lz->crc ^ 0xffffffffU;
38241c9b009Schristos }
38341c9b009Schristos
38441c9b009Schristos static void
lz_bm_init(int * a,size_t l)38541c9b009Schristos lz_bm_init(int *a, size_t l)
38641c9b009Schristos {
38741c9b009Schristos for (size_t i = 0; i < l; i++)
38841c9b009Schristos a[i] = BIT_MODEL_INIT;
38941c9b009Schristos }
39041c9b009Schristos
39141c9b009Schristos #define LZ_BM_INIT(a) lz_bm_init(a, __arraycount(a))
39241c9b009Schristos #define LZ_BM_INIT2(a) do { \
39341c9b009Schristos size_t l = __arraycount(a[0]); \
39441c9b009Schristos for (size_t i = 0; i < __arraycount(a); i++) \
39541c9b009Schristos lz_bm_init(a[i], l); \
396133e932aSrillig } while (0)
39741c9b009Schristos
39841c9b009Schristos #define LZ_MODEL_INIT(a) do { \
39941c9b009Schristos a.choice1 = BIT_MODEL_INIT; \
40041c9b009Schristos a.choice2 = BIT_MODEL_INIT; \
40141c9b009Schristos LZ_BM_INIT2(a.bm_low); \
40241c9b009Schristos LZ_BM_INIT2(a.bm_mid); \
40341c9b009Schristos LZ_BM_INIT(a.bm_high); \
404133e932aSrillig } while (0)
40541c9b009Schristos
40641c9b009Schristos static bool
lz_decode_member(struct lz_decoder * lz)40741c9b009Schristos lz_decode_member(struct lz_decoder *lz)
40841c9b009Schristos {
40941c9b009Schristos int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300];
41041c9b009Schristos int bm_match[LZ_STATES][POS_STATES];
41141c9b009Schristos int bm_rep[4][LZ_STATES];
41241c9b009Schristos int bm_len[LZ_STATES][POS_STATES];
41341c9b009Schristos int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS];
41441c9b009Schristos int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1];
41541c9b009Schristos int bm_align[DIS_ALIGN_SIZE];
41641c9b009Schristos
41741c9b009Schristos LZ_BM_INIT2(bm_literal);
41841c9b009Schristos LZ_BM_INIT2(bm_match);
41941c9b009Schristos LZ_BM_INIT2(bm_rep);
42041c9b009Schristos LZ_BM_INIT2(bm_len);
42141c9b009Schristos LZ_BM_INIT2(bm_dis_slot);
42241c9b009Schristos LZ_BM_INIT(bm_dis);
42341c9b009Schristos LZ_BM_INIT(bm_align);
42441c9b009Schristos
42541c9b009Schristos struct lz_len_model match_len_model;
42641c9b009Schristos struct lz_len_model rep_len_model;
42741c9b009Schristos
42841c9b009Schristos LZ_MODEL_INIT(match_len_model);
42941c9b009Schristos LZ_MODEL_INIT(rep_len_model);
43041c9b009Schristos
43141c9b009Schristos struct lz_range_decoder *rd = &lz->rdec;
43241c9b009Schristos unsigned rep[4] = { 0 };
43341c9b009Schristos
43441c9b009Schristos
43541c9b009Schristos int state = 0;
43641c9b009Schristos
43741c9b009Schristos while (!feof(lz->fin) && !ferror(lz->fin)) {
43841c9b009Schristos const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK;
43941c9b009Schristos // bit 1
44041c9b009Schristos if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) {
44141c9b009Schristos const uint8_t prev_byte = lz_peek(lz, 0);
44241c9b009Schristos const int literal_state =
44341c9b009Schristos prev_byte >> (8 - LITERAL_CONTEXT_BITS);
44441c9b009Schristos int *bm = bm_literal[literal_state];
44541c9b009Schristos if (lz_st_is_char(state))
44641c9b009Schristos lz_put(lz, lz_rd_decode_tree(rd, bm, 8));
44741c9b009Schristos else {
44841c9b009Schristos int peek = lz_peek(lz, rep[0]);
44941c9b009Schristos lz_put(lz, lz_rd_decode_matched(rd, bm, peek));
45041c9b009Schristos }
45141c9b009Schristos state = lz_st_get_char(state);
45241c9b009Schristos continue;
45341c9b009Schristos }
45441c9b009Schristos int len;
45541c9b009Schristos // bit 2
45641c9b009Schristos if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) {
45741c9b009Schristos // bit 3
45841c9b009Schristos if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) {
45941c9b009Schristos // bit 4
46041c9b009Schristos if (lz_rd_decode_bit(rd,
46141c9b009Schristos &bm_len[state][pos_state]) == 0)
46241c9b009Schristos {
46341c9b009Schristos state = lz_st_get_short_rep(state);
46441c9b009Schristos lz_put(lz, lz_peek(lz, rep[0]));
46541c9b009Schristos continue;
46641c9b009Schristos }
46741c9b009Schristos } else {
46841c9b009Schristos unsigned distance;
46941c9b009Schristos // bit 4
47041c9b009Schristos if (lz_rd_decode_bit(rd, &bm_rep[2][state])
47141c9b009Schristos == 0)
47241c9b009Schristos distance = rep[1];
47341c9b009Schristos else {
47441c9b009Schristos // bit 5
47541c9b009Schristos if (lz_rd_decode_bit(rd,
47641c9b009Schristos &bm_rep[3][state]) == 0)
47741c9b009Schristos distance = rep[2];
47841c9b009Schristos else {
47941c9b009Schristos distance = rep[3];
48041c9b009Schristos rep[3] = rep[2];
48141c9b009Schristos }
48241c9b009Schristos rep[2] = rep[1];
48341c9b009Schristos }
48441c9b009Schristos rep[1] = rep[0];
48541c9b009Schristos rep[0] = distance;
48641c9b009Schristos }
48741c9b009Schristos state = lz_st_get_rep(state);
48841c9b009Schristos len = MIN_MATCH_LEN +
48941c9b009Schristos lz_rd_decode_len(rd, &rep_len_model, pos_state);
49041c9b009Schristos } else {
49141c9b009Schristos rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0];
49241c9b009Schristos len = MIN_MATCH_LEN +
49341c9b009Schristos lz_rd_decode_len(rd, &match_len_model, pos_state);
49441c9b009Schristos const int len_state =
49541c9b009Schristos MIN(len - MIN_MATCH_LEN, STATES - 1);
49641c9b009Schristos rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state],
49741c9b009Schristos DIS_SLOT_BITS);
49841c9b009Schristos if (rep[0] >= DIS_MODEL_START) {
49941c9b009Schristos const unsigned dis_slot = rep[0];
50041c9b009Schristos const int direct_bits = (dis_slot >> 1) - 1;
50141c9b009Schristos rep[0] = (2 | (dis_slot & 1)) << direct_bits;
50241c9b009Schristos if (dis_slot < DIS_MODEL_END)
50341c9b009Schristos rep[0] += lz_rd_decode_tree_reversed(rd,
50441c9b009Schristos &bm_dis[rep[0] - dis_slot],
50541c9b009Schristos direct_bits);
50641c9b009Schristos else {
50741c9b009Schristos rep[0] += lz_rd_decode(rd, direct_bits
50841c9b009Schristos - DIS_ALIGN_BITS) << DIS_ALIGN_BITS;
50941c9b009Schristos rep[0] += lz_rd_decode_tree_reversed(rd,
51041c9b009Schristos bm_align, DIS_ALIGN_BITS);
51141c9b009Schristos if (rep[0] == 0xFFFFFFFFU) {
51241c9b009Schristos lz_flush(lz);
51341c9b009Schristos return len == MIN_MATCH_LEN;
51441c9b009Schristos }
51541c9b009Schristos }
51641c9b009Schristos }
51741c9b009Schristos state = lz_st_get_match(state);
51841c9b009Schristos if (rep[0] >= lz->dict_size ||
51941c9b009Schristos (rep[0] >= lz->pos && !lz->wrapped)) {
52041c9b009Schristos lz_flush(lz);
52141c9b009Schristos return false;
52241c9b009Schristos }
52341c9b009Schristos }
52441c9b009Schristos for (int i = 0; i < len; i++)
52541c9b009Schristos lz_put(lz, lz_peek(lz, rep[0]));
52641c9b009Schristos }
52741c9b009Schristos lz_flush(lz);
52841c9b009Schristos return false;
52941c9b009Schristos }
53041c9b009Schristos
53141c9b009Schristos /*
53241c9b009Schristos * 0-3 CRC32 of the uncompressed data
53341c9b009Schristos * 4-11 size of the uncompressed data
53441c9b009Schristos * 12-19 member size including header and trailer
53541c9b009Schristos */
53641c9b009Schristos #define TRAILER_SIZE 20
53741c9b009Schristos
53841c9b009Schristos
53941c9b009Schristos static off_t
lz_decode(int fin,int fdout,unsigned dict_size,off_t * insize)540bc30e841Schristos lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize)
54141c9b009Schristos {
54241c9b009Schristos struct lz_decoder lz;
54341c9b009Schristos off_t rv = -1;
54441c9b009Schristos
54541c9b009Schristos if (lz_create(&lz, fin, fdout, dict_size) == -1)
54641c9b009Schristos return -1;
54741c9b009Schristos
54841c9b009Schristos if (!lz_decode_member(&lz))
54941c9b009Schristos goto out;
55041c9b009Schristos
55141c9b009Schristos uint8_t trailer[TRAILER_SIZE];
55241c9b009Schristos
55341c9b009Schristos for(size_t i = 0; i < __arraycount(trailer); i++)
55441c9b009Schristos trailer[i] = (uint8_t)getc(lz.fin);
55541c9b009Schristos
55641c9b009Schristos unsigned crc = 0;
55741c9b009Schristos for (int i = 3; i >= 0; --i) {
55841c9b009Schristos crc <<= 8;
55941c9b009Schristos crc += trailer[i];
56041c9b009Schristos }
56141c9b009Schristos
56241c9b009Schristos int64_t data_size = 0;
56341c9b009Schristos for (int i = 11; i >= 4; --i) {
56441c9b009Schristos data_size <<= 8;
56541c9b009Schristos data_size += trailer[i];
56641c9b009Schristos }
56741c9b009Schristos
56841c9b009Schristos if (crc != lz_get_crc(&lz) || data_size != lz_get_data_position(&lz))
56941c9b009Schristos goto out;
57041c9b009Schristos
57141c9b009Schristos rv = 0;
57241c9b009Schristos for (int i = 19; i >= 12; --i) {
57341c9b009Schristos rv <<= 8;
57441c9b009Schristos rv += trailer[i];
57541c9b009Schristos }
576bc30e841Schristos if (insize)
577bc30e841Schristos *insize = rv;
5783b9b3869Schristos #if 0
5793b9b3869Schristos /* Does not work with pipes */
58041c9b009Schristos rv = ftello(lz.fout);
5813b9b3869Schristos #else
5823b9b3869Schristos rv = data_size;
5833b9b3869Schristos #endif
58441c9b009Schristos out:
58541c9b009Schristos lz_destroy(&lz);
58641c9b009Schristos return rv;
58741c9b009Schristos }
58841c9b009Schristos
58941c9b009Schristos
59041c9b009Schristos /*
59141c9b009Schristos * 0-3 magic
59241c9b009Schristos * 4 version
59341c9b009Schristos * 5 coded dict_size
59441c9b009Schristos */
59541c9b009Schristos #define HDR_SIZE 6
59641c9b009Schristos #define MIN_DICTIONARY_SIZE (1 << 12)
59741c9b009Schristos #define MAX_DICTIONARY_SIZE (1 << 29)
59841c9b009Schristos
59941c9b009Schristos static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 };
60041c9b009Schristos
60141c9b009Schristos static unsigned
lz_get_dict_size(unsigned char c)60241c9b009Schristos lz_get_dict_size(unsigned char c)
60341c9b009Schristos {
60441c9b009Schristos unsigned dict_size = 1 << (c & 0x1f);
605*5b4135cdSchristos dict_size -= (dict_size >> 4) * ( (c >> 5) & 0x7);
60641c9b009Schristos if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE)
60741c9b009Schristos return 0;
60841c9b009Schristos return dict_size;
60941c9b009Schristos }
61041c9b009Schristos
61141c9b009Schristos static off_t
unlz(int fin,int fout,char * pre,size_t prelen,off_t * bytes_in)61241c9b009Schristos unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in)
61341c9b009Schristos {
61441c9b009Schristos if (lz_crc[0] == 0)
61541c9b009Schristos lz_crc_init();
61641c9b009Schristos
61741c9b009Schristos char header[HDR_SIZE];
61841c9b009Schristos
619bc30e841Schristos if (pre && prelen)
620bc30e841Schristos memcpy(header, pre, prelen);
621bc30e841Schristos
622bc30e841Schristos ssize_t nr = read(fin, header + prelen, sizeof(header) - prelen);
623bc30e841Schristos switch (nr) {
624bc30e841Schristos case -1:
625bc30e841Schristos return -1;
626bc30e841Schristos case 0:
627bc30e841Schristos return prelen ? -1 : 0;
628bc30e841Schristos default:
629bc30e841Schristos if ((size_t)nr != sizeof(header) - prelen)
630bc30e841Schristos return -1;
631bc30e841Schristos break;
63241c9b009Schristos }
63341c9b009Schristos
63441c9b009Schristos if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0)
63541c9b009Schristos return -1;
63641c9b009Schristos
63741c9b009Schristos unsigned dict_size = lz_get_dict_size(header[5]);
63841c9b009Schristos if (dict_size == 0)
63941c9b009Schristos return -1;
64041c9b009Schristos
641bc30e841Schristos return lz_decode(fin, fout, dict_size, bytes_in);
64241c9b009Schristos }
643