xref: /dflybsd-src/contrib/xz/src/liblzma/common/index_decoder.c (revision 46a2189dd86b644c3a76ac281d84b4182fd66b95)
12940b44dSPeter Avalos ///////////////////////////////////////////////////////////////////////////////
22940b44dSPeter Avalos //
32940b44dSPeter Avalos /// \file       index_decoder.c
42940b44dSPeter Avalos /// \brief      Decodes the Index field
52940b44dSPeter Avalos //
62940b44dSPeter Avalos //  Author:     Lasse Collin
72940b44dSPeter Avalos //
82940b44dSPeter Avalos //  This file has been put into the public domain.
92940b44dSPeter Avalos //  You can do whatever you want with this file.
102940b44dSPeter Avalos //
112940b44dSPeter Avalos ///////////////////////////////////////////////////////////////////////////////
122940b44dSPeter Avalos 
132940b44dSPeter Avalos #include "index.h"
142940b44dSPeter Avalos #include "check.h"
152940b44dSPeter Avalos 
162940b44dSPeter Avalos 
17*46a2189dSzrj typedef struct {
182940b44dSPeter Avalos 	enum {
192940b44dSPeter Avalos 		SEQ_INDICATOR,
202940b44dSPeter Avalos 		SEQ_COUNT,
212940b44dSPeter Avalos 		SEQ_MEMUSAGE,
222940b44dSPeter Avalos 		SEQ_UNPADDED,
232940b44dSPeter Avalos 		SEQ_UNCOMPRESSED,
242940b44dSPeter Avalos 		SEQ_PADDING_INIT,
252940b44dSPeter Avalos 		SEQ_PADDING,
262940b44dSPeter Avalos 		SEQ_CRC32,
272940b44dSPeter Avalos 	} sequence;
282940b44dSPeter Avalos 
292940b44dSPeter Avalos 	/// Memory usage limit
302940b44dSPeter Avalos 	uint64_t memlimit;
312940b44dSPeter Avalos 
322940b44dSPeter Avalos 	/// Target Index
332940b44dSPeter Avalos 	lzma_index *index;
342940b44dSPeter Avalos 
352940b44dSPeter Avalos 	/// Pointer give by the application, which is set after
362940b44dSPeter Avalos 	/// successful decoding.
372940b44dSPeter Avalos 	lzma_index **index_ptr;
382940b44dSPeter Avalos 
392940b44dSPeter Avalos 	/// Number of Records left to decode.
402940b44dSPeter Avalos 	lzma_vli count;
412940b44dSPeter Avalos 
422940b44dSPeter Avalos 	/// The most recent Unpadded Size field
432940b44dSPeter Avalos 	lzma_vli unpadded_size;
442940b44dSPeter Avalos 
452940b44dSPeter Avalos 	/// The most recent Uncompressed Size field
462940b44dSPeter Avalos 	lzma_vli uncompressed_size;
472940b44dSPeter Avalos 
482940b44dSPeter Avalos 	/// Position in integers
492940b44dSPeter Avalos 	size_t pos;
502940b44dSPeter Avalos 
512940b44dSPeter Avalos 	/// CRC32 of the List of Records field
522940b44dSPeter Avalos 	uint32_t crc32;
53*46a2189dSzrj } lzma_index_coder;
542940b44dSPeter Avalos 
552940b44dSPeter Avalos 
562940b44dSPeter Avalos static lzma_ret
index_decode(void * coder_ptr,const lzma_allocator * allocator,const uint8_t * restrict in,size_t * restrict in_pos,size_t in_size,uint8_t * restrict out lzma_attribute ((__unused__)),size_t * restrict out_pos lzma_attribute ((__unused__)),size_t out_size lzma_attribute ((__unused__)),lzma_action action lzma_attribute ((__unused__)))57*46a2189dSzrj index_decode(void *coder_ptr, const lzma_allocator *allocator,
582940b44dSPeter Avalos 		const uint8_t *restrict in, size_t *restrict in_pos,
59114db65bSPeter Avalos 		size_t in_size,
60114db65bSPeter Avalos 		uint8_t *restrict out lzma_attribute((__unused__)),
61114db65bSPeter Avalos 		size_t *restrict out_pos lzma_attribute((__unused__)),
62114db65bSPeter Avalos 		size_t out_size lzma_attribute((__unused__)),
63114db65bSPeter Avalos 		lzma_action action lzma_attribute((__unused__)))
642940b44dSPeter Avalos {
65*46a2189dSzrj 	lzma_index_coder *coder = coder_ptr;
66*46a2189dSzrj 
672940b44dSPeter Avalos 	// Similar optimization as in index_encoder.c
682940b44dSPeter Avalos 	const size_t in_start = *in_pos;
692940b44dSPeter Avalos 	lzma_ret ret = LZMA_OK;
702940b44dSPeter Avalos 
712940b44dSPeter Avalos 	while (*in_pos < in_size)
722940b44dSPeter Avalos 	switch (coder->sequence) {
732940b44dSPeter Avalos 	case SEQ_INDICATOR:
742940b44dSPeter Avalos 		// Return LZMA_DATA_ERROR instead of e.g. LZMA_PROG_ERROR or
752940b44dSPeter Avalos 		// LZMA_FORMAT_ERROR, because a typical usage case for Index
762940b44dSPeter Avalos 		// decoder is when parsing the Stream backwards. If seeking
772940b44dSPeter Avalos 		// backward from the Stream Footer gives us something that
782940b44dSPeter Avalos 		// doesn't begin with Index Indicator, the file is considered
792940b44dSPeter Avalos 		// corrupt, not "programming error" or "unrecognized file
802940b44dSPeter Avalos 		// format". One could argue that the application should
812940b44dSPeter Avalos 		// verify the Index Indicator before trying to decode the
822940b44dSPeter Avalos 		// Index, but well, I suppose it is simpler this way.
832940b44dSPeter Avalos 		if (in[(*in_pos)++] != 0x00)
842940b44dSPeter Avalos 			return LZMA_DATA_ERROR;
852940b44dSPeter Avalos 
862940b44dSPeter Avalos 		coder->sequence = SEQ_COUNT;
872940b44dSPeter Avalos 		break;
882940b44dSPeter Avalos 
892940b44dSPeter Avalos 	case SEQ_COUNT:
902940b44dSPeter Avalos 		ret = lzma_vli_decode(&coder->count, &coder->pos,
912940b44dSPeter Avalos 				in, in_pos, in_size);
922940b44dSPeter Avalos 		if (ret != LZMA_STREAM_END)
932940b44dSPeter Avalos 			goto out;
942940b44dSPeter Avalos 
952940b44dSPeter Avalos 		coder->pos = 0;
962940b44dSPeter Avalos 		coder->sequence = SEQ_MEMUSAGE;
972940b44dSPeter Avalos 
982940b44dSPeter Avalos 	// Fall through
992940b44dSPeter Avalos 
1002940b44dSPeter Avalos 	case SEQ_MEMUSAGE:
1012940b44dSPeter Avalos 		if (lzma_index_memusage(1, coder->count) > coder->memlimit) {
1022940b44dSPeter Avalos 			ret = LZMA_MEMLIMIT_ERROR;
1032940b44dSPeter Avalos 			goto out;
1042940b44dSPeter Avalos 		}
1052940b44dSPeter Avalos 
1062940b44dSPeter Avalos 		// Tell the Index handling code how many Records this
1072940b44dSPeter Avalos 		// Index has to allow it to allocate memory more efficiently.
1082940b44dSPeter Avalos 		lzma_index_prealloc(coder->index, coder->count);
1092940b44dSPeter Avalos 
1102940b44dSPeter Avalos 		ret = LZMA_OK;
1112940b44dSPeter Avalos 		coder->sequence = coder->count == 0
1122940b44dSPeter Avalos 				? SEQ_PADDING_INIT : SEQ_UNPADDED;
1132940b44dSPeter Avalos 		break;
1142940b44dSPeter Avalos 
1152940b44dSPeter Avalos 	case SEQ_UNPADDED:
1162940b44dSPeter Avalos 	case SEQ_UNCOMPRESSED: {
1172940b44dSPeter Avalos 		lzma_vli *size = coder->sequence == SEQ_UNPADDED
1182940b44dSPeter Avalos 				? &coder->unpadded_size
1192940b44dSPeter Avalos 				: &coder->uncompressed_size;
1202940b44dSPeter Avalos 
1212940b44dSPeter Avalos 		ret = lzma_vli_decode(size, &coder->pos,
1222940b44dSPeter Avalos 				in, in_pos, in_size);
1232940b44dSPeter Avalos 		if (ret != LZMA_STREAM_END)
1242940b44dSPeter Avalos 			goto out;
1252940b44dSPeter Avalos 
1262940b44dSPeter Avalos 		ret = LZMA_OK;
1272940b44dSPeter Avalos 		coder->pos = 0;
1282940b44dSPeter Avalos 
1292940b44dSPeter Avalos 		if (coder->sequence == SEQ_UNPADDED) {
1302940b44dSPeter Avalos 			// Validate that encoded Unpadded Size isn't too small
1312940b44dSPeter Avalos 			// or too big.
1322940b44dSPeter Avalos 			if (coder->unpadded_size < UNPADDED_SIZE_MIN
1332940b44dSPeter Avalos 					|| coder->unpadded_size
1342940b44dSPeter Avalos 						> UNPADDED_SIZE_MAX)
1352940b44dSPeter Avalos 				return LZMA_DATA_ERROR;
1362940b44dSPeter Avalos 
1372940b44dSPeter Avalos 			coder->sequence = SEQ_UNCOMPRESSED;
1382940b44dSPeter Avalos 		} else {
1392940b44dSPeter Avalos 			// Add the decoded Record to the Index.
1402940b44dSPeter Avalos 			return_if_error(lzma_index_append(
1412940b44dSPeter Avalos 					coder->index, allocator,
1422940b44dSPeter Avalos 					coder->unpadded_size,
1432940b44dSPeter Avalos 					coder->uncompressed_size));
1442940b44dSPeter Avalos 
1452940b44dSPeter Avalos 			// Check if this was the last Record.
1462940b44dSPeter Avalos 			coder->sequence = --coder->count == 0
1472940b44dSPeter Avalos 					? SEQ_PADDING_INIT
1482940b44dSPeter Avalos 					: SEQ_UNPADDED;
1492940b44dSPeter Avalos 		}
1502940b44dSPeter Avalos 
1512940b44dSPeter Avalos 		break;
1522940b44dSPeter Avalos 	}
1532940b44dSPeter Avalos 
1542940b44dSPeter Avalos 	case SEQ_PADDING_INIT:
1552940b44dSPeter Avalos 		coder->pos = lzma_index_padding_size(coder->index);
1562940b44dSPeter Avalos 		coder->sequence = SEQ_PADDING;
1572940b44dSPeter Avalos 
1582940b44dSPeter Avalos 	// Fall through
1592940b44dSPeter Avalos 
1602940b44dSPeter Avalos 	case SEQ_PADDING:
1612940b44dSPeter Avalos 		if (coder->pos > 0) {
1622940b44dSPeter Avalos 			--coder->pos;
1632940b44dSPeter Avalos 			if (in[(*in_pos)++] != 0x00)
1642940b44dSPeter Avalos 				return LZMA_DATA_ERROR;
1652940b44dSPeter Avalos 
1662940b44dSPeter Avalos 			break;
1672940b44dSPeter Avalos 		}
1682940b44dSPeter Avalos 
1692940b44dSPeter Avalos 		// Finish the CRC32 calculation.
1702940b44dSPeter Avalos 		coder->crc32 = lzma_crc32(in + in_start,
1712940b44dSPeter Avalos 				*in_pos - in_start, coder->crc32);
1722940b44dSPeter Avalos 
1732940b44dSPeter Avalos 		coder->sequence = SEQ_CRC32;
1742940b44dSPeter Avalos 
1752940b44dSPeter Avalos 	// Fall through
1762940b44dSPeter Avalos 
1772940b44dSPeter Avalos 	case SEQ_CRC32:
1782940b44dSPeter Avalos 		do {
1792940b44dSPeter Avalos 			if (*in_pos == in_size)
1802940b44dSPeter Avalos 				return LZMA_OK;
1812940b44dSPeter Avalos 
1822940b44dSPeter Avalos 			if (((coder->crc32 >> (coder->pos * 8)) & 0xFF)
1832940b44dSPeter Avalos 					!= in[(*in_pos)++])
1842940b44dSPeter Avalos 				return LZMA_DATA_ERROR;
1852940b44dSPeter Avalos 
1862940b44dSPeter Avalos 		} while (++coder->pos < 4);
1872940b44dSPeter Avalos 
1882940b44dSPeter Avalos 		// Decoding was successful, now we can let the application
1892940b44dSPeter Avalos 		// see the decoded Index.
1902940b44dSPeter Avalos 		*coder->index_ptr = coder->index;
1912940b44dSPeter Avalos 
1922940b44dSPeter Avalos 		// Make index NULL so we don't free it unintentionally.
1932940b44dSPeter Avalos 		coder->index = NULL;
1942940b44dSPeter Avalos 
1952940b44dSPeter Avalos 		return LZMA_STREAM_END;
1962940b44dSPeter Avalos 
1972940b44dSPeter Avalos 	default:
1982940b44dSPeter Avalos 		assert(0);
1992940b44dSPeter Avalos 		return LZMA_PROG_ERROR;
2002940b44dSPeter Avalos 	}
2012940b44dSPeter Avalos 
2022940b44dSPeter Avalos out:
2032940b44dSPeter Avalos 	// Update the CRC32,
2042940b44dSPeter Avalos 	coder->crc32 = lzma_crc32(in + in_start,
2052940b44dSPeter Avalos 			*in_pos - in_start, coder->crc32);
2062940b44dSPeter Avalos 
2072940b44dSPeter Avalos 	return ret;
2082940b44dSPeter Avalos }
2092940b44dSPeter Avalos 
2102940b44dSPeter Avalos 
2112940b44dSPeter Avalos static void
index_decoder_end(void * coder_ptr,const lzma_allocator * allocator)212*46a2189dSzrj index_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
2132940b44dSPeter Avalos {
214*46a2189dSzrj 	lzma_index_coder *coder = coder_ptr;
2152940b44dSPeter Avalos 	lzma_index_end(coder->index, allocator);
2162940b44dSPeter Avalos 	lzma_free(coder, allocator);
2172940b44dSPeter Avalos 	return;
2182940b44dSPeter Avalos }
2192940b44dSPeter Avalos 
2202940b44dSPeter Avalos 
2212940b44dSPeter Avalos static lzma_ret
index_decoder_memconfig(void * coder_ptr,uint64_t * memusage,uint64_t * old_memlimit,uint64_t new_memlimit)222*46a2189dSzrj index_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
2232940b44dSPeter Avalos 		uint64_t *old_memlimit, uint64_t new_memlimit)
2242940b44dSPeter Avalos {
225*46a2189dSzrj 	lzma_index_coder *coder = coder_ptr;
226*46a2189dSzrj 
2272940b44dSPeter Avalos 	*memusage = lzma_index_memusage(1, coder->count);
2282940b44dSPeter Avalos 	*old_memlimit = coder->memlimit;
2292940b44dSPeter Avalos 
2302940b44dSPeter Avalos 	if (new_memlimit != 0) {
2312940b44dSPeter Avalos 		if (new_memlimit < *memusage)
2322940b44dSPeter Avalos 			return LZMA_MEMLIMIT_ERROR;
2332940b44dSPeter Avalos 
2342940b44dSPeter Avalos 		coder->memlimit = new_memlimit;
2352940b44dSPeter Avalos 	}
2362940b44dSPeter Avalos 
2372940b44dSPeter Avalos 	return LZMA_OK;
2382940b44dSPeter Avalos }
2392940b44dSPeter Avalos 
2402940b44dSPeter Avalos 
2412940b44dSPeter Avalos static lzma_ret
index_decoder_reset(lzma_index_coder * coder,const lzma_allocator * allocator,lzma_index ** i,uint64_t memlimit)242*46a2189dSzrj index_decoder_reset(lzma_index_coder *coder, const lzma_allocator *allocator,
2432940b44dSPeter Avalos 		lzma_index **i, uint64_t memlimit)
2442940b44dSPeter Avalos {
2452940b44dSPeter Avalos 	// Remember the pointer given by the application. We will set it
2462940b44dSPeter Avalos 	// to point to the decoded Index only if decoding is successful.
2472940b44dSPeter Avalos 	// Before that, keep it NULL so that applications can always safely
2482940b44dSPeter Avalos 	// pass it to lzma_index_end() no matter did decoding succeed or not.
2492940b44dSPeter Avalos 	coder->index_ptr = i;
2502940b44dSPeter Avalos 	*i = NULL;
2512940b44dSPeter Avalos 
2522940b44dSPeter Avalos 	// We always allocate a new lzma_index.
2532940b44dSPeter Avalos 	coder->index = lzma_index_init(allocator);
2542940b44dSPeter Avalos 	if (coder->index == NULL)
2552940b44dSPeter Avalos 		return LZMA_MEM_ERROR;
2562940b44dSPeter Avalos 
2572940b44dSPeter Avalos 	// Initialize the rest.
2582940b44dSPeter Avalos 	coder->sequence = SEQ_INDICATOR;
259*46a2189dSzrj 	coder->memlimit = my_max(1, memlimit);
2602940b44dSPeter Avalos 	coder->count = 0; // Needs to be initialized due to _memconfig().
2612940b44dSPeter Avalos 	coder->pos = 0;
2622940b44dSPeter Avalos 	coder->crc32 = 0;
2632940b44dSPeter Avalos 
2642940b44dSPeter Avalos 	return LZMA_OK;
2652940b44dSPeter Avalos }
2662940b44dSPeter Avalos 
2672940b44dSPeter Avalos 
2682940b44dSPeter Avalos static lzma_ret
index_decoder_init(lzma_next_coder * next,const lzma_allocator * allocator,lzma_index ** i,uint64_t memlimit)26915ab8c86SJohn Marino index_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
2702940b44dSPeter Avalos 		lzma_index **i, uint64_t memlimit)
2712940b44dSPeter Avalos {
2722940b44dSPeter Avalos 	lzma_next_coder_init(&index_decoder_init, next, allocator);
2732940b44dSPeter Avalos 
274*46a2189dSzrj 	if (i == NULL)
2752940b44dSPeter Avalos 		return LZMA_PROG_ERROR;
2762940b44dSPeter Avalos 
277*46a2189dSzrj 	lzma_index_coder *coder = next->coder;
278*46a2189dSzrj 	if (coder == NULL) {
279*46a2189dSzrj 		coder = lzma_alloc(sizeof(lzma_index_coder), allocator);
280*46a2189dSzrj 		if (coder == NULL)
2812940b44dSPeter Avalos 			return LZMA_MEM_ERROR;
2822940b44dSPeter Avalos 
283*46a2189dSzrj 		next->coder = coder;
2842940b44dSPeter Avalos 		next->code = &index_decode;
2852940b44dSPeter Avalos 		next->end = &index_decoder_end;
2862940b44dSPeter Avalos 		next->memconfig = &index_decoder_memconfig;
287*46a2189dSzrj 		coder->index = NULL;
2882940b44dSPeter Avalos 	} else {
289*46a2189dSzrj 		lzma_index_end(coder->index, allocator);
2902940b44dSPeter Avalos 	}
2912940b44dSPeter Avalos 
292*46a2189dSzrj 	return index_decoder_reset(coder, allocator, i, memlimit);
2932940b44dSPeter Avalos }
2942940b44dSPeter Avalos 
2952940b44dSPeter Avalos 
2962940b44dSPeter Avalos extern LZMA_API(lzma_ret)
lzma_index_decoder(lzma_stream * strm,lzma_index ** i,uint64_t memlimit)2972940b44dSPeter Avalos lzma_index_decoder(lzma_stream *strm, lzma_index **i, uint64_t memlimit)
2982940b44dSPeter Avalos {
2992940b44dSPeter Avalos 	lzma_next_strm_init(index_decoder_init, strm, i, memlimit);
3002940b44dSPeter Avalos 
3012940b44dSPeter Avalos 	strm->internal->supported_actions[LZMA_RUN] = true;
3022940b44dSPeter Avalos 	strm->internal->supported_actions[LZMA_FINISH] = true;
3032940b44dSPeter Avalos 
3042940b44dSPeter Avalos 	return LZMA_OK;
3052940b44dSPeter Avalos }
3062940b44dSPeter Avalos 
3072940b44dSPeter Avalos 
3082940b44dSPeter Avalos extern LZMA_API(lzma_ret)
lzma_index_buffer_decode(lzma_index ** i,uint64_t * memlimit,const lzma_allocator * allocator,const uint8_t * in,size_t * in_pos,size_t in_size)30915ab8c86SJohn Marino lzma_index_buffer_decode(lzma_index **i, uint64_t *memlimit,
31015ab8c86SJohn Marino 		const lzma_allocator *allocator,
3112940b44dSPeter Avalos 		const uint8_t *in, size_t *in_pos, size_t in_size)
3122940b44dSPeter Avalos {
3132940b44dSPeter Avalos 	// Sanity checks
3142940b44dSPeter Avalos 	if (i == NULL || memlimit == NULL
3152940b44dSPeter Avalos 			|| in == NULL || in_pos == NULL || *in_pos > in_size)
3162940b44dSPeter Avalos 		return LZMA_PROG_ERROR;
3172940b44dSPeter Avalos 
3182940b44dSPeter Avalos 	// Initialize the decoder.
319*46a2189dSzrj 	lzma_index_coder coder;
3202940b44dSPeter Avalos 	return_if_error(index_decoder_reset(&coder, allocator, i, *memlimit));
3212940b44dSPeter Avalos 
3222940b44dSPeter Avalos 	// Store the input start position so that we can restore it in case
3232940b44dSPeter Avalos 	// of an error.
3242940b44dSPeter Avalos 	const size_t in_start = *in_pos;
3252940b44dSPeter Avalos 
3262940b44dSPeter Avalos 	// Do the actual decoding.
3272940b44dSPeter Avalos 	lzma_ret ret = index_decode(&coder, allocator, in, in_pos, in_size,
3282940b44dSPeter Avalos 			NULL, NULL, 0, LZMA_RUN);
3292940b44dSPeter Avalos 
3302940b44dSPeter Avalos 	if (ret == LZMA_STREAM_END) {
3312940b44dSPeter Avalos 		ret = LZMA_OK;
3322940b44dSPeter Avalos 	} else {
3332940b44dSPeter Avalos 		// Something went wrong, free the Index structure and restore
3342940b44dSPeter Avalos 		// the input position.
3352940b44dSPeter Avalos 		lzma_index_end(coder.index, allocator);
3362940b44dSPeter Avalos 		*in_pos = in_start;
3372940b44dSPeter Avalos 
3382940b44dSPeter Avalos 		if (ret == LZMA_OK) {
3392940b44dSPeter Avalos 			// The input is truncated or otherwise corrupt.
3402940b44dSPeter Avalos 			// Use LZMA_DATA_ERROR instead of LZMA_BUF_ERROR
3412940b44dSPeter Avalos 			// like lzma_vli_decode() does in single-call mode.
3422940b44dSPeter Avalos 			ret = LZMA_DATA_ERROR;
3432940b44dSPeter Avalos 
3442940b44dSPeter Avalos 		} else if (ret == LZMA_MEMLIMIT_ERROR) {
3452940b44dSPeter Avalos 			// Tell the caller how much memory would have
3462940b44dSPeter Avalos 			// been needed.
3472940b44dSPeter Avalos 			*memlimit = lzma_index_memusage(1, coder.count);
3482940b44dSPeter Avalos 		}
3492940b44dSPeter Avalos 	}
3502940b44dSPeter Avalos 
3512940b44dSPeter Avalos 	return ret;
3522940b44dSPeter Avalos }
353