xref: /freebsd-src/contrib/xz/src/liblzma/common/microlzma_decoder.c (revision 3b35e7ee8de9b0260149a2b77e87a2b9c7a36244)
// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       microlzma_decoder.c
/// \brief      Decode MicroLZMA format
//
//  Author:     Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////
1173ed8e77SXin LI 
1273ed8e77SXin LI #include "lzma_decoder.h"
1373ed8e77SXin LI #include "lz_decoder.h"
1473ed8e77SXin LI 
1573ed8e77SXin LI 
1673ed8e77SXin LI typedef struct {
1773ed8e77SXin LI 	/// LZMA1 decoder
1873ed8e77SXin LI 	lzma_next_coder lzma;
1973ed8e77SXin LI 
2073ed8e77SXin LI 	/// Compressed size of the stream as given by the application.
2173ed8e77SXin LI 	/// This must be exactly correct.
2273ed8e77SXin LI 	///
2373ed8e77SXin LI 	/// This will be decremented when input is read.
2473ed8e77SXin LI 	uint64_t comp_size;
2573ed8e77SXin LI 
2673ed8e77SXin LI 	/// Uncompressed size of the stream as given by the application.
2773ed8e77SXin LI 	/// This may be less than the actual uncompressed size if
2873ed8e77SXin LI 	/// uncomp_size_is_exact is false.
2973ed8e77SXin LI 	///
3073ed8e77SXin LI 	/// This will be decremented when output is produced.
3173ed8e77SXin LI 	lzma_vli uncomp_size;
3273ed8e77SXin LI 
3373ed8e77SXin LI 	/// LZMA dictionary size as given by the application
3473ed8e77SXin LI 	uint32_t dict_size;
3573ed8e77SXin LI 
3673ed8e77SXin LI 	/// If true, the exact uncompressed size is known. If false,
3773ed8e77SXin LI 	/// uncomp_size may be smaller than the real uncompressed size;
3873ed8e77SXin LI 	/// uncomp_size may never be bigger than the real uncompressed size.
3973ed8e77SXin LI 	bool uncomp_size_is_exact;
4073ed8e77SXin LI 
4173ed8e77SXin LI 	/// True once the first byte of the MicroLZMA stream
4273ed8e77SXin LI 	/// has been processed.
4373ed8e77SXin LI 	bool props_decoded;
4473ed8e77SXin LI } lzma_microlzma_coder;
4573ed8e77SXin LI 
4673ed8e77SXin LI 
4773ed8e77SXin LI static lzma_ret
microlzma_decode(void * coder_ptr,const lzma_allocator * allocator,const uint8_t * restrict in,size_t * restrict in_pos,size_t in_size,uint8_t * restrict out,size_t * restrict out_pos,size_t out_size,lzma_action action)4873ed8e77SXin LI microlzma_decode(void *coder_ptr, const lzma_allocator *allocator,
4973ed8e77SXin LI 		const uint8_t *restrict in, size_t *restrict in_pos,
5073ed8e77SXin LI 		size_t in_size, uint8_t *restrict out,
5173ed8e77SXin LI 		size_t *restrict out_pos, size_t out_size, lzma_action action)
5273ed8e77SXin LI {
5373ed8e77SXin LI 	lzma_microlzma_coder *coder = coder_ptr;
5473ed8e77SXin LI 
5573ed8e77SXin LI 	// Remember the in start position so that we can update comp_size.
5673ed8e77SXin LI 	const size_t in_start = *in_pos;
5773ed8e77SXin LI 
5873ed8e77SXin LI 	// Remember the out start position so that we can update uncomp_size.
5973ed8e77SXin LI 	const size_t out_start = *out_pos;
6073ed8e77SXin LI 
6173ed8e77SXin LI 	// Limit the amount of input so that the decoder won't read more than
6273ed8e77SXin LI 	// comp_size. This is required when uncomp_size isn't exact because
6373ed8e77SXin LI 	// in that case the LZMA decoder will try to decode more input even
6473ed8e77SXin LI 	// when it has no output space (it can be looking for EOPM).
6573ed8e77SXin LI 	if (in_size - *in_pos > coder->comp_size)
6673ed8e77SXin LI 		in_size = *in_pos + (size_t)(coder->comp_size);
6773ed8e77SXin LI 
6873ed8e77SXin LI 	// When the exact uncompressed size isn't known, we must limit
6973ed8e77SXin LI 	// the available output space to prevent the LZMA decoder from
7073ed8e77SXin LI 	// trying to decode too much.
7173ed8e77SXin LI 	if (!coder->uncomp_size_is_exact
7273ed8e77SXin LI 			&& out_size - *out_pos > coder->uncomp_size)
7373ed8e77SXin LI 		out_size = *out_pos + (size_t)(coder->uncomp_size);
7473ed8e77SXin LI 
7573ed8e77SXin LI 	if (!coder->props_decoded) {
7673ed8e77SXin LI 		// There must be at least one byte of input to decode
7773ed8e77SXin LI 		// the properties byte.
7873ed8e77SXin LI 		if (*in_pos >= in_size)
7973ed8e77SXin LI 			return LZMA_OK;
8073ed8e77SXin LI 
8173ed8e77SXin LI 		lzma_options_lzma options = {
8273ed8e77SXin LI 			.dict_size = coder->dict_size,
8373ed8e77SXin LI 			.preset_dict = NULL,
8473ed8e77SXin LI 			.preset_dict_size = 0,
8573ed8e77SXin LI 			.ext_flags = 0, // EOPM not allowed when size is known
8673ed8e77SXin LI 			.ext_size_low = UINT32_MAX, // Unknown size by default
8773ed8e77SXin LI 			.ext_size_high = UINT32_MAX,
8873ed8e77SXin LI 		};
8973ed8e77SXin LI 
9073ed8e77SXin LI 		if (coder->uncomp_size_is_exact)
9173ed8e77SXin LI 			lzma_set_ext_size(options, coder->uncomp_size);
9273ed8e77SXin LI 
9373ed8e77SXin LI 		// The properties are stored as bitwise-negation
9473ed8e77SXin LI 		// of the typical encoding.
9573ed8e77SXin LI 		if (lzma_lzma_lclppb_decode(&options, ~in[*in_pos]))
9673ed8e77SXin LI 			return LZMA_OPTIONS_ERROR;
9773ed8e77SXin LI 
9873ed8e77SXin LI 		++*in_pos;
9973ed8e77SXin LI 
10073ed8e77SXin LI 		// Initialize the decoder.
10173ed8e77SXin LI 		lzma_filter_info filters[2] = {
10273ed8e77SXin LI 			{
10373ed8e77SXin LI 				.id = LZMA_FILTER_LZMA1EXT,
10473ed8e77SXin LI 				.init = &lzma_lzma_decoder_init,
10573ed8e77SXin LI 				.options = &options,
10673ed8e77SXin LI 			}, {
10773ed8e77SXin LI 				.init = NULL,
10873ed8e77SXin LI 			}
10973ed8e77SXin LI 		};
11073ed8e77SXin LI 
11173ed8e77SXin LI 		return_if_error(lzma_next_filter_init(&coder->lzma,
11273ed8e77SXin LI 				allocator, filters));
11373ed8e77SXin LI 
11473ed8e77SXin LI 		// Pass one dummy 0x00 byte to the LZMA decoder since that
11573ed8e77SXin LI 		// is what it expects the first byte to be.
11673ed8e77SXin LI 		const uint8_t dummy_in = 0;
11773ed8e77SXin LI 		size_t dummy_in_pos = 0;
11873ed8e77SXin LI 		if (coder->lzma.code(coder->lzma.coder, allocator,
11973ed8e77SXin LI 				&dummy_in, &dummy_in_pos, 1,
12073ed8e77SXin LI 				out, out_pos, out_size, LZMA_RUN) != LZMA_OK)
12173ed8e77SXin LI 			return LZMA_PROG_ERROR;
12273ed8e77SXin LI 
12373ed8e77SXin LI 		assert(dummy_in_pos == 1);
12473ed8e77SXin LI 		coder->props_decoded = true;
12573ed8e77SXin LI 	}
12673ed8e77SXin LI 
12773ed8e77SXin LI 	// The rest is normal LZMA decoding.
12873ed8e77SXin LI 	lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator,
12973ed8e77SXin LI 				in, in_pos, in_size,
13073ed8e77SXin LI 				out, out_pos, out_size, action);
13173ed8e77SXin LI 
13273ed8e77SXin LI 	// Update the remaining compressed size.
13373ed8e77SXin LI 	assert(coder->comp_size >= *in_pos - in_start);
13473ed8e77SXin LI 	coder->comp_size -= *in_pos - in_start;
13573ed8e77SXin LI 
13673ed8e77SXin LI 	if (coder->uncomp_size_is_exact) {
13773ed8e77SXin LI 		// After successful decompression of the complete stream
13873ed8e77SXin LI 		// the compressed size must match.
13973ed8e77SXin LI 		if (ret == LZMA_STREAM_END && coder->comp_size != 0)
14073ed8e77SXin LI 			ret = LZMA_DATA_ERROR;
14173ed8e77SXin LI 	} else {
14273ed8e77SXin LI 		// Update the amount of output remaining.
14373ed8e77SXin LI 		assert(coder->uncomp_size >= *out_pos - out_start);
14473ed8e77SXin LI 		coder->uncomp_size -= *out_pos - out_start;
14573ed8e77SXin LI 
14673ed8e77SXin LI 		// - We must not get LZMA_STREAM_END because the stream
14773ed8e77SXin LI 		//   shouldn't have EOPM.
14873ed8e77SXin LI 		// - We must use uncomp_size to determine when to
14973ed8e77SXin LI 		//   return LZMA_STREAM_END.
15073ed8e77SXin LI 		if (ret == LZMA_STREAM_END)
15173ed8e77SXin LI 			ret = LZMA_DATA_ERROR;
15273ed8e77SXin LI 		else if (coder->uncomp_size == 0)
15373ed8e77SXin LI 			ret = LZMA_STREAM_END;
15473ed8e77SXin LI 	}
15573ed8e77SXin LI 
15673ed8e77SXin LI 	return ret;
15773ed8e77SXin LI }
15873ed8e77SXin LI 
15973ed8e77SXin LI 
16073ed8e77SXin LI static void
microlzma_decoder_end(void * coder_ptr,const lzma_allocator * allocator)16173ed8e77SXin LI microlzma_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
16273ed8e77SXin LI {
16373ed8e77SXin LI 	lzma_microlzma_coder *coder = coder_ptr;
16473ed8e77SXin LI 	lzma_next_end(&coder->lzma, allocator);
16573ed8e77SXin LI 	lzma_free(coder, allocator);
16673ed8e77SXin LI 	return;
16773ed8e77SXin LI }
16873ed8e77SXin LI 
16973ed8e77SXin LI 
17073ed8e77SXin LI static lzma_ret
microlzma_decoder_init(lzma_next_coder * next,const lzma_allocator * allocator,uint64_t comp_size,uint64_t uncomp_size,bool uncomp_size_is_exact,uint32_t dict_size)17173ed8e77SXin LI microlzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
17273ed8e77SXin LI 		uint64_t comp_size,
17373ed8e77SXin LI 		uint64_t uncomp_size, bool uncomp_size_is_exact,
17473ed8e77SXin LI 		uint32_t dict_size)
17573ed8e77SXin LI {
17673ed8e77SXin LI 	lzma_next_coder_init(&microlzma_decoder_init, next, allocator);
17773ed8e77SXin LI 
17873ed8e77SXin LI 	lzma_microlzma_coder *coder = next->coder;
17973ed8e77SXin LI 
18073ed8e77SXin LI 	if (coder == NULL) {
18173ed8e77SXin LI 		coder = lzma_alloc(sizeof(lzma_microlzma_coder), allocator);
18273ed8e77SXin LI 		if (coder == NULL)
18373ed8e77SXin LI 			return LZMA_MEM_ERROR;
18473ed8e77SXin LI 
18573ed8e77SXin LI 		next->coder = coder;
18673ed8e77SXin LI 		next->code = &microlzma_decode;
18773ed8e77SXin LI 		next->end = &microlzma_decoder_end;
18873ed8e77SXin LI 
18973ed8e77SXin LI 		coder->lzma = LZMA_NEXT_CODER_INIT;
19073ed8e77SXin LI 	}
19173ed8e77SXin LI 
19273ed8e77SXin LI 	// The public API is uint64_t but the internal LZ decoder API uses
19373ed8e77SXin LI 	// lzma_vli.
19473ed8e77SXin LI 	if (uncomp_size > LZMA_VLI_MAX)
19573ed8e77SXin LI 		return LZMA_OPTIONS_ERROR;
19673ed8e77SXin LI 
19773ed8e77SXin LI 	coder->comp_size = comp_size;
19873ed8e77SXin LI 	coder->uncomp_size = uncomp_size;
19973ed8e77SXin LI 	coder->uncomp_size_is_exact = uncomp_size_is_exact;
20073ed8e77SXin LI 	coder->dict_size = dict_size;
20173ed8e77SXin LI 
20273ed8e77SXin LI 	coder->props_decoded = false;
20373ed8e77SXin LI 
20473ed8e77SXin LI 	return LZMA_OK;
20573ed8e77SXin LI }
20673ed8e77SXin LI 
20773ed8e77SXin LI 
20873ed8e77SXin LI extern LZMA_API(lzma_ret)
lzma_microlzma_decoder(lzma_stream * strm,uint64_t comp_size,uint64_t uncomp_size,lzma_bool uncomp_size_is_exact,uint32_t dict_size)20973ed8e77SXin LI lzma_microlzma_decoder(lzma_stream *strm, uint64_t comp_size,
21073ed8e77SXin LI 		uint64_t uncomp_size, lzma_bool uncomp_size_is_exact,
21173ed8e77SXin LI 		uint32_t dict_size)
21273ed8e77SXin LI {
21373ed8e77SXin LI 	lzma_next_strm_init(microlzma_decoder_init, strm, comp_size,
21473ed8e77SXin LI 			uncomp_size, uncomp_size_is_exact, dict_size);
21573ed8e77SXin LI 
21673ed8e77SXin LI 	strm->internal->supported_actions[LZMA_RUN] = true;
21773ed8e77SXin LI 	strm->internal->supported_actions[LZMA_FINISH] = true;
21873ed8e77SXin LI 
21973ed8e77SXin LI 	return LZMA_OK;
22073ed8e77SXin LI }
221