// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       microlzma_decoder.c
/// \brief      Decode MicroLZMA format
//
//  Author:     Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////
1173ed8e77SXin LI
1273ed8e77SXin LI #include "lzma_decoder.h"
1373ed8e77SXin LI #include "lz_decoder.h"
1473ed8e77SXin LI
1573ed8e77SXin LI
1673ed8e77SXin LI typedef struct {
1773ed8e77SXin LI /// LZMA1 decoder
1873ed8e77SXin LI lzma_next_coder lzma;
1973ed8e77SXin LI
2073ed8e77SXin LI /// Compressed size of the stream as given by the application.
2173ed8e77SXin LI /// This must be exactly correct.
2273ed8e77SXin LI ///
2373ed8e77SXin LI /// This will be decremented when input is read.
2473ed8e77SXin LI uint64_t comp_size;
2573ed8e77SXin LI
2673ed8e77SXin LI /// Uncompressed size of the stream as given by the application.
2773ed8e77SXin LI /// This may be less than the actual uncompressed size if
2873ed8e77SXin LI /// uncomp_size_is_exact is false.
2973ed8e77SXin LI ///
3073ed8e77SXin LI /// This will be decremented when output is produced.
3173ed8e77SXin LI lzma_vli uncomp_size;
3273ed8e77SXin LI
3373ed8e77SXin LI /// LZMA dictionary size as given by the application
3473ed8e77SXin LI uint32_t dict_size;
3573ed8e77SXin LI
3673ed8e77SXin LI /// If true, the exact uncompressed size is known. If false,
3773ed8e77SXin LI /// uncomp_size may be smaller than the real uncompressed size;
3873ed8e77SXin LI /// uncomp_size may never be bigger than the real uncompressed size.
3973ed8e77SXin LI bool uncomp_size_is_exact;
4073ed8e77SXin LI
4173ed8e77SXin LI /// True once the first byte of the MicroLZMA stream
4273ed8e77SXin LI /// has been processed.
4373ed8e77SXin LI bool props_decoded;
4473ed8e77SXin LI } lzma_microlzma_coder;
4573ed8e77SXin LI
4673ed8e77SXin LI
4773ed8e77SXin LI static lzma_ret
microlzma_decode(void * coder_ptr,const lzma_allocator * allocator,const uint8_t * restrict in,size_t * restrict in_pos,size_t in_size,uint8_t * restrict out,size_t * restrict out_pos,size_t out_size,lzma_action action)4873ed8e77SXin LI microlzma_decode(void *coder_ptr, const lzma_allocator *allocator,
4973ed8e77SXin LI const uint8_t *restrict in, size_t *restrict in_pos,
5073ed8e77SXin LI size_t in_size, uint8_t *restrict out,
5173ed8e77SXin LI size_t *restrict out_pos, size_t out_size, lzma_action action)
5273ed8e77SXin LI {
5373ed8e77SXin LI lzma_microlzma_coder *coder = coder_ptr;
5473ed8e77SXin LI
5573ed8e77SXin LI // Remember the in start position so that we can update comp_size.
5673ed8e77SXin LI const size_t in_start = *in_pos;
5773ed8e77SXin LI
5873ed8e77SXin LI // Remember the out start position so that we can update uncomp_size.
5973ed8e77SXin LI const size_t out_start = *out_pos;
6073ed8e77SXin LI
6173ed8e77SXin LI // Limit the amount of input so that the decoder won't read more than
6273ed8e77SXin LI // comp_size. This is required when uncomp_size isn't exact because
6373ed8e77SXin LI // in that case the LZMA decoder will try to decode more input even
6473ed8e77SXin LI // when it has no output space (it can be looking for EOPM).
6573ed8e77SXin LI if (in_size - *in_pos > coder->comp_size)
6673ed8e77SXin LI in_size = *in_pos + (size_t)(coder->comp_size);
6773ed8e77SXin LI
6873ed8e77SXin LI // When the exact uncompressed size isn't known, we must limit
6973ed8e77SXin LI // the available output space to prevent the LZMA decoder from
7073ed8e77SXin LI // trying to decode too much.
7173ed8e77SXin LI if (!coder->uncomp_size_is_exact
7273ed8e77SXin LI && out_size - *out_pos > coder->uncomp_size)
7373ed8e77SXin LI out_size = *out_pos + (size_t)(coder->uncomp_size);
7473ed8e77SXin LI
7573ed8e77SXin LI if (!coder->props_decoded) {
7673ed8e77SXin LI // There must be at least one byte of input to decode
7773ed8e77SXin LI // the properties byte.
7873ed8e77SXin LI if (*in_pos >= in_size)
7973ed8e77SXin LI return LZMA_OK;
8073ed8e77SXin LI
8173ed8e77SXin LI lzma_options_lzma options = {
8273ed8e77SXin LI .dict_size = coder->dict_size,
8373ed8e77SXin LI .preset_dict = NULL,
8473ed8e77SXin LI .preset_dict_size = 0,
8573ed8e77SXin LI .ext_flags = 0, // EOPM not allowed when size is known
8673ed8e77SXin LI .ext_size_low = UINT32_MAX, // Unknown size by default
8773ed8e77SXin LI .ext_size_high = UINT32_MAX,
8873ed8e77SXin LI };
8973ed8e77SXin LI
9073ed8e77SXin LI if (coder->uncomp_size_is_exact)
9173ed8e77SXin LI lzma_set_ext_size(options, coder->uncomp_size);
9273ed8e77SXin LI
9373ed8e77SXin LI // The properties are stored as bitwise-negation
9473ed8e77SXin LI // of the typical encoding.
9573ed8e77SXin LI if (lzma_lzma_lclppb_decode(&options, ~in[*in_pos]))
9673ed8e77SXin LI return LZMA_OPTIONS_ERROR;
9773ed8e77SXin LI
9873ed8e77SXin LI ++*in_pos;
9973ed8e77SXin LI
10073ed8e77SXin LI // Initialize the decoder.
10173ed8e77SXin LI lzma_filter_info filters[2] = {
10273ed8e77SXin LI {
10373ed8e77SXin LI .id = LZMA_FILTER_LZMA1EXT,
10473ed8e77SXin LI .init = &lzma_lzma_decoder_init,
10573ed8e77SXin LI .options = &options,
10673ed8e77SXin LI }, {
10773ed8e77SXin LI .init = NULL,
10873ed8e77SXin LI }
10973ed8e77SXin LI };
11073ed8e77SXin LI
11173ed8e77SXin LI return_if_error(lzma_next_filter_init(&coder->lzma,
11273ed8e77SXin LI allocator, filters));
11373ed8e77SXin LI
11473ed8e77SXin LI // Pass one dummy 0x00 byte to the LZMA decoder since that
11573ed8e77SXin LI // is what it expects the first byte to be.
11673ed8e77SXin LI const uint8_t dummy_in = 0;
11773ed8e77SXin LI size_t dummy_in_pos = 0;
11873ed8e77SXin LI if (coder->lzma.code(coder->lzma.coder, allocator,
11973ed8e77SXin LI &dummy_in, &dummy_in_pos, 1,
12073ed8e77SXin LI out, out_pos, out_size, LZMA_RUN) != LZMA_OK)
12173ed8e77SXin LI return LZMA_PROG_ERROR;
12273ed8e77SXin LI
12373ed8e77SXin LI assert(dummy_in_pos == 1);
12473ed8e77SXin LI coder->props_decoded = true;
12573ed8e77SXin LI }
12673ed8e77SXin LI
12773ed8e77SXin LI // The rest is normal LZMA decoding.
12873ed8e77SXin LI lzma_ret ret = coder->lzma.code(coder->lzma.coder, allocator,
12973ed8e77SXin LI in, in_pos, in_size,
13073ed8e77SXin LI out, out_pos, out_size, action);
13173ed8e77SXin LI
13273ed8e77SXin LI // Update the remaining compressed size.
13373ed8e77SXin LI assert(coder->comp_size >= *in_pos - in_start);
13473ed8e77SXin LI coder->comp_size -= *in_pos - in_start;
13573ed8e77SXin LI
13673ed8e77SXin LI if (coder->uncomp_size_is_exact) {
13773ed8e77SXin LI // After successful decompression of the complete stream
13873ed8e77SXin LI // the compressed size must match.
13973ed8e77SXin LI if (ret == LZMA_STREAM_END && coder->comp_size != 0)
14073ed8e77SXin LI ret = LZMA_DATA_ERROR;
14173ed8e77SXin LI } else {
14273ed8e77SXin LI // Update the amount of output remaining.
14373ed8e77SXin LI assert(coder->uncomp_size >= *out_pos - out_start);
14473ed8e77SXin LI coder->uncomp_size -= *out_pos - out_start;
14573ed8e77SXin LI
14673ed8e77SXin LI // - We must not get LZMA_STREAM_END because the stream
14773ed8e77SXin LI // shouldn't have EOPM.
14873ed8e77SXin LI // - We must use uncomp_size to determine when to
14973ed8e77SXin LI // return LZMA_STREAM_END.
15073ed8e77SXin LI if (ret == LZMA_STREAM_END)
15173ed8e77SXin LI ret = LZMA_DATA_ERROR;
15273ed8e77SXin LI else if (coder->uncomp_size == 0)
15373ed8e77SXin LI ret = LZMA_STREAM_END;
15473ed8e77SXin LI }
15573ed8e77SXin LI
15673ed8e77SXin LI return ret;
15773ed8e77SXin LI }
15873ed8e77SXin LI
15973ed8e77SXin LI
16073ed8e77SXin LI static void
microlzma_decoder_end(void * coder_ptr,const lzma_allocator * allocator)16173ed8e77SXin LI microlzma_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
16273ed8e77SXin LI {
16373ed8e77SXin LI lzma_microlzma_coder *coder = coder_ptr;
16473ed8e77SXin LI lzma_next_end(&coder->lzma, allocator);
16573ed8e77SXin LI lzma_free(coder, allocator);
16673ed8e77SXin LI return;
16773ed8e77SXin LI }
16873ed8e77SXin LI
16973ed8e77SXin LI
17073ed8e77SXin LI static lzma_ret
microlzma_decoder_init(lzma_next_coder * next,const lzma_allocator * allocator,uint64_t comp_size,uint64_t uncomp_size,bool uncomp_size_is_exact,uint32_t dict_size)17173ed8e77SXin LI microlzma_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
17273ed8e77SXin LI uint64_t comp_size,
17373ed8e77SXin LI uint64_t uncomp_size, bool uncomp_size_is_exact,
17473ed8e77SXin LI uint32_t dict_size)
17573ed8e77SXin LI {
17673ed8e77SXin LI lzma_next_coder_init(µlzma_decoder_init, next, allocator);
17773ed8e77SXin LI
17873ed8e77SXin LI lzma_microlzma_coder *coder = next->coder;
17973ed8e77SXin LI
18073ed8e77SXin LI if (coder == NULL) {
18173ed8e77SXin LI coder = lzma_alloc(sizeof(lzma_microlzma_coder), allocator);
18273ed8e77SXin LI if (coder == NULL)
18373ed8e77SXin LI return LZMA_MEM_ERROR;
18473ed8e77SXin LI
18573ed8e77SXin LI next->coder = coder;
18673ed8e77SXin LI next->code = µlzma_decode;
18773ed8e77SXin LI next->end = µlzma_decoder_end;
18873ed8e77SXin LI
18973ed8e77SXin LI coder->lzma = LZMA_NEXT_CODER_INIT;
19073ed8e77SXin LI }
19173ed8e77SXin LI
19273ed8e77SXin LI // The public API is uint64_t but the internal LZ decoder API uses
19373ed8e77SXin LI // lzma_vli.
19473ed8e77SXin LI if (uncomp_size > LZMA_VLI_MAX)
19573ed8e77SXin LI return LZMA_OPTIONS_ERROR;
19673ed8e77SXin LI
19773ed8e77SXin LI coder->comp_size = comp_size;
19873ed8e77SXin LI coder->uncomp_size = uncomp_size;
19973ed8e77SXin LI coder->uncomp_size_is_exact = uncomp_size_is_exact;
20073ed8e77SXin LI coder->dict_size = dict_size;
20173ed8e77SXin LI
20273ed8e77SXin LI coder->props_decoded = false;
20373ed8e77SXin LI
20473ed8e77SXin LI return LZMA_OK;
20573ed8e77SXin LI }
20673ed8e77SXin LI
20773ed8e77SXin LI
20873ed8e77SXin LI extern LZMA_API(lzma_ret)
lzma_microlzma_decoder(lzma_stream * strm,uint64_t comp_size,uint64_t uncomp_size,lzma_bool uncomp_size_is_exact,uint32_t dict_size)20973ed8e77SXin LI lzma_microlzma_decoder(lzma_stream *strm, uint64_t comp_size,
21073ed8e77SXin LI uint64_t uncomp_size, lzma_bool uncomp_size_is_exact,
21173ed8e77SXin LI uint32_t dict_size)
21273ed8e77SXin LI {
21373ed8e77SXin LI lzma_next_strm_init(microlzma_decoder_init, strm, comp_size,
21473ed8e77SXin LI uncomp_size, uncomp_size_is_exact, dict_size);
21573ed8e77SXin LI
21673ed8e77SXin LI strm->internal->supported_actions[LZMA_RUN] = true;
21773ed8e77SXin LI strm->internal->supported_actions[LZMA_FINISH] = true;
21873ed8e77SXin LI
21973ed8e77SXin LI return LZMA_OK;
22073ed8e77SXin LI }
221