// SPDX-License-Identifier: 0BSD

///////////////////////////////////////////////////////////////////////////////
//
/// \file       lzma2_decoder.c
/// \brief      LZMA2 decoder
///
//  Authors:    Igor Pavlov
//              Lasse Collin
//
///////////////////////////////////////////////////////////////////////////////
12
13 #include "lzma2_decoder.h"
14 #include "lz_decoder.h"
15 #include "lzma_decoder.h"
16
17
18 typedef struct {
19 enum sequence {
20 SEQ_CONTROL,
21 SEQ_UNCOMPRESSED_1,
22 SEQ_UNCOMPRESSED_2,
23 SEQ_COMPRESSED_0,
24 SEQ_COMPRESSED_1,
25 SEQ_PROPERTIES,
26 SEQ_LZMA,
27 SEQ_COPY,
28 } sequence;
29
30 /// Sequence after the size fields have been decoded.
31 enum sequence next_sequence;
32
33 /// LZMA decoder
34 lzma_lz_decoder lzma;
35
36 /// Uncompressed size of LZMA chunk
37 size_t uncompressed_size;
38
39 /// Compressed size of the chunk (naturally equals to uncompressed
40 /// size of uncompressed chunk)
41 size_t compressed_size;
42
43 /// True if properties are needed. This is false before the
44 /// first LZMA chunk.
45 bool need_properties;
46
47 /// True if dictionary reset is needed. This is false before the
48 /// first chunk (LZMA or uncompressed).
49 bool need_dictionary_reset;
50
51 lzma_options_lzma options;
52 } lzma_lzma2_coder;
53
54
55 static lzma_ret
lzma2_decode(void * coder_ptr,lzma_dict * restrict dict,const uint8_t * restrict in,size_t * restrict in_pos,size_t in_size)56 lzma2_decode(void *coder_ptr, lzma_dict *restrict dict,
57 const uint8_t *restrict in, size_t *restrict in_pos,
58 size_t in_size)
59 {
60 lzma_lzma2_coder *restrict coder = coder_ptr;
61
62 // With SEQ_LZMA it is possible that no new input is needed to do
63 // some progress. The rest of the sequences assume that there is
64 // at least one byte of input.
65 while (*in_pos < in_size || coder->sequence == SEQ_LZMA)
66 switch (coder->sequence) {
67 case SEQ_CONTROL: {
68 const uint32_t control = in[*in_pos];
69 ++*in_pos;
70
71 // End marker
72 if (control == 0x00)
73 return LZMA_STREAM_END;
74
75 if (control >= 0xE0 || control == 1) {
76 // Dictionary reset implies that next LZMA chunk has
77 // to set new properties.
78 coder->need_properties = true;
79 coder->need_dictionary_reset = true;
80 } else if (coder->need_dictionary_reset) {
81 return LZMA_DATA_ERROR;
82 }
83
84 if (control >= 0x80) {
85 // LZMA chunk. The highest five bits of the
86 // uncompressed size are taken from the control byte.
87 coder->uncompressed_size = (control & 0x1F) << 16;
88 coder->sequence = SEQ_UNCOMPRESSED_1;
89
90 // See if there are new properties or if we need to
91 // reset the state.
92 if (control >= 0xC0) {
93 // When there are new properties, state reset
94 // is done at SEQ_PROPERTIES.
95 coder->need_properties = false;
96 coder->next_sequence = SEQ_PROPERTIES;
97
98 } else if (coder->need_properties) {
99 return LZMA_DATA_ERROR;
100
101 } else {
102 coder->next_sequence = SEQ_LZMA;
103
104 // If only state reset is wanted with old
105 // properties, do the resetting here for
106 // simplicity.
107 if (control >= 0xA0)
108 coder->lzma.reset(coder->lzma.coder,
109 &coder->options);
110 }
111 } else {
112 // Invalid control values
113 if (control > 2)
114 return LZMA_DATA_ERROR;
115
116 // It's uncompressed chunk
117 coder->sequence = SEQ_COMPRESSED_0;
118 coder->next_sequence = SEQ_COPY;
119 }
120
121 if (coder->need_dictionary_reset) {
122 // Finish the dictionary reset and let the caller
123 // flush the dictionary to the actual output buffer.
124 coder->need_dictionary_reset = false;
125 dict_reset(dict);
126 return LZMA_OK;
127 }
128
129 break;
130 }
131
132 case SEQ_UNCOMPRESSED_1:
133 coder->uncompressed_size += (uint32_t)(in[(*in_pos)++]) << 8;
134 coder->sequence = SEQ_UNCOMPRESSED_2;
135 break;
136
137 case SEQ_UNCOMPRESSED_2:
138 coder->uncompressed_size += in[(*in_pos)++] + 1U;
139 coder->sequence = SEQ_COMPRESSED_0;
140 coder->lzma.set_uncompressed(coder->lzma.coder,
141 coder->uncompressed_size, false);
142 break;
143
144 case SEQ_COMPRESSED_0:
145 coder->compressed_size = (uint32_t)(in[(*in_pos)++]) << 8;
146 coder->sequence = SEQ_COMPRESSED_1;
147 break;
148
149 case SEQ_COMPRESSED_1:
150 coder->compressed_size += in[(*in_pos)++] + 1U;
151 coder->sequence = coder->next_sequence;
152 break;
153
154 case SEQ_PROPERTIES:
155 if (lzma_lzma_lclppb_decode(&coder->options, in[(*in_pos)++]))
156 return LZMA_DATA_ERROR;
157
158 coder->lzma.reset(coder->lzma.coder, &coder->options);
159
160 coder->sequence = SEQ_LZMA;
161 break;
162
163 case SEQ_LZMA: {
164 // Store the start offset so that we can update
165 // coder->compressed_size later.
166 const size_t in_start = *in_pos;
167
168 // Decode from in[] to *dict.
169 const lzma_ret ret = coder->lzma.code(coder->lzma.coder,
170 dict, in, in_pos, in_size);
171
172 // Validate and update coder->compressed_size.
173 const size_t in_used = *in_pos - in_start;
174 if (in_used > coder->compressed_size)
175 return LZMA_DATA_ERROR;
176
177 coder->compressed_size -= in_used;
178
179 // Return if we didn't finish the chunk, or an error occurred.
180 if (ret != LZMA_STREAM_END)
181 return ret;
182
183 // The LZMA decoder must have consumed the whole chunk now.
184 // We don't need to worry about uncompressed size since it
185 // is checked by the LZMA decoder.
186 if (coder->compressed_size != 0)
187 return LZMA_DATA_ERROR;
188
189 coder->sequence = SEQ_CONTROL;
190 break;
191 }
192
193 case SEQ_COPY: {
194 // Copy from input to the dictionary as is.
195 dict_write(dict, in, in_pos, in_size, &coder->compressed_size);
196 if (coder->compressed_size != 0)
197 return LZMA_OK;
198
199 coder->sequence = SEQ_CONTROL;
200 break;
201 }
202
203 default:
204 assert(0);
205 return LZMA_PROG_ERROR;
206 }
207
208 return LZMA_OK;
209 }
210
211
212 static void
lzma2_decoder_end(void * coder_ptr,const lzma_allocator * allocator)213 lzma2_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
214 {
215 lzma_lzma2_coder *coder = coder_ptr;
216
217 assert(coder->lzma.end == NULL);
218 lzma_free(coder->lzma.coder, allocator);
219
220 lzma_free(coder, allocator);
221
222 return;
223 }
224
225
226 static lzma_ret
lzma2_decoder_init(lzma_lz_decoder * lz,const lzma_allocator * allocator,lzma_vli id lzma_attribute ((__unused__)),const void * opt,lzma_lz_options * lz_options)227 lzma2_decoder_init(lzma_lz_decoder *lz, const lzma_allocator *allocator,
228 lzma_vli id lzma_attribute((__unused__)), const void *opt,
229 lzma_lz_options *lz_options)
230 {
231 lzma_lzma2_coder *coder = lz->coder;
232 if (coder == NULL) {
233 coder = lzma_alloc(sizeof(lzma_lzma2_coder), allocator);
234 if (coder == NULL)
235 return LZMA_MEM_ERROR;
236
237 lz->coder = coder;
238 lz->code = &lzma2_decode;
239 lz->end = &lzma2_decoder_end;
240
241 coder->lzma = LZMA_LZ_DECODER_INIT;
242 }
243
244 const lzma_options_lzma *options = opt;
245
246 coder->sequence = SEQ_CONTROL;
247 coder->need_properties = true;
248 coder->need_dictionary_reset = options->preset_dict == NULL
249 || options->preset_dict_size == 0;
250
251 return lzma_lzma_decoder_create(&coder->lzma,
252 allocator, options, lz_options);
253 }
254
255
256 extern lzma_ret
lzma_lzma2_decoder_init(lzma_next_coder * next,const lzma_allocator * allocator,const lzma_filter_info * filters)257 lzma_lzma2_decoder_init(lzma_next_coder *next, const lzma_allocator *allocator,
258 const lzma_filter_info *filters)
259 {
260 // LZMA2 can only be the last filter in the chain. This is enforced
261 // by the raw_decoder initialization.
262 assert(filters[1].init == NULL);
263
264 return lzma_lz_decoder_init(next, allocator, filters,
265 &lzma2_decoder_init);
266 }
267
268
269 extern uint64_t
lzma_lzma2_decoder_memusage(const void * options)270 lzma_lzma2_decoder_memusage(const void *options)
271 {
272 return sizeof(lzma_lzma2_coder)
273 + lzma_lzma_decoder_memusage_nocheck(options);
274 }
275
276
277 extern lzma_ret
lzma_lzma2_props_decode(void ** options,const lzma_allocator * allocator,const uint8_t * props,size_t props_size)278 lzma_lzma2_props_decode(void **options, const lzma_allocator *allocator,
279 const uint8_t *props, size_t props_size)
280 {
281 if (props_size != 1)
282 return LZMA_OPTIONS_ERROR;
283
284 // Check that reserved bits are unset.
285 if (props[0] & 0xC0)
286 return LZMA_OPTIONS_ERROR;
287
288 // Decode the dictionary size.
289 if (props[0] > 40)
290 return LZMA_OPTIONS_ERROR;
291
292 lzma_options_lzma *opt = lzma_alloc(
293 sizeof(lzma_options_lzma), allocator);
294 if (opt == NULL)
295 return LZMA_MEM_ERROR;
296
297 if (props[0] == 40) {
298 opt->dict_size = UINT32_MAX;
299 } else {
300 opt->dict_size = 2 | (props[0] & 1U);
301 opt->dict_size <<= props[0] / 2U + 11;
302 }
303
304 opt->preset_dict = NULL;
305 opt->preset_dict_size = 0;
306
307 *options = opt;
308
309 return LZMA_OK;
310 }
311