xref: /freebsd-src/contrib/xz/src/xz/coder.c (revision 3b35e7ee8de9b0260149a2b77e87a2b9c7a36244)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       coder.c
6 /// \brief      Compresses or uncompresses a file
7 //
8 //  Authors:    Lasse Collin
9 //              Jia Tan
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "private.h"
14 #include "tuklib_integer.h"
15 
16 
17 /// Return value type for coder_init().
18 enum coder_init_ret {
	/// A normal liblzma-based coder was initialized.
19 	CODER_INIT_NORMAL,
	/// Passthru mode should be used (the input is copied as is).
20 	CODER_INIT_PASSTHRU,
	/// An error occurred.
21 	CODER_INIT_ERROR,
22 };
23 
24 
/// Operation mode. Compression is the default.
25 enum operation_mode opt_mode = MODE_COMPRESS;
/// File format. With FORMAT_AUTO the format is detected from the input
/// when decompressing (see coder_init()).
26 enum format_type opt_format = FORMAT_AUTO;
/// If false, coder_set_compression_settings() gives up with
/// memlimit_too_small() instead of switching to single-threaded mode or
/// reducing the dictionary size.
27 bool opt_auto_adjust = true;
/// If true, the decoder isn't given LZMA_CONCATENATED and unconsumed
/// input is allowed to remain after decoding (see coder_init()).
28 bool opt_single_stream = false;
/// Block size for the multithreaded .xz encoder. Zero means that
/// coder_set_compression_settings() derives it from the filter chains.
29 uint64_t opt_block_size = 0;
/// Entries from --block-list, or NULL if the option wasn't used.
30 block_list_entry *opt_block_list = NULL;
/// Largest Block size in --block-list. Zero is treated as "not set".
31 uint64_t block_list_largest;
/// Bitmask of the filter chain numbers referenced by --block-list.
32 uint32_t block_list_chain_mask;
33 
34 /// Stream used to communicate with liblzma
35 static lzma_stream strm = LZMA_STREAM_INIT;
36 
37 /// Maximum number of filter chains. The first filter chain is the default,
38 /// and 9 other filter chains can be specified with --filtersX.
39 #define NUM_FILTER_CHAIN_MAX 10
40 
41 /// The default filter chain is in chains[0]. It is used for encoding
42 /// in all supported formats and also for decoding raw streams. The other
43 /// filter chains are set by --filtersX to support changing filters with
44 /// the --block-list option.
45 static lzma_filter chains[NUM_FILTER_CHAIN_MAX][LZMA_FILTERS_MAX + 1];
46 
47 /// Bitmask indicating which filter chains are actually used when encoding
48 /// in the .xz format. This is needed since the filter chains specified using
49 /// --filtersX (or the default filter chain) might in reality be unneeded
50 /// if they are never used in --block-list. When --block-list isn't
51 /// specified, only the default filter chain is used, thus the initial
52 /// value of this variable is 1U << 0 (the number of the default chain is 0).
53 static uint32_t chains_used_mask = 1U << 0;
54 
55 /// Input and output buffers
56 static io_buf in_buf;
57 static io_buf out_buf;
58 
59 /// Number of filters in the default filter chain. Zero indicates that
60 /// we are using a preset.
61 static uint32_t filters_count = 0;
62 
63 /// Number of the preset (0-9)
64 static uint32_t preset_number = LZMA_PRESET_DEFAULT;
65 
66 /// True if the current default filter chain was set using the --filters
67 /// option. The filter chain is reset if a preset option (like -9) or an
68 /// old-style filter option (like --lzma2) is used after a --filters option.
69 static bool string_to_filter_used = false;
70 
71 /// Integrity check type
72 static lzma_check check;
73 
74 /// This becomes false if the --check=CHECK option is used.
75 static bool check_default = true;
76 
77 /// Indicates if unconsumed input is allowed to remain after
78 /// decoding has successfully finished. This is set for each file
79 /// in coder_init().
80 static bool allow_trailing_input;
81 
82 #ifdef MYTHREAD_ENABLED
/// Options for liblzma's multithreaded .xz encoder and decoder. The other
/// members are filled in by coder_set_compression_settings() and
/// coder_init() before the struct is passed to liblzma.
83 static lzma_mt mt_options = {
84 	.flags = 0,
85 	.timeout = 300,
86 };
87 #endif
88 
89 
90 extern void
coder_set_check(lzma_check new_check)91 coder_set_check(lzma_check new_check)
92 {
93 	check = new_check;
94 	check_default = false;
95 	return;
96 }
97 
98 
99 static void
forget_filter_chain(void)100 forget_filter_chain(void)
101 {
102 	// Setting a preset or using --filters makes us forget
103 	// the earlier custom filter chain (if any).
104 	if (filters_count > 0) {
105 		lzma_filters_free(chains[0], NULL);
106 		filters_count = 0;
107 	}
108 
109 	string_to_filter_used = false;
110 	return;
111 }
112 
113 
114 extern void
coder_set_preset(uint32_t new_preset)115 coder_set_preset(uint32_t new_preset)
116 {
117 	preset_number &= ~LZMA_PRESET_LEVEL_MASK;
118 	preset_number |= new_preset;
119 	forget_filter_chain();
120 	return;
121 }
122 
123 
124 extern void
coder_set_extreme(void)125 coder_set_extreme(void)
126 {
127 	preset_number |= LZMA_PRESET_EXTREME;
128 	forget_filter_chain();
129 	return;
130 }
131 
132 
133 extern void
coder_add_filter(lzma_vli id,void * options)134 coder_add_filter(lzma_vli id, void *options)
135 {
136 	if (filters_count == LZMA_FILTERS_MAX)
137 		message_fatal(_("Maximum number of filters is four"));
138 
139 	if (string_to_filter_used)
140 		forget_filter_chain();
141 
142 	chains[0][filters_count].id = id;
143 	chains[0][filters_count].options = options;
144 
145 	// Terminate the filter chain with LZMA_VLI_UNKNOWN to simplify
146 	// implementation of forget_filter_chain().
147 	chains[0][++filters_count].id = LZMA_VLI_UNKNOWN;
148 
149 	// Setting a custom filter chain makes us forget the preset options.
150 	// This makes a difference if one specifies e.g. "xz -9 --lzma2 -e"
151 	// where the custom filter chain resets the preset level back to
152 	// the default 6, making the example equivalent to "xz -6e".
153 	preset_number = LZMA_PRESET_DEFAULT;
154 
155 	return;
156 }
157 
158 
159 static void
str_to_filters(const char * str,uint32_t index,uint32_t flags)160 str_to_filters(const char *str, uint32_t index, uint32_t flags)
161 {
162 	int error_pos;
163 	const char *err = lzma_str_to_filters(str, &error_pos,
164 			chains[index], flags, NULL);
165 
166 	if (err != NULL) {
167 		char filter_num[2] = "";
168 		if (index > 0)
169 			filter_num[0] = '0' + index;
170 
171 		// FIXME? The message in err isn't translated.
172 		// Including the translations in the xz translations is
173 		// slightly ugly but possible. Creating a new domain for
174 		// liblzma might not be worth it especially since on some
175 		// OSes it adds extra dependencies to translation libraries.
176 		message(V_ERROR, _("Error in --filters%s=FILTERS option:"),
177 				filter_num);
178 		message(V_ERROR, "%s", str);
179 		message(V_ERROR, "%*s^", error_pos, "");
180 		message_fatal("%s", err);
181 	}
182 }
183 
184 
185 extern void
coder_add_filters_from_str(const char * filter_str)186 coder_add_filters_from_str(const char *filter_str)
187 {
188 	// Forget presets and previously defined filter chain. See
189 	// coder_add_filter() above for why preset_number must be reset too.
190 	forget_filter_chain();
191 	preset_number = LZMA_PRESET_DEFAULT;
192 
193 	string_to_filter_used = true;
194 
195 	// Include LZMA_STR_ALL_FILTERS so this can be used with --format=raw.
196 	str_to_filters(filter_str, 0, LZMA_STR_ALL_FILTERS);
197 
198 	// Set the filters_count to be the number of filters converted from
199 	// the string.
200 	for (filters_count = 0; chains[0][filters_count].id
201 			!= LZMA_VLI_UNKNOWN;
202 			++filters_count) ;
203 
204 	assert(filters_count > 0);
205 	return;
206 }
207 
208 
209 extern void
coder_add_block_filters(const char * str,size_t slot)210 coder_add_block_filters(const char *str, size_t slot)
211 {
212 	// Free old filters first, if they were previously allocated.
213 	if (chains_used_mask & (1U << slot))
214 		lzma_filters_free(chains[slot], NULL);
215 
216 	str_to_filters(str, slot, 0);
217 
218 	chains_used_mask |= 1U << slot;
219 }
220 
221 
222 tuklib_attr_noreturn
223 static void
memlimit_too_small(uint64_t memory_usage)224 memlimit_too_small(uint64_t memory_usage)
225 {
226 	message(V_ERROR, _("Memory usage limit is too low for the given "
227 			"filter setup."));
228 	message_mem_needed(V_ERROR, memory_usage);
229 	tuklib_exit(E_ERROR, E_ERROR, false);
230 }
231 
232 
233 #ifdef HAVE_ENCODERS
234 /// \brief      Calculate the memory usage of each filter chain.
235 ///
236 /// \param      chains_memusages    If non-NULL, the memusage of the encoder
237 ///                                 or decoder for each chain is stored in
238 ///                                 this array.
239 /// \param      mt                  If non-NULL, calculate memory usage of
240 ///                                 multithreaded encoder.
241 /// \param      encode              Whether to calculate encoder or decoder
242 ///                                 memory usage. This must be true if
243 ///                                 mt != NULL.
244 ///
245 /// \return     Return the highest memory usage of all of the filter chains.
246 static uint64_t
get_chains_memusage(uint64_t * chains_memusages,const lzma_mt * mt,bool encode)247 get_chains_memusage(uint64_t *chains_memusages, const lzma_mt *mt, bool encode)
248 {
249 	uint64_t max_memusage = 0;
250 
251 #ifdef MYTHREAD_ENABLED
252 	// Copy multithreading options to a temporary struct since the
253 	// "filters" member needs to be changed.
254 	lzma_mt mt_local;
255 	if (mt != NULL)
256 		mt_local = *mt;
257 #else
258 	(void)mt;
259 #endif
260 
261 	for (uint32_t i = 0; i < ARRAY_SIZE(chains); i++) {
262 		if (!(chains_used_mask & (1U << i)))
263 			continue;
264 
265 		uint64_t memusage = UINT64_MAX;
266 #ifdef MYTHREAD_ENABLED
267 		if (mt != NULL) {
268 			assert(encode);
269 			mt_local.filters = chains[i];
270 			memusage = lzma_stream_encoder_mt_memusage(&mt_local);
271 		} else
272 #endif
273 		if (encode) {
274 			memusage = lzma_raw_encoder_memusage(chains[i]);
275 		}
276 #ifdef HAVE_DECODERS
277 		else {
278 			memusage = lzma_raw_decoder_memusage(chains[i]);
279 		}
280 #endif
281 
282 		if (chains_memusages != NULL)
283 			chains_memusages[i] = memusage;
284 
285 		if (memusage > max_memusage)
286 			max_memusage = memusage;
287 	}
288 
289 	return max_memusage;
290 }
291 #endif
292 
293 
/// \brief      Validate and finalize the coder settings
///
/// The steps, in order:
///   - Pick the default integrity check if --check wasn't used.
///   - Verify that every filter chain referenced by --block-list was
///     specified, and limit chains_used_mask to the chains in use.
///   - Build the default filter chain from the preset if no custom
///     filter chain was given.
///   - Reject filter chains that don't fit the .lzma or .xz format or
///     that are incompatible with --flush-timeout.
///   - Calculate the memory usage and compare it against the limit.
///     If the limit is exceeded, first reduce the number of threads, then
///     switch to single-threaded mode, and finally scale down the LZMA1 or
///     LZMA2 dictionary size of each chain that is still too big. With
///     --format=raw or --no-adjust, memlimit_too_small() is called instead
///     of changing settings that would affect the compressed output.
294 extern void
coder_set_compression_settings(void)295 coder_set_compression_settings(void)
295 {
296 #ifdef HAVE_LZIP_DECODER
297 	// .lz compression isn't supported.
298 	assert(opt_format != FORMAT_LZIP);
299 #endif
300 
301 	// The default check type is CRC64, but fallback to CRC32
302 	// if CRC64 isn't supported by the copy of liblzma we are
303 	// using. CRC32 is always supported.
304 	if (check_default) {
305 		check = LZMA_CHECK_CRC64;
306 		if (!lzma_check_is_supported(check))
307 			check = LZMA_CHECK_CRC32;
308 	}
309 
310 #ifdef HAVE_ENCODERS
311 	if (opt_block_list != NULL) {
312 		// args.c ensures these.
313 		assert(opt_mode == MODE_COMPRESS);
314 		assert(opt_format == FORMAT_XZ);
315 
316 		// Find out if block_list_chain_mask has a bit set that
317 		// isn't set in chains_used_mask.
318 		const uint32_t missing_chains_mask
319 				= (block_list_chain_mask ^ chains_used_mask)
320 				& block_list_chain_mask;
321 
322 		// If a filter chain was specified in --block-list but no
323 		// matching --filtersX option was used, exit with an error.
324 		if (missing_chains_mask != 0) {
325 			// Get the number of the first missing filter chain
326 			// and show it in the error message.
327 			const unsigned first_missing
328 				= (unsigned)ctz32(missing_chains_mask);
329 
330 			message_fatal(_("filter chain %u used by "
331 				"--block-list but not specified "
332 				"with --filters%u="),
333 				first_missing, first_missing);
334 		}
335 
336 		// Omit the unused filter chains from mask of used chains.
337 		//
338 		// (FIXME? When built with debugging, coder_free() will free()
339 		// the filter chains (except the default chain) which makes
340 		// Valgrind show fewer reachable allocations. But coder_free()
341 		// uses this mask to determine which chains to free. Thus it
342 		// won't free the ones that are cleared here from the mask.
343 		// In practice this doesn't matter.)
344 		chains_used_mask &= block_list_chain_mask;
345 	} else {
346 		// Reset filters used mask in case --block-list is not
347 		// used, but --filtersX is used.
348 		chains_used_mask = 1U << 0;
349 	}
350 #endif
351 
352 	// Options for LZMA1 or LZMA2 in case we are using a preset.
	// This is static because chains[0][0].options is set to point
	// to it below, and chains[] outlives this function call.
353 	static lzma_options_lzma opt_lzma;
354 
355 	// The first filter in the chains[] array is for the default
356 	// filter chain.
357 	lzma_filter *default_filters = chains[0];
358 
359 	if (filters_count == 0 && chains_used_mask & 1) {
360 		// We are using a preset. This is not a good idea in raw mode
361 		// except when playing around with things. Different versions
362 		// of this software may use different options in presets, and
363 		// thus make uncompressing the raw data difficult.
364 		if (opt_format == FORMAT_RAW) {
365 			// The message is shown only if warnings are allowed
366 			// but the exit status isn't changed.
367 			message(V_WARNING, _("Using a preset in raw mode "
368 					"is discouraged."));
369 			message(V_WARNING, _("The exact options of the "
370 					"presets may vary between software "
371 					"versions."));
372 		}
373 
374 		// Get the preset for LZMA1 or LZMA2.
375 		if (lzma_lzma_preset(&opt_lzma, preset_number))
376 			message_bug();
377 
378 		// Use LZMA2 except with --format=lzma we use LZMA1.
379 		default_filters[0].id = opt_format == FORMAT_LZMA
380 				? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
381 		default_filters[0].options = &opt_lzma;
382 
383 		filters_count = 1;
384 
385 		// Terminate the filter options array.
386 		default_filters[1].id = LZMA_VLI_UNKNOWN;
387 	}
388 
389 	// If we are using the .lzma format, allow exactly one filter
390 	// which has to be LZMA1. There is no need to check if the default
391 	// filter chain is being used since it can only be disabled if
392 	// --block-list is used, which is incompatible with FORMAT_LZMA.
393 	if (opt_format == FORMAT_LZMA && (filters_count != 1
394 			|| default_filters[0].id != LZMA_FILTER_LZMA1))
395 		message_fatal(_("The .lzma format supports only "
396 				"the LZMA1 filter"));
397 
398 	// If we are using the .xz format, make sure that there is no LZMA1
399 	// filter to prevent LZMA_PROG_ERROR. With the chains from --filtersX
400 	// we have already ensured this by calling lzma_str_to_filters()
401 	// without setting the flags that would allow non-.xz filters.
402 	if (opt_format == FORMAT_XZ && chains_used_mask & 1)
403 		for (size_t i = 0; i < filters_count; ++i)
404 			if (default_filters[i].id == LZMA_FILTER_LZMA1)
405 				message_fatal(_("LZMA1 cannot be used "
406 						"with the .xz format"));
407 
408 	if (chains_used_mask & 1) {
409 		// Print the selected default filter chain.
410 		message_filters_show(V_DEBUG, default_filters);
411 	}
412 
413 	// The --flush-timeout option requires LZMA_SYNC_FLUSH support
414 	// from the filter chain. Currently the threaded encoder doesn't
415 	// support LZMA_SYNC_FLUSH so single-threaded mode must be used.
416 	if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) {
417 		for (unsigned i = 0; i < ARRAY_SIZE(chains); ++i) {
418 			if (!(chains_used_mask & (1U << i)))
419 				continue;
420 
			// Only LZMA2 and Delta are accepted here; any other
			// filter in a used chain is a fatal error.
421 			const lzma_filter *fc = chains[i];
422 			for (size_t j = 0; fc[j].id != LZMA_VLI_UNKNOWN; j++) {
423 				switch (fc[j].id) {
424 				case LZMA_FILTER_LZMA2:
425 				case LZMA_FILTER_DELTA:
426 					break;
427 
428 				default:
429 					message_fatal(_("Filter chain %u is "
430 							"incompatible with "
431 							"--flush-timeout"),
432 							i);
433 				}
434 			}
435 		}
436 
437 		if (hardware_threads_is_mt()) {
438 			message(V_WARNING, _("Switching to single-threaded "
439 					"mode due to --flush-timeout"));
440 			hardware_threads_set(1);
441 		}
442 	}
443 
444 	// Get memory limit and the memory usage of the used filter chains.
445 	// Note that if --format=raw was used, we can be decompressing
446 	// using the default filter chain.
447 	//
448 	// If multithreaded .xz compression is done, the memory limit
449 	// will be replaced.
450 	uint64_t memory_limit = hardware_memlimit_get(opt_mode);
	// UINT64_MAX is the error value that is checked after the
	// calculation. It also stays in place if the needed encoder or
	// decoder support wasn't compiled in.
451 	uint64_t memory_usage = UINT64_MAX;
452 
453 #ifdef HAVE_ENCODERS
454 	// Memory usage for each encoder filter chain (default
455 	// or --filtersX). The encoder options may need to be
456 	// scaled down depending on the memory usage limit.
457 	uint64_t encoder_memusages[ARRAY_SIZE(chains)];
458 #endif
459 
460 	if (opt_mode == MODE_COMPRESS) {
461 #ifdef HAVE_ENCODERS
462 #	ifdef MYTHREAD_ENABLED
463 		if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) {
464 			memory_limit = hardware_memlimit_mtenc_get();
465 			mt_options.threads = hardware_threads_get();
466 
467 			uint64_t block_size = opt_block_size;
468 
469 			// If opt_block_size is not set, find the maximum
470 			// recommended Block size based on the filter chains
471 			if (block_size == 0) {
472 				for (unsigned i = 0; i < ARRAY_SIZE(chains);
473 						i++) {
474 					if (!(chains_used_mask & (1U << i)))
475 						continue;
476 
477 					uint64_t size = lzma_mt_block_size(
478 							chains[i]);
479 
480 					// If this returns an error, then one
481 					// of the filter chains in use is
482 					// invalid, so there is no point in
483 					// progressing further.
484 					if (size == UINT64_MAX)
485 						message_fatal(_("Unsupported "
486 							"options in filter "
487 							"chain %u"), i);
488 
489 					if (size > block_size)
490 						block_size = size;
491 				}
492 
493 				// If --block-list was used and our current
494 				// Block size exceeds the largest size
495 				// in --block-list, reduce the Block size of
496 				// the multithreaded encoder. The extra size
497 				// would only be a waste of RAM. With a
498 				// smaller Block size we might even be able
499 				// to use more threads in some cases.
500 				if (block_list_largest > 0 && block_size
501 						> block_list_largest)
502 					block_size = block_list_largest;
503 			}
504 
505 			mt_options.block_size = block_size;
506 			mt_options.check = check;
507 
508 			memory_usage = get_chains_memusage(encoder_memusages,
509 						&mt_options, true);
510 			if (memory_usage != UINT64_MAX)
511 				message(V_DEBUG, _("Using up to %" PRIu32
512 						" threads."),
513 						mt_options.threads);
514 		} else
515 #	endif
516 		{
517 			memory_usage = get_chains_memusage(encoder_memusages,
518 					NULL, true);
519 		}
520 #endif
521 	} else {
522 #ifdef HAVE_DECODERS
523 		memory_usage = lzma_raw_decoder_memusage(default_filters);
524 #endif
525 	}
526 
527 	if (memory_usage == UINT64_MAX)
528 		message_fatal(_("Unsupported filter chain or filter options"));
529 
530 	// Print memory usage info before possible dictionary
531 	// size auto-adjusting.
532 	//
533 	// NOTE: If only encoder support was built, we cannot show
534 	// what the decoder memory usage will be.
535 	message_mem_needed(V_DEBUG, memory_usage);
536 
537 #if defined(HAVE_ENCODERS) && defined(HAVE_DECODERS)
538 	if (opt_mode == MODE_COMPRESS && message_verbosity_get() >= V_DEBUG) {
539 		const uint64_t decmem = get_chains_memusage(NULL, NULL, false);
540 		if (decmem != UINT64_MAX)
541 			message(V_DEBUG, _("Decompression will need "
542 					"%s MiB of memory."), uint64_to_str(
543 						round_up_to_mib(decmem), 0));
544 	}
545 #endif
546 
	// Nothing more to do if the limit is already met.
547 	if (memory_usage <= memory_limit)
548 		return;
549 
550 	// With --format=raw settings are never adjusted to meet
551 	// the memory usage limit.
552 	if (opt_format == FORMAT_RAW)
553 		memlimit_too_small(memory_usage);
554 
	// The rest of the function adjusts encoder settings only.
	// encoder_memusages[] was filled only in the MODE_COMPRESS branch.
555 	assert(opt_mode == MODE_COMPRESS);
556 
557 #ifdef HAVE_ENCODERS
558 #	ifdef MYTHREAD_ENABLED
559 	if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) {
560 		// Try to reduce the number of threads before
561 		// adjusting the compression settings down.
562 		while (mt_options.threads > 1) {
563 			// Reduce the number of threads by one and check
564 			// the memory usage.
565 			--mt_options.threads;
566 			memory_usage = get_chains_memusage(encoder_memusages,
567 					&mt_options, true);
568 			if (memory_usage == UINT64_MAX)
569 				message_bug();
570 
571 			if (memory_usage <= memory_limit) {
572 				// The memory usage is now low enough.
573 				//
574 				// Since 5.6.1: This is only shown at
575 				// V_DEBUG instead of V_WARNING because
576 				// changing the number of threads doesn't
577 				// affect the output. On some systems this
578 				// message would be too common now that
579 				// multithreaded compression is the default.
580 				message(V_DEBUG, _("Reduced the number of "
581 					"threads from %s to %s to not exceed "
582 					"the memory usage limit of %s MiB"),
583 					uint64_to_str(
584 						hardware_threads_get(), 0),
585 					uint64_to_str(mt_options.threads, 1),
586 					uint64_to_str(round_up_to_mib(
587 						memory_limit), 2));
588 				return;
589 			}
590 		}
591 
592 		// If the memory usage limit is only a soft limit (automatic
593 		// number of threads and no --memlimit-compress), the limit
594 		// is only used to reduce the number of threads and once at
595 		// just one thread, the limit is completely ignored. This
596 		// way -T0 won't use insane amount of memory but at the same
597 		// time the soft limit will never make xz fail and never make
598 		// xz change settings that would affect the compressed output.
599 		//
600 		// Since 5.6.1: Like above, this is now shown at V_DEBUG
601 		// instead of V_WARNING.
602 		if (hardware_memlimit_mtenc_is_default()) {
603 			message(V_DEBUG, _("Reduced the number of threads "
604 				"from %s to one. The automatic memory usage "
605 				"limit of %s MiB is still being exceeded. "
606 				"%s MiB of memory is required. "
607 				"Continuing anyway."),
608 				uint64_to_str(hardware_threads_get(), 0),
609 				uint64_to_str(
610 					round_up_to_mib(memory_limit), 1),
611 				uint64_to_str(
612 					round_up_to_mib(memory_usage), 2));
613 			return;
614 		}
615 
616 		// If --no-adjust was used, we cannot drop to single-threaded
617 		// mode since it produces different compressed output.
618 		//
619 		// NOTE: In xz 5.2.x, --no-adjust also prevented reducing
620 		// the number of threads. This changed in 5.3.3alpha.
621 		if (!opt_auto_adjust)
622 			memlimit_too_small(memory_usage);
623 
624 		// Switch to single-threaded mode. It uses
625 		// less memory than using one thread in
626 		// the multithreaded mode but the output
627 		// is also different.
628 		hardware_threads_set(1);
629 		memory_usage = get_chains_memusage(encoder_memusages,
630 				NULL, true);
631 		message(V_WARNING, _("Switching to single-threaded mode "
632 			"to not exceed the memory usage limit of %s MiB"),
633 			uint64_to_str(round_up_to_mib(memory_limit), 0));
634 	}
635 #	endif
636 
637 	if (memory_usage <= memory_limit)
638 		return;
639 
640 	// Don't adjust LZMA2 or LZMA1 dictionary size if --no-adjust
641 	// was specified as that would change the compressed output.
642 	if (!opt_auto_adjust)
643 		memlimit_too_small(memory_usage);
644 
645 	// Adjust each filter chain that is exceeding the memory usage limit.
646 	for (unsigned i = 0; i < ARRAY_SIZE(chains); i++) {
647 		// Skip unused chains.
648 		if (!(chains_used_mask & (1U << i)))
649 			continue;
650 
651 		// Skip chains that already meet the memory usage limit.
652 		if (encoder_memusages[i] <=  memory_limit)
653 			continue;
654 
655 		// Look for the last filter if it is LZMA2 or LZMA1, so we
656 		// can make it use less RAM. We cannot adjust other filters.
657 		unsigned j = 0;
658 		while (chains[i][j].id != LZMA_FILTER_LZMA2
659 				&& chains[i][j].id != LZMA_FILTER_LZMA1) {
660 			// NOTE: This displays the too high limit of this
661 			// particular filter chain. If multiple chains are
662 			// specified and another one would need more then
663 			// this message could be confusing. As long as LZMA2
664 			// is the only memory hungry filter in .xz this
665 			// doesn't matter at all in practice.
666 			//
667 			// FIXME? However, it's sort of odd still if we had
668 			// switched from multithreaded mode to single-threaded
669 			// mode because single-threaded produces different
670 			// output. So the messages could perhaps be clearer.
671 			// Another case of this is a few lines below.
672 			if (chains[i][j].id == LZMA_VLI_UNKNOWN)
673 				memlimit_too_small(encoder_memusages[i]);
674 
675 			++j;
676 		}
677 
678 		// Decrease the dictionary size until we meet the memory
679 		// usage limit. First round down to full mebibytes.
680 		lzma_options_lzma *opt = chains[i][j].options;
681 		const uint32_t orig_dict_size = opt->dict_size;
682 		opt->dict_size &= ~((UINT32_C(1) << 20) - 1);
683 
684 		while (true) {
685 			// If it is below 1 MiB, auto-adjusting failed.
686 			//
687 			// FIXME? See the FIXME a few lines above.
688 			if (opt->dict_size < (UINT32_C(1) << 20))
689 				memlimit_too_small(encoder_memusages[i]);
690 
691 			encoder_memusages[i]
692 				= lzma_raw_encoder_memusage(chains[i]);
693 			if (encoder_memusages[i] == UINT64_MAX)
694 				message_bug();
695 
696 			// Accept it if it is low enough.
697 			if (encoder_memusages[i] <= memory_limit)
698 				break;
699 
700 			// Otherwise adjust it 1 MiB down and try again.
701 			opt->dict_size -= UINT32_C(1) << 20;
702 		}
703 
704 		// Tell the user that we decreased the dictionary size.
705 		// The message is slightly different between the default
706 		// filter chain (0) or and chains from --filtersX.
707 		const char lzma_num = chains[i][j].id == LZMA_FILTER_LZMA2
708 					? '2' : '1';
709 		const char *from_size = uint64_to_str(orig_dict_size >> 20, 0);
710 		const char *to_size = uint64_to_str(opt->dict_size >> 20, 1);
711 		const char *limit_size = uint64_to_str(round_up_to_mib(
712 					memory_limit), 2);
713 		if (i == 0)
714 			message(V_WARNING, _("Adjusted LZMA%c dictionary "
715 				"size from %s MiB to %s MiB to not exceed the "
716 				"memory usage limit of %s MiB"),
717 				lzma_num, from_size, to_size, limit_size);
718 		else
719 			message(V_WARNING, _("Adjusted LZMA%c dictionary size "
720 				"for --filters%u from %s MiB to %s MiB to not "
721 				"exceed the memory usage limit of %s MiB"),
722 				lzma_num, i, from_size, to_size, limit_size);
723 	}
724 #endif
725 
726 	return;
727 }
729 
730 
731 #ifdef HAVE_DECODERS
732 /// Return true if the data in in_buf seems to be in the .xz format.
733 static bool
is_format_xz(void)734 is_format_xz(void)
735 {
736 	// Specify the magic as hex to be compatible with EBCDIC systems.
737 	static const uint8_t magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 };
738 	return strm.avail_in >= sizeof(magic)
739 			&& memcmp(in_buf.u8, magic, sizeof(magic)) == 0;
740 }
741 
742 
743 /// Return true if the data in in_buf seems to be in the .lzma format.
744 static bool
is_format_lzma(void)745 is_format_lzma(void)
746 {
747 	// The .lzma header is 13 bytes.
748 	if (strm.avail_in < 13)
749 		return false;
750 
751 	// Decode the LZMA1 properties.
752 	lzma_filter filter = { .id = LZMA_FILTER_LZMA1 };
753 	if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK)
754 		return false;
755 
756 	// A hack to ditch tons of false positives: We allow only dictionary
757 	// sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone
758 	// created only files with 2^n, but accepts any dictionary size.
759 	// If someone complains, this will be reconsidered.
760 	lzma_options_lzma *opt = filter.options;
761 	const uint32_t dict_size = opt->dict_size;
762 	free(opt);
763 
764 	if (dict_size != UINT32_MAX) {
765 		uint32_t d = dict_size - 1;
766 		d |= d >> 2;
767 		d |= d >> 3;
768 		d |= d >> 4;
769 		d |= d >> 8;
770 		d |= d >> 16;
771 		++d;
772 		if (d != dict_size || dict_size == 0)
773 			return false;
774 	}
775 
776 	// Another hack to ditch false positives: Assume that if the
777 	// uncompressed size is known, it must be less than 256 GiB.
778 	// Again, if someone complains, this will be reconsidered.
779 	uint64_t uncompressed_size = 0;
780 	for (size_t i = 0; i < 8; ++i)
781 		uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8);
782 
783 	if (uncompressed_size != UINT64_MAX
784 			&& uncompressed_size > (UINT64_C(1) << 38))
785 		return false;
786 
787 	return true;
788 }
789 
790 
791 #ifdef HAVE_LZIP_DECODER
792 /// Return true if the data in in_buf seems to be in the .lz format.
793 static bool
is_format_lzip(void)794 is_format_lzip(void)
795 {
796 	static const uint8_t magic[4] = { 0x4C, 0x5A, 0x49, 0x50 };
797 	return strm.avail_in >= sizeof(magic)
798 			&& memcmp(in_buf.u8, magic, sizeof(magic)) == 0;
799 }
800 #endif
801 #endif
802 
803 
804 /// Detect the input file type (for now, this done only when decompressing),
805 /// and initialize an appropriate coder. Return value indicates if a normal
806 /// liblzma-based coder was initialized (CODER_INIT_NORMAL), if passthru
807 /// mode should be used (CODER_INIT_PASSTHRU), or if an error occurred
808 /// (CODER_INIT_ERROR).
809 static enum coder_init_ret
coder_init(file_pair * pair)810 coder_init(file_pair *pair)
811 {
812 	lzma_ret ret = LZMA_PROG_ERROR;
813 
814 	// In most cases if there is input left when coding finishes,
815 	// something has gone wrong. Exceptions are --single-stream
816 	// and decoding .lz files which can contain trailing non-.lz data.
817 	// These will be handled later in this function.
818 	allow_trailing_input = false;
819 
820 	// Set the first filter chain. If the --block-list option is not
821 	// used then use the default filter chain (chains[0]).
822 	// Otherwise, use first filter chain from the block list.
823 	lzma_filter *active_filters = opt_block_list == NULL
824 			? chains[0]
825 			: chains[opt_block_list[0].chain_num];
826 
827 	if (opt_mode == MODE_COMPRESS) {
828 #ifdef HAVE_ENCODERS
829 		switch (opt_format) {
830 		case FORMAT_AUTO:
831 			// args.c ensures this.
832 			assert(0);
833 			break;
834 
835 		case FORMAT_XZ:
836 #	ifdef MYTHREAD_ENABLED
837 			mt_options.filters = active_filters;
838 			if (hardware_threads_is_mt())
839 				ret = lzma_stream_encoder_mt(
840 						&strm, &mt_options);
841 			else
842 #	endif
843 				ret = lzma_stream_encoder(
844 						&strm, active_filters, check);
845 			break;
846 
847 		case FORMAT_LZMA:
848 			ret = lzma_alone_encoder(&strm,
849 					active_filters[0].options);
850 			break;
851 
852 #	ifdef HAVE_LZIP_DECODER
853 		case FORMAT_LZIP:
854 			// args.c should disallow this.
855 			assert(0);
856 			ret = LZMA_PROG_ERROR;
857 			break;
858 #	endif
859 
860 		case FORMAT_RAW:
861 			ret = lzma_raw_encoder(&strm, active_filters);
862 			break;
863 		}
864 #endif
865 	} else {
866 #ifdef HAVE_DECODERS
867 		uint32_t flags = 0;
868 
869 		// It seems silly to warn about unsupported check if the
870 		// check won't be verified anyway due to --ignore-check.
871 		if (opt_ignore_check)
872 			flags |= LZMA_IGNORE_CHECK;
873 		else
874 			flags |= LZMA_TELL_UNSUPPORTED_CHECK;
875 
876 		if (opt_single_stream)
877 			allow_trailing_input = true;
878 		else
879 			flags |= LZMA_CONCATENATED;
880 
881 		// We abuse FORMAT_AUTO to indicate unknown file format,
882 		// for which we may consider passthru mode.
883 		enum format_type init_format = FORMAT_AUTO;
884 
885 		switch (opt_format) {
886 		case FORMAT_AUTO:
887 			// .lz is checked before .lzma since .lzma detection
888 			// is more complicated (no magic bytes).
889 			if (is_format_xz())
890 				init_format = FORMAT_XZ;
891 #	ifdef HAVE_LZIP_DECODER
892 			else if (is_format_lzip())
893 				init_format = FORMAT_LZIP;
894 #	endif
895 			else if (is_format_lzma())
896 				init_format = FORMAT_LZMA;
897 			break;
898 
899 		case FORMAT_XZ:
900 			if (is_format_xz())
901 				init_format = FORMAT_XZ;
902 			break;
903 
904 		case FORMAT_LZMA:
905 			if (is_format_lzma())
906 				init_format = FORMAT_LZMA;
907 			break;
908 
909 #	ifdef HAVE_LZIP_DECODER
910 		case FORMAT_LZIP:
911 			if (is_format_lzip())
912 				init_format = FORMAT_LZIP;
913 			break;
914 #	endif
915 
916 		case FORMAT_RAW:
917 			init_format = FORMAT_RAW;
918 			break;
919 		}
920 
921 		switch (init_format) {
922 		case FORMAT_AUTO:
923 			// Unknown file format. If --decompress --stdout
924 			// --force have been given, then we copy the input
925 			// as is to stdout. Checking for MODE_DECOMPRESS
926 			// is needed, because we don't want to do use
927 			// passthru mode with --test.
928 			if (opt_mode == MODE_DECOMPRESS
929 					&& opt_stdout && opt_force) {
930 				// These are needed for progress info.
931 				strm.total_in = 0;
932 				strm.total_out = 0;
933 				return CODER_INIT_PASSTHRU;
934 			}
935 
936 			ret = LZMA_FORMAT_ERROR;
937 			break;
938 
939 		case FORMAT_XZ:
940 #	ifdef MYTHREAD_ENABLED
941 			mt_options.flags = flags;
942 
943 			mt_options.threads = hardware_threads_get();
944 			mt_options.memlimit_stop
945 				= hardware_memlimit_get(MODE_DECOMPRESS);
946 
947 			// If single-threaded mode was requested, set the
948 			// memlimit for threading to zero. This forces the
949 			// decoder to use single-threaded mode which matches
950 			// the behavior of lzma_stream_decoder().
951 			//
952 			// Otherwise use the limit for threaded decompression
953 			// which has a sane default (users are still free to
954 			// make it insanely high though).
955 			mt_options.memlimit_threading
956 					= mt_options.threads == 1
957 					? 0 : hardware_memlimit_mtdec_get();
958 
959 			ret = lzma_stream_decoder_mt(&strm, &mt_options);
960 #	else
961 			ret = lzma_stream_decoder(&strm,
962 					hardware_memlimit_get(
963 						MODE_DECOMPRESS), flags);
964 #	endif
965 			break;
966 
967 		case FORMAT_LZMA:
968 			ret = lzma_alone_decoder(&strm,
969 					hardware_memlimit_get(
970 						MODE_DECOMPRESS));
971 			break;
972 
973 #	ifdef HAVE_LZIP_DECODER
974 		case FORMAT_LZIP:
975 			allow_trailing_input = true;
976 			ret = lzma_lzip_decoder(&strm,
977 					hardware_memlimit_get(
978 						MODE_DECOMPRESS), flags);
979 			break;
980 #	endif
981 
982 		case FORMAT_RAW:
983 			// Memory usage has already been checked in
984 			// coder_set_compression_settings().
985 			ret = lzma_raw_decoder(&strm, active_filters);
986 			break;
987 		}
988 
989 		// Try to decode the headers. This will catch too low
990 		// memory usage limit in case it happens in the first
991 		// Block of the first Stream, which is where it very
992 		// probably will happen if it is going to happen.
993 		//
994 		// This will also catch unsupported check type which
995 		// we treat as a warning only. If there are empty
996 		// concatenated Streams with unsupported check type then
997 		// the message can be shown more than once here. The loop
998 		// is used in case there is first a warning about
999 		// unsupported check type and then the first Block
1000 		// would exceed the memlimit.
1001 		if (ret == LZMA_OK && init_format != FORMAT_RAW) {
1002 			strm.next_out = NULL;
1003 			strm.avail_out = 0;
1004 			while ((ret = lzma_code(&strm, LZMA_RUN))
1005 					== LZMA_UNSUPPORTED_CHECK)
1006 				message_warning(_("%s: %s"), pair->src_name,
1007 						message_strm(ret));
1008 
1009 			// With --single-stream lzma_code won't wait for
1010 			// LZMA_FINISH and thus it can return LZMA_STREAM_END
1011 			// if the file has no uncompressed data inside.
1012 			// So treat LZMA_STREAM_END as LZMA_OK here.
1013 			// When lzma_code() is called again in coder_normal()
1014 			// it will return LZMA_STREAM_END again.
1015 			if (ret == LZMA_STREAM_END)
1016 				ret = LZMA_OK;
1017 		}
1018 #endif
1019 	}
1020 
1021 	if (ret != LZMA_OK) {
1022 		message_error(_("%s: %s"), pair->src_name, message_strm(ret));
1023 		if (ret == LZMA_MEMLIMIT_ERROR)
1024 			message_mem_needed(V_ERROR, lzma_memusage(&strm));
1025 
1026 		return CODER_INIT_ERROR;
1027 	}
1028 
1029 	return CODER_INIT_NORMAL;
1030 }
1031 
1032 
1033 #ifdef HAVE_ENCODERS
1034 /// Resolve conflicts between opt_block_size and opt_block_list in single
1035 /// threaded mode. We want to default to opt_block_list, except when it is
1036 /// larger than opt_block_size. If this is the case for the current Block
1037 /// at *list_pos, then we break into smaller Blocks. Otherwise advance
1038 /// to the next Block in opt_block_list, and break apart if needed.
1039 static void
split_block(uint64_t * block_remaining,uint64_t * next_block_remaining,size_t * list_pos)1040 split_block(uint64_t *block_remaining,
1041 	    uint64_t *next_block_remaining,
1042 	    size_t *list_pos)
1043 {
1044 	if (*next_block_remaining > 0) {
1045 		// The Block at *list_pos has previously been split up.
1046 		assert(!hardware_threads_is_mt());
1047 		assert(opt_block_size > 0);
1048 		assert(opt_block_list != NULL);
1049 
1050 		if (*next_block_remaining > opt_block_size) {
1051 			// We have to split the current Block at *list_pos
1052 			// into another opt_block_size length Block.
1053 			*block_remaining = opt_block_size;
1054 		} else {
1055 			// This is the last remaining split Block for the
1056 			// Block at *list_pos.
1057 			*block_remaining = *next_block_remaining;
1058 		}
1059 
1060 		*next_block_remaining -= *block_remaining;
1061 
1062 	} else {
1063 		// The Block at *list_pos has been finished. Go to the next
1064 		// entry in the list. If the end of the list has been
1065 		// reached, reuse the size and filters of the last Block.
1066 		if (opt_block_list[*list_pos + 1].size != 0) {
1067 			++*list_pos;
1068 
1069 			// Update the filters if needed.
1070 			if (opt_block_list[*list_pos - 1].chain_num
1071 				!= opt_block_list[*list_pos].chain_num) {
1072 				const unsigned chain_num
1073 					= opt_block_list[*list_pos].chain_num;
1074 				const lzma_filter *next = chains[chain_num];
1075 				const lzma_ret ret = lzma_filters_update(
1076 						&strm, next);
1077 
1078 				if (ret != LZMA_OK) {
1079 					// This message is only possible if
1080 					// the filter chain has unsupported
1081 					// options since the filter chain is
1082 					// validated using
1083 					// lzma_raw_encoder_memusage() or
1084 					// lzma_stream_encoder_mt_memusage().
1085 					// Some options are not validated until
1086 					// the encoders are initialized.
1087 					message_fatal(
1088 						_("Error changing to "
1089 						"filter chain %u: %s"),
1090 						chain_num,
1091 						message_strm(ret));
1092 				}
1093 			}
1094 		}
1095 
1096 		*block_remaining = opt_block_list[*list_pos].size;
1097 
1098 		// If in single-threaded mode, split up the Block if needed.
1099 		// This is not needed in multi-threaded mode because liblzma
1100 		// will do this due to how threaded encoding works.
1101 		if (!hardware_threads_is_mt() && opt_block_size > 0
1102 				&& *block_remaining > opt_block_size) {
1103 			*next_block_remaining
1104 					= *block_remaining - opt_block_size;
1105 			*block_remaining = opt_block_size;
1106 		}
1107 	}
1108 }
1109 #endif
1110 
1111 
1112 static bool
coder_write_output(file_pair * pair)1113 coder_write_output(file_pair *pair)
1114 {
1115 	if (opt_mode != MODE_TEST) {
1116 		if (io_write(pair, &out_buf, IO_BUFFER_SIZE - strm.avail_out))
1117 			return true;
1118 	}
1119 
1120 	strm.next_out = out_buf.u8;
1121 	strm.avail_out = IO_BUFFER_SIZE;
1122 	return false;
1123 }
1124 
1125 
1126 /// Compress or decompress using liblzma.
1127 static bool
coder_normal(file_pair * pair)1128 coder_normal(file_pair *pair)
1129 {
1130 	// Encoder needs to know when we have given all the input to it.
1131 	// The decoders need to know it too when we are using
1132 	// LZMA_CONCATENATED. We need to check for src_eof here, because
1133 	// the first input chunk has been already read if decompressing,
1134 	// and that may have been the only chunk we will read.
1135 	lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN;
1136 
1137 	lzma_ret ret;
1138 
1139 	// Assume that something goes wrong.
1140 	bool success = false;
1141 
1142 #ifdef HAVE_ENCODERS
1143 	// block_remaining indicates how many input bytes to encode before
1144 	// finishing the current .xz Block. The Block size is set with
1145 	// --block-size=SIZE and --block-list. They have an effect only when
1146 	// compressing to the .xz format. If block_remaining == UINT64_MAX,
1147 	// only a single block is created.
1148 	uint64_t block_remaining = UINT64_MAX;
1149 
1150 	// next_block_remaining for when we are in single-threaded mode and
1151 	// the Block in --block-list is larger than the --block-size=SIZE.
1152 	uint64_t next_block_remaining = 0;
1153 
1154 	// Position in opt_block_list. Unused if --block-list wasn't used.
1155 	size_t list_pos = 0;
1156 
1157 	// Handle --block-size for single-threaded mode and the first step
1158 	// of --block-list.
1159 	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) {
1160 		// --block-size doesn't do anything here in threaded mode,
1161 		// because the threaded encoder will take care of splitting
1162 		// to fixed-sized Blocks.
1163 		if (!hardware_threads_is_mt() && opt_block_size > 0)
1164 			block_remaining = opt_block_size;
1165 
1166 		// If --block-list was used, start with the first size.
1167 		//
1168 		// For threaded case, --block-size specifies how big Blocks
1169 		// the encoder needs to be prepared to create at maximum
1170 		// and --block-list will simultaneously cause new Blocks
1171 		// to be started at specified intervals. To keep things
1172 		// logical, the same is done in single-threaded mode. The
1173 		// output is still not identical because in single-threaded
1174 		// mode the size info isn't written into Block Headers.
1175 		if (opt_block_list != NULL) {
1176 			if (block_remaining < opt_block_list[list_pos].size) {
1177 				assert(!hardware_threads_is_mt());
1178 				next_block_remaining =
1179 						opt_block_list[list_pos].size
1180 						- block_remaining;
1181 			} else {
1182 				block_remaining =
1183 						opt_block_list[list_pos].size;
1184 			}
1185 		}
1186 	}
1187 #endif
1188 
1189 	strm.next_out = out_buf.u8;
1190 	strm.avail_out = IO_BUFFER_SIZE;
1191 
1192 	while (!user_abort) {
1193 		// Fill the input buffer if it is empty and we aren't
1194 		// flushing or finishing.
1195 		if (strm.avail_in == 0 && action == LZMA_RUN) {
1196 			strm.next_in = in_buf.u8;
1197 #ifdef HAVE_ENCODERS
1198 			const size_t read_size = my_min(block_remaining,
1199 					IO_BUFFER_SIZE);
1200 #else
1201 			const size_t read_size = IO_BUFFER_SIZE;
1202 #endif
1203 			strm.avail_in = io_read(pair, &in_buf, read_size);
1204 
1205 			if (strm.avail_in == SIZE_MAX)
1206 				break;
1207 
1208 			if (pair->src_eof) {
1209 				action = LZMA_FINISH;
1210 			}
1211 #ifdef HAVE_ENCODERS
1212 			else if (block_remaining != UINT64_MAX) {
1213 				// Start a new Block after every
1214 				// opt_block_size bytes of input.
1215 				block_remaining -= strm.avail_in;
1216 				if (block_remaining == 0)
1217 					action = LZMA_FULL_BARRIER;
1218 			}
1219 
1220 			if (action == LZMA_RUN && pair->flush_needed)
1221 				action = LZMA_SYNC_FLUSH;
1222 #endif
1223 		}
1224 
1225 		// Let liblzma do the actual work.
1226 		ret = lzma_code(&strm, action);
1227 
1228 		// Write out if the output buffer became full.
1229 		if (strm.avail_out == 0) {
1230 			if (coder_write_output(pair))
1231 				break;
1232 		}
1233 
1234 #ifdef HAVE_ENCODERS
1235 		if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH
1236 				|| action == LZMA_FULL_BARRIER)) {
1237 			if (action == LZMA_SYNC_FLUSH) {
1238 				// Flushing completed. Write the pending data
1239 				// out immediately so that the reading side
1240 				// can decompress everything compressed so far.
1241 				if (coder_write_output(pair))
1242 					break;
1243 
1244 				// Mark that we haven't seen any new input
1245 				// since the previous flush.
1246 				pair->src_has_seen_input = false;
1247 				pair->flush_needed = false;
1248 			} else {
1249 				// Start a new Block after LZMA_FULL_BARRIER.
1250 				if (opt_block_list == NULL) {
1251 					assert(!hardware_threads_is_mt());
1252 					assert(opt_block_size > 0);
1253 					block_remaining = opt_block_size;
1254 				} else {
1255 					split_block(&block_remaining,
1256 							&next_block_remaining,
1257 							&list_pos);
1258 				}
1259 			}
1260 
1261 			// Start a new Block after LZMA_FULL_FLUSH or continue
1262 			// the same block after LZMA_SYNC_FLUSH.
1263 			action = LZMA_RUN;
1264 		} else
1265 #endif
1266 		if (ret != LZMA_OK) {
1267 			// Determine if the return value indicates that we
1268 			// won't continue coding. LZMA_NO_CHECK would be
1269 			// here too if LZMA_TELL_ANY_CHECK was used.
1270 			const bool stop = ret != LZMA_UNSUPPORTED_CHECK;
1271 
1272 			if (stop) {
1273 				// Write the remaining bytes even if something
1274 				// went wrong, because that way the user gets
1275 				// as much data as possible, which can be good
1276 				// when trying to get at least some useful
1277 				// data out of damaged files.
1278 				if (coder_write_output(pair))
1279 					break;
1280 			}
1281 
1282 			if (ret == LZMA_STREAM_END) {
1283 				if (allow_trailing_input) {
1284 					io_fix_src_pos(pair, strm.avail_in);
1285 					success = true;
1286 					break;
1287 				}
1288 
1289 				// Check that there is no trailing garbage.
1290 				// This is needed for LZMA_Alone and raw
1291 				// streams. This is *not* done with .lz files
1292 				// as that format specifically requires
1293 				// allowing trailing garbage.
1294 				if (strm.avail_in == 0 && !pair->src_eof) {
1295 					// Try reading one more byte.
1296 					// Hopefully we don't get any more
1297 					// input, and thus pair->src_eof
1298 					// becomes true.
1299 					strm.avail_in = io_read(
1300 							pair, &in_buf, 1);
1301 					if (strm.avail_in == SIZE_MAX)
1302 						break;
1303 
1304 					assert(strm.avail_in == 0
1305 							|| strm.avail_in == 1);
1306 				}
1307 
1308 				if (strm.avail_in == 0) {
1309 					assert(pair->src_eof);
1310 					success = true;
1311 					break;
1312 				}
1313 
1314 				// We hadn't reached the end of the file.
1315 				ret = LZMA_DATA_ERROR;
1316 				assert(stop);
1317 			}
1318 
1319 			// If we get here and stop is true, something went
1320 			// wrong and we print an error. Otherwise it's just
1321 			// a warning and coding can continue.
1322 			if (stop) {
1323 				message_error(_("%s: %s"), pair->src_name,
1324 						message_strm(ret));
1325 			} else {
1326 				message_warning(_("%s: %s"), pair->src_name,
1327 						message_strm(ret));
1328 
1329 				// When compressing, all possible errors set
1330 				// stop to true.
1331 				assert(opt_mode != MODE_COMPRESS);
1332 			}
1333 
1334 			if (ret == LZMA_MEMLIMIT_ERROR) {
1335 				// Display how much memory it would have
1336 				// actually needed.
1337 				message_mem_needed(V_ERROR,
1338 						lzma_memusage(&strm));
1339 			}
1340 
1341 			if (stop)
1342 				break;
1343 		}
1344 
1345 		// Show progress information under certain conditions.
1346 		message_progress_update();
1347 	}
1348 
1349 	return success;
1350 }
1351 
1352 
1353 /// Copy from input file to output file without processing the data in any
1354 /// way. This is used only when trying to decompress unrecognized files
1355 /// with --decompress --stdout --force, so the output is always stdout.
1356 static bool
coder_passthru(file_pair * pair)1357 coder_passthru(file_pair *pair)
1358 {
1359 	while (strm.avail_in != 0) {
1360 		if (user_abort)
1361 			return false;
1362 
1363 		if (io_write(pair, &in_buf, strm.avail_in))
1364 			return false;
1365 
1366 		strm.total_in += strm.avail_in;
1367 		strm.total_out = strm.total_in;
1368 		message_progress_update();
1369 
1370 		strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
1371 		if (strm.avail_in == SIZE_MAX)
1372 			return false;
1373 	}
1374 
1375 	return true;
1376 }
1377 
1378 
1379 extern void
coder_run(const char * filename)1380 coder_run(const char *filename)
1381 {
1382 	// Set and possibly print the filename for the progress message.
1383 	message_filename(filename);
1384 
1385 	// Try to open the input file.
1386 	file_pair *pair = io_open_src(filename);
1387 	if (pair == NULL)
1388 		return;
1389 
1390 	// Assume that something goes wrong.
1391 	bool success = false;
1392 
1393 	if (opt_mode == MODE_COMPRESS) {
1394 		strm.next_in = NULL;
1395 		strm.avail_in = 0;
1396 	} else {
1397 		// Read the first chunk of input data. This is needed
1398 		// to detect the input file type.
1399 		strm.next_in = in_buf.u8;
1400 		strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
1401 	}
1402 
1403 	if (strm.avail_in != SIZE_MAX) {
1404 		// Initialize the coder. This will detect the file format
1405 		// and, in decompression or testing mode, check the memory
1406 		// usage of the first Block too. This way we don't try to
1407 		// open the destination file if we see that coding wouldn't
1408 		// work at all anyway. This also avoids deleting the old
1409 		// "target" file if --force was used.
1410 		const enum coder_init_ret init_ret = coder_init(pair);
1411 
1412 		if (init_ret != CODER_INIT_ERROR && !user_abort) {
1413 			// Don't open the destination file when --test
1414 			// is used.
1415 			if (opt_mode == MODE_TEST || !io_open_dest(pair)) {
1416 				// Remember the current time. It is needed
1417 				// for progress indicator.
1418 				mytime_set_start_time();
1419 
1420 				// Initialize the progress indicator.
1421 				//
1422 				// NOTE: When reading from stdin, fstat()
1423 				// isn't called on it and thus src_st.st_size
1424 				// is zero. If stdin pointed to a regular
1425 				// file, it would still be possible to know
1426 				// the file size but then we would also need
1427 				// to take into account the current reading
1428 				// position since with stdin it isn't
1429 				// necessarily at the beginning of the file.
1430 				const bool is_passthru = init_ret
1431 						== CODER_INIT_PASSTHRU;
1432 				const uint64_t in_size
1433 					= pair->src_st.st_size <= 0
1434 					? 0 : (uint64_t)(pair->src_st.st_size);
1435 				message_progress_start(&strm,
1436 						is_passthru, in_size);
1437 
1438 				// Do the actual coding or passthru.
1439 				if (is_passthru)
1440 					success = coder_passthru(pair);
1441 				else
1442 					success = coder_normal(pair);
1443 
1444 				message_progress_end(success);
1445 			}
1446 		}
1447 	}
1448 
1449 	// Close the file pair. It needs to know if coding was successful to
1450 	// know if the source or target file should be unlinked.
1451 	io_close(pair, success);
1452 
1453 	return;
1454 }
1455 
1456 
1457 #ifndef NDEBUG
1458 extern void
coder_free(void)1459 coder_free(void)
1460 {
1461 	// Free starting from the second filter chain since the default
1462 	// filter chain may have its options set from a static variable
1463 	// in coder_set_compression_settings(). Since this is only run in
1464 	// debug mode and will be freed when the process ends anyway, we
1465 	// don't worry about freeing it.
1466 	for (uint32_t i = 1; i < ARRAY_SIZE(chains); i++) {
1467 		if (chains_used_mask & (1U << i))
1468 			lzma_filters_free(chains[i], NULL);
1469 	}
1470 
1471 	lzma_end(&strm);
1472 	return;
1473 }
1474 #endif
1475