xref: /freebsd-src/contrib/xz/src/xz/args.c (revision 26743408e9ff53ac0e041407c359ed3c17c15596)
1 // SPDX-License-Identifier: 0BSD
2 
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       args.c
6 /// \brief      Argument parsing
7 ///
8 /// \note       Filter-specific options parsing is in options.c.
9 //
10 //  Authors:    Lasse Collin
11 //              Jia Tan
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14 
15 #include "private.h"
16 
17 #include "getopt.h"
18 #include <ctype.h>
19 
20 
/// Write the output to standard output instead of a file. Set by
/// -c/--stdout, when invoked as xzcat or lzcat, and forced on in test mode.
bool opt_stdout = false;

/// Set by -f/--force.
bool opt_force = false;

/// Keep the input file. Set by -k/--keep and forced on by args_parse()
/// whenever opt_stdout ends up being true.
bool opt_keep_original = false;

/// Set by --robot. Enabling it also switches LC_NUMERIC to "C".
bool opt_robot = false;

/// Set by --ignore-check.
bool opt_ignore_check = false;

/// Name stored in args_info.files_name when --files or --files0 is given
/// without an argument and the list of filenames is read from stdin.
/// The string is never modified or freed.
const char stdin_filename[] = "(stdin)";
30 
31 
/// \brief      Parse a memory usage limit and hand it to
///             hardware_memlimit_set()
///
/// The limit is either a plain number (range 0 to UINT64_MAX) or
/// a percentage written as a number from 1 to 100 followed by '%'.
///
/// \param      name             Option name passed to str_to_uint64()
///                              when str is a plain number
/// \param      name_percentage  Option name passed to str_to_uint64()
///                              when str ends with '%'
/// \param      str              The option argument to parse
/// \param      set_compress     Forwarded to hardware_memlimit_set()
/// \param      set_decompress   Forwarded to hardware_memlimit_set()
/// \param      set_mtdec        Forwarded to hardware_memlimit_set()
static void
parse_memlimit(const char *name, const char *name_percentage, const char *str,
		bool set_compress, bool set_decompress, bool set_mtdec)
{
	const size_t str_len = strlen(str);
	const bool is_percentage = str_len > 0 && str[str_len - 1] == '%';
	uint64_t limit;

	if (!is_percentage) {
		// UINT64_MAX is accepted even where SIZE_MAX is smaller so
		// that scripts passing values above 4 GiB keep working on
		// 32-bit systems.
		limit = str_to_uint64(name, str, 0, UINT64_MAX);
	} else {
		// Strip the trailing '%' from a private copy. str usually
		// points into argv[], and editing it in place would change
		// what tools like "ps" show for the process (--memlimit=50%
		// would turn into --memlimit=50).
		char *digits = xstrdup(str);
		digits[str_len - 1] = '\0';
		limit = str_to_uint64(name_percentage, digits, 1, 100);
		free(digits);
	}

	hardware_memlimit_set(limit, set_compress, set_decompress, set_mtdec,
			is_percentage);
}
66 
67 
68 static void
69 parse_block_list(const char *str_const)
70 {
71 	// We need a modifiable string in the for-loop.
72 	char *str_start = xstrdup(str_const);
73 	char *str = str_start;
74 
75 	// It must be non-empty and not begin with a comma.
76 	if (str[0] == '\0' || str[0] == ',')
77 		message_fatal(_("%s: Invalid argument to --block-list"), str);
78 
79 	// Count the number of comma-separated strings.
80 	size_t count = 1;
81 	for (size_t i = 0; str[i] != '\0'; ++i)
82 		if (str[i] == ',')
83 			++count;
84 
85 	// Prevent an unlikely integer overflow.
86 	if (count > SIZE_MAX / sizeof(block_list_entry) - 1)
87 		message_fatal(_("%s: Too many arguments to --block-list"),
88 				str);
89 
90 	// Allocate memory to hold all the sizes specified.
91 	// If --block-list was specified already, its value is forgotten.
92 	free(opt_block_list);
93 	opt_block_list = xmalloc((count + 1) * sizeof(block_list_entry));
94 
95 	// Clear the bitmask of filter chains in use.
96 	block_list_chain_mask = 0;
97 
98 	// Reset the largest Block size found in --block-list.
99 	block_list_largest = 0;
100 
101 	for (size_t i = 0; i < count; ++i) {
102 		// Locate the next comma and replace it with \0.
103 		char *p = strchr(str, ',');
104 		if (p != NULL)
105 			*p = '\0';
106 
107 		// Use the default filter chain unless overridden.
108 		opt_block_list[i].chain_num = 0;
109 
110 		// To specify a filter chain, the block list entry may be
111 		// prepended with "[filter-chain-number]:". The size is
112 		// still required for every block.
113 		// For instance:
114 		// --block-list=2:10MiB,1:5MiB,,8MiB,0:0
115 		//
116 		// Translates to:
117 		// 1. Block of 10 MiB using filter chain 2
118 		// 2. Block of 5 MiB using filter chain 1
119 		// 3. Block of 5 MiB using filter chain 1
120 		// 4. Block of 8 MiB using the default filter chain
121 		// 5. The last block uses the default filter chain
122 		//
123 		// The block list:
124 		// --block-list=2:MiB,1:,0
125 		//
126 		// Is not allowed because the second block does not specify
127 		// the block size, only the filter chain.
128 		if (str[0] >= '0' && str[0] <= '9' && str[1] == ':') {
129 			if (str[2] == '\0')
130 				message_fatal(_("In --block-list, block "
131 						"size is missing after "
132 						"filter chain number '%c:'"),
133 						str[0]);
134 
135 			const unsigned chain_num = (unsigned)(str[0] - '0');
136 			opt_block_list[i].chain_num = chain_num;
137 			block_list_chain_mask |= 1U << chain_num;
138 			str += 2;
139 		} else {
140 			// This Block uses the default filter chain.
141 			block_list_chain_mask |= 1U << 0;
142 		}
143 
144 		if (str[0] == '\0') {
145 			// There is no string, that is, a comma follows
146 			// another comma. Use the previous value.
147 			//
148 			// NOTE: We checked earlier that the first char
149 			// of the whole list cannot be a comma.
150 			assert(i > 0);
151 			opt_block_list[i] = opt_block_list[i - 1];
152 		} else {
153 			opt_block_list[i].size = str_to_uint64("block-list",
154 					str, 0, UINT64_MAX);
155 
156 			// Zero indicates no more new Blocks.
157 			if (opt_block_list[i].size == 0) {
158 				if (i + 1 != count)
159 					message_fatal(_("0 can only be used "
160 							"as the last element "
161 							"in --block-list"));
162 
163 				opt_block_list[i].size = UINT64_MAX;
164 			}
165 
166 			// Remember the largest Block size in the list.
167 			//
168 			// NOTE: Do this after handling the special value 0
169 			// because when 0 is used, we don't want to reduce
170 			// the Block size of the multithreaded encoder.
171 			if (block_list_largest < opt_block_list[i].size)
172 				block_list_largest = opt_block_list[i].size;
173 		}
174 
175 		// Be standards compliant: p + 1 is undefined behavior
176 		// if p == NULL. That occurs on the last iteration of
177 		// the loop when we won't care about the value of str
178 		// anymore anyway. That is, this is done conditionally
179 		// solely for standard conformance reasons.
180 		if (p != NULL)
181 			str = p + 1;
182 	}
183 
184 	// Terminate the array.
185 	opt_block_list[count].size = 0;
186 
187 	free(str_start);
188 	return;
189 }
190 
191 
192 static void
193 parse_real(args_info *args, int argc, char **argv)
194 {
195 	enum {
196 		OPT_FILTERS = INT_MIN,
197 		OPT_FILTERS1,
198 		OPT_FILTERS2,
199 		OPT_FILTERS3,
200 		OPT_FILTERS4,
201 		OPT_FILTERS5,
202 		OPT_FILTERS6,
203 		OPT_FILTERS7,
204 		OPT_FILTERS8,
205 		OPT_FILTERS9,
206 		OPT_FILTERS_HELP,
207 
208 		OPT_X86,
209 		OPT_POWERPC,
210 		OPT_IA64,
211 		OPT_ARM,
212 		OPT_ARMTHUMB,
213 		OPT_ARM64,
214 		OPT_SPARC,
215 		OPT_RISCV,
216 		OPT_DELTA,
217 		OPT_LZMA1,
218 		OPT_LZMA2,
219 
220 		OPT_SINGLE_STREAM,
221 		OPT_NO_SPARSE,
222 		OPT_FILES,
223 		OPT_FILES0,
224 		OPT_BLOCK_SIZE,
225 		OPT_BLOCK_LIST,
226 		OPT_MEM_COMPRESS,
227 		OPT_MEM_DECOMPRESS,
228 		OPT_MEM_MT_DECOMPRESS,
229 		OPT_NO_ADJUST,
230 		OPT_INFO_MEMORY,
231 		OPT_ROBOT,
232 		OPT_FLUSH_TIMEOUT,
233 		OPT_IGNORE_CHECK,
234 	};
235 
236 	static const char short_opts[]
237 			= "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
238 
239 	static const struct option long_opts[] = {
240 		// Operation mode
241 		{ "compress",     no_argument,       NULL,  'z' },
242 		{ "decompress",   no_argument,       NULL,  'd' },
243 		{ "uncompress",   no_argument,       NULL,  'd' },
244 		{ "test",         no_argument,       NULL,  't' },
245 		{ "list",         no_argument,       NULL,  'l' },
246 
247 		// Operation modifiers
248 		{ "keep",         no_argument,       NULL,  'k' },
249 		{ "force",        no_argument,       NULL,  'f' },
250 		{ "stdout",       no_argument,       NULL,  'c' },
251 		{ "to-stdout",    no_argument,       NULL,  'c' },
252 		{ "single-stream", no_argument,      NULL,  OPT_SINGLE_STREAM },
253 		{ "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
254 		{ "suffix",       required_argument, NULL,  'S' },
255 		{ "files",        optional_argument, NULL,  OPT_FILES },
256 		{ "files0",       optional_argument, NULL,  OPT_FILES0 },
257 
258 		// Basic compression settings
259 		{ "format",       required_argument, NULL,  'F' },
260 		{ "check",        required_argument, NULL,  'C' },
261 		{ "ignore-check", no_argument,       NULL,  OPT_IGNORE_CHECK },
262 		{ "block-size",   required_argument, NULL,  OPT_BLOCK_SIZE },
263 		{ "block-list",   required_argument, NULL,  OPT_BLOCK_LIST },
264 		{ "memlimit-compress",   required_argument, NULL, OPT_MEM_COMPRESS },
265 		{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
266 		{ "memlimit-mt-decompress", required_argument, NULL, OPT_MEM_MT_DECOMPRESS },
267 		{ "memlimit",     required_argument, NULL,  'M' },
268 		{ "memory",       required_argument, NULL,  'M' }, // Old alias
269 		{ "no-adjust",    no_argument,       NULL,  OPT_NO_ADJUST },
270 		{ "threads",      required_argument, NULL,  'T' },
271 		{ "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT },
272 
273 		{ "extreme",      no_argument,       NULL,  'e' },
274 		{ "fast",         no_argument,       NULL,  '0' },
275 		{ "best",         no_argument,       NULL,  '9' },
276 
277 		// Filters
278 		{ "filters",      optional_argument, NULL,  OPT_FILTERS},
279 		{ "filters1",     optional_argument, NULL,  OPT_FILTERS1},
280 		{ "filters2",     optional_argument, NULL,  OPT_FILTERS2},
281 		{ "filters3",     optional_argument, NULL,  OPT_FILTERS3},
282 		{ "filters4",     optional_argument, NULL,  OPT_FILTERS4},
283 		{ "filters5",     optional_argument, NULL,  OPT_FILTERS5},
284 		{ "filters6",     optional_argument, NULL,  OPT_FILTERS6},
285 		{ "filters7",     optional_argument, NULL,  OPT_FILTERS7},
286 		{ "filters8",     optional_argument, NULL,  OPT_FILTERS8},
287 		{ "filters9",     optional_argument, NULL,  OPT_FILTERS9},
288 		{ "filters-help", optional_argument, NULL,  OPT_FILTERS_HELP},
289 
290 		{ "lzma1",        optional_argument, NULL,  OPT_LZMA1 },
291 		{ "lzma2",        optional_argument, NULL,  OPT_LZMA2 },
292 		{ "x86",          optional_argument, NULL,  OPT_X86 },
293 		{ "powerpc",      optional_argument, NULL,  OPT_POWERPC },
294 		{ "ia64",         optional_argument, NULL,  OPT_IA64 },
295 		{ "arm",          optional_argument, NULL,  OPT_ARM },
296 		{ "armthumb",     optional_argument, NULL,  OPT_ARMTHUMB },
297 		{ "arm64",        optional_argument, NULL,  OPT_ARM64 },
298 		{ "sparc",        optional_argument, NULL,  OPT_SPARC },
299 		{ "riscv",        optional_argument, NULL,  OPT_RISCV },
300 		{ "delta",        optional_argument, NULL,  OPT_DELTA },
301 
302 		// Other options
303 		{ "quiet",        no_argument,       NULL,  'q' },
304 		{ "verbose",      no_argument,       NULL,  'v' },
305 		{ "no-warn",      no_argument,       NULL,  'Q' },
306 		{ "robot",        no_argument,       NULL,  OPT_ROBOT },
307 		{ "info-memory",  no_argument,       NULL,  OPT_INFO_MEMORY },
308 		{ "help",         no_argument,       NULL,  'h' },
309 		{ "long-help",    no_argument,       NULL,  'H' },
310 		{ "version",      no_argument,       NULL,  'V' },
311 
312 		{ NULL,           0,                 NULL,   0 }
313 	};
314 
315 	int c;
316 
317 	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
318 			!= -1) {
319 		switch (c) {
320 		// Compression preset (also for decompression if --format=raw)
321 		case '0': case '1': case '2': case '3': case '4':
322 		case '5': case '6': case '7': case '8': case '9':
323 			coder_set_preset((uint32_t)(c - '0'));
324 			break;
325 
326 		// --memlimit-compress
327 		case OPT_MEM_COMPRESS:
328 			parse_memlimit("memlimit-compress",
329 					"memlimit-compress%", optarg,
330 					true, false, false);
331 			break;
332 
333 		// --memlimit-decompress
334 		case OPT_MEM_DECOMPRESS:
335 			parse_memlimit("memlimit-decompress",
336 					"memlimit-decompress%", optarg,
337 					false, true, false);
338 			break;
339 
340 		// --memlimit-mt-decompress
341 		case OPT_MEM_MT_DECOMPRESS:
342 			parse_memlimit("memlimit-mt-decompress",
343 					"memlimit-mt-decompress%", optarg,
344 					false, false, true);
345 			break;
346 
347 		// --memlimit
348 		case 'M':
349 			parse_memlimit("memlimit", "memlimit%", optarg,
350 					true, true, true);
351 			break;
352 
353 		// --suffix
354 		case 'S':
355 			suffix_set(optarg);
356 			break;
357 
358 		case 'T': {
359 			// Since xz 5.4.0: Ignore leading '+' first.
360 			const char *s = optarg;
361 			if (optarg[0] == '+')
362 				++s;
363 
364 			// The max is from src/liblzma/common/common.h.
365 			uint32_t t = str_to_uint64("threads", s, 0, 16384);
366 
367 			// If leading '+' was used then use multi-threaded
368 			// mode even if exactly one thread was specified.
369 			if (t == 1 && optarg[0] == '+')
370 				t = UINT32_MAX;
371 
372 			hardware_threads_set(t);
373 			break;
374 		}
375 
376 		// --version
377 		case 'V':
378 			// This doesn't return.
379 			message_version();
380 
381 		// --stdout
382 		case 'c':
383 			opt_stdout = true;
384 			break;
385 
386 		// --decompress
387 		case 'd':
388 			opt_mode = MODE_DECOMPRESS;
389 			break;
390 
391 		// --extreme
392 		case 'e':
393 			coder_set_extreme();
394 			break;
395 
396 		// --force
397 		case 'f':
398 			opt_force = true;
399 			break;
400 
401 		// --info-memory
402 		case OPT_INFO_MEMORY:
403 			// This doesn't return.
404 			hardware_memlimit_show();
405 
406 		// --help
407 		case 'h':
408 			// This doesn't return.
409 			message_help(false);
410 
411 		// --long-help
412 		case 'H':
413 			// This doesn't return.
414 			message_help(true);
415 
416 		// --list
417 		case 'l':
418 			opt_mode = MODE_LIST;
419 			break;
420 
421 		// --keep
422 		case 'k':
423 			opt_keep_original = true;
424 			break;
425 
426 		// --quiet
427 		case 'q':
428 			message_verbosity_decrease();
429 			break;
430 
431 		case 'Q':
432 			set_exit_no_warn();
433 			break;
434 
435 		case 't':
436 			opt_mode = MODE_TEST;
437 			break;
438 
439 		// --verbose
440 		case 'v':
441 			message_verbosity_increase();
442 			break;
443 
444 		// --robot
445 		case OPT_ROBOT:
446 			opt_robot = true;
447 
448 			// This is to make sure that floating point numbers
449 			// always have a dot as decimal separator.
450 			setlocale(LC_NUMERIC, "C");
451 			break;
452 
453 		case 'z':
454 			opt_mode = MODE_COMPRESS;
455 			break;
456 
457 		// --filters
458 		case OPT_FILTERS:
459 			coder_add_filters_from_str(optarg);
460 			break;
461 
462 		// --filters1...--filters9
463 		case OPT_FILTERS1:
464 		case OPT_FILTERS2:
465 		case OPT_FILTERS3:
466 		case OPT_FILTERS4:
467 		case OPT_FILTERS5:
468 		case OPT_FILTERS6:
469 		case OPT_FILTERS7:
470 		case OPT_FILTERS8:
471 		case OPT_FILTERS9:
472 			coder_add_block_filters(optarg,
473 					(size_t)(c - OPT_FILTERS));
474 			break;
475 
476 		// --filters-help
477 		case OPT_FILTERS_HELP:
478 			// This doesn't return.
479 			message_filters_help();
480 			break;
481 
482 		case OPT_X86:
483 			coder_add_filter(LZMA_FILTER_X86,
484 					options_bcj(optarg));
485 			break;
486 
487 		case OPT_POWERPC:
488 			coder_add_filter(LZMA_FILTER_POWERPC,
489 					options_bcj(optarg));
490 			break;
491 
492 		case OPT_IA64:
493 			coder_add_filter(LZMA_FILTER_IA64,
494 					options_bcj(optarg));
495 			break;
496 
497 		case OPT_ARM:
498 			coder_add_filter(LZMA_FILTER_ARM,
499 					options_bcj(optarg));
500 			break;
501 
502 		case OPT_ARMTHUMB:
503 			coder_add_filter(LZMA_FILTER_ARMTHUMB,
504 					options_bcj(optarg));
505 			break;
506 
507 		case OPT_ARM64:
508 			coder_add_filter(LZMA_FILTER_ARM64,
509 					options_bcj(optarg));
510 			break;
511 
512 		case OPT_SPARC:
513 			coder_add_filter(LZMA_FILTER_SPARC,
514 					options_bcj(optarg));
515 			break;
516 
517 		case OPT_RISCV:
518 			coder_add_filter(LZMA_FILTER_RISCV,
519 					options_bcj(optarg));
520 			break;
521 
522 		case OPT_DELTA:
523 			coder_add_filter(LZMA_FILTER_DELTA,
524 					options_delta(optarg));
525 			break;
526 
527 		case OPT_LZMA1:
528 			coder_add_filter(LZMA_FILTER_LZMA1,
529 					options_lzma(optarg));
530 			break;
531 
532 		case OPT_LZMA2:
533 			coder_add_filter(LZMA_FILTER_LZMA2,
534 					options_lzma(optarg));
535 			break;
536 
537 		// Other
538 
539 		// --format
540 		case 'F': {
541 			// Just in case, support both "lzma" and "alone" since
542 			// the latter was used for forward compatibility in
543 			// LZMA Utils 4.32.x.
544 			static const struct {
545 				char str[8];
546 				enum format_type format;
547 			} types[] = {
548 				{ "auto",   FORMAT_AUTO },
549 				{ "xz",     FORMAT_XZ },
550 				{ "lzma",   FORMAT_LZMA },
551 				{ "alone",  FORMAT_LZMA },
552 #ifdef HAVE_LZIP_DECODER
553 				{ "lzip",   FORMAT_LZIP },
554 #endif
555 				{ "raw",    FORMAT_RAW },
556 			};
557 
558 			size_t i = 0;
559 			while (strcmp(types[i].str, optarg) != 0)
560 				if (++i == ARRAY_SIZE(types))
561 					message_fatal(_("%s: Unknown file "
562 							"format type"),
563 							optarg);
564 
565 			opt_format = types[i].format;
566 			break;
567 		}
568 
569 		// --check
570 		case 'C': {
571 			static const struct {
572 				char str[8];
573 				lzma_check check;
574 			} types[] = {
575 				{ "none",   LZMA_CHECK_NONE },
576 				{ "crc32",  LZMA_CHECK_CRC32 },
577 				{ "crc64",  LZMA_CHECK_CRC64 },
578 				{ "sha256", LZMA_CHECK_SHA256 },
579 			};
580 
581 			size_t i = 0;
582 			while (strcmp(types[i].str, optarg) != 0) {
583 				if (++i == ARRAY_SIZE(types))
584 					message_fatal(_("%s: Unsupported "
585 							"integrity "
586 							"check type"), optarg);
587 			}
588 
589 			// Use a separate check in case we are using different
590 			// liblzma than what was used to compile us.
591 			if (!lzma_check_is_supported(types[i].check))
592 				message_fatal(_("%s: Unsupported integrity "
593 						"check type"), optarg);
594 
595 			coder_set_check(types[i].check);
596 			break;
597 		}
598 
599 		case OPT_IGNORE_CHECK:
600 			opt_ignore_check = true;
601 			break;
602 
603 		case OPT_BLOCK_SIZE:
604 			opt_block_size = str_to_uint64("block-size", optarg,
605 					0, LZMA_VLI_MAX);
606 			break;
607 
608 		case OPT_BLOCK_LIST: {
609 			parse_block_list(optarg);
610 			break;
611 		}
612 
613 		case OPT_SINGLE_STREAM:
614 			opt_single_stream = true;
615 			break;
616 
617 		case OPT_NO_SPARSE:
618 			io_no_sparse();
619 			break;
620 
621 		case OPT_FILES:
622 			args->files_delim = '\n';
623 
624 		// Fall through
625 
626 		case OPT_FILES0:
627 			if (args->files_name != NULL)
628 				message_fatal(_("Only one file can be "
629 						"specified with '--files' "
630 						"or '--files0'."));
631 
632 			if (optarg == NULL) {
633 				args->files_name = stdin_filename;
634 				args->files_file = stdin;
635 			} else {
636 				args->files_name = optarg;
637 				args->files_file = fopen(optarg,
638 						c == OPT_FILES ? "r" : "rb");
639 				if (args->files_file == NULL)
640 					// TRANSLATORS: This is a translatable
641 					// string because French needs a space
642 					// before the colon ("%s : %s").
643 					message_fatal(_("%s: %s"), optarg,
644 							strerror(errno));
645 			}
646 
647 			break;
648 
649 		case OPT_NO_ADJUST:
650 			opt_auto_adjust = false;
651 			break;
652 
653 		case OPT_FLUSH_TIMEOUT:
654 			opt_flush_timeout = str_to_uint64("flush-timeout",
655 					optarg, 0, UINT64_MAX);
656 			break;
657 
658 		default:
659 			message_try_help();
660 			tuklib_exit(E_ERROR, E_ERROR, false);
661 		}
662 	}
663 
664 	return;
665 }
666 
667 
668 static void
669 parse_environment(args_info *args, char *argv0, const char *varname)
670 {
671 	char *env = getenv(varname);
672 	if (env == NULL)
673 		return;
674 
675 	// We modify the string, so make a copy of it.
676 	env = xstrdup(env);
677 
678 	// Calculate the number of arguments in env. argc stats at one
679 	// to include space for the program name.
680 	int argc = 1;
681 	bool prev_was_space = true;
682 	for (size_t i = 0; env[i] != '\0'; ++i) {
683 		// NOTE: Cast to unsigned char is needed so that correct
684 		// value gets passed to isspace(), which expects
685 		// unsigned char cast to int. Casting to int is done
686 		// automatically due to integer promotion, but we need to
687 		// force char to unsigned char manually. Otherwise 8-bit
688 		// characters would get promoted to wrong value if
689 		// char is signed.
690 		if (isspace((unsigned char)env[i])) {
691 			prev_was_space = true;
692 		} else if (prev_was_space) {
693 			prev_was_space = false;
694 
695 			// Keep argc small enough to fit into a signed int
696 			// and to keep it usable for memory allocation.
697 			if (++argc == my_min(
698 					INT_MAX, SIZE_MAX / sizeof(char *)))
699 				message_fatal(_("The environment variable "
700 						"%s contains too many "
701 						"arguments"), varname);
702 		}
703 	}
704 
705 	// Allocate memory to hold pointers to the arguments. Add one to get
706 	// space for the terminating NULL (if some systems happen to need it).
707 	char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
708 	argv[0] = argv0;
709 	argv[argc] = NULL;
710 
711 	// Go through the string again. Split the arguments using '\0'
712 	// characters and add pointers to the resulting strings to argv.
713 	argc = 1;
714 	prev_was_space = true;
715 	for (size_t i = 0; env[i] != '\0'; ++i) {
716 		if (isspace((unsigned char)env[i])) {
717 			prev_was_space = true;
718 			env[i] = '\0';
719 		} else if (prev_was_space) {
720 			prev_was_space = false;
721 			argv[argc++] = env + i;
722 		}
723 	}
724 
725 	// Parse the argument list we got from the environment. All non-option
726 	// arguments i.e. filenames are ignored.
727 	parse_real(args, argc, argv);
728 
729 	// Reset the state of the getopt_long() so that we can parse the
730 	// command line options too. There are two incompatible ways to
731 	// do it.
732 #ifdef HAVE_OPTRESET
733 	// BSD
734 	optind = 1;
735 	optreset = 1;
736 #else
737 	// GNU, Solaris
738 	optind = 0;
739 #endif
740 
741 	// We don't need the argument list from environment anymore.
742 	free(argv);
743 	free(env);
744 
745 	return;
746 }
747 
748 
749 extern void
750 args_parse(args_info *args, int argc, char **argv)
751 {
752 	// Initialize those parts of *args that we need later.
753 	args->files_name = NULL;
754 	args->files_file = NULL;
755 	args->files_delim = '\0';
756 
757 	// Check how we were called.
758 	{
759 		// Remove the leading path name, if any.
760 		const char *name = strrchr(argv[0], '/');
761 		if (name == NULL)
762 			name = argv[0];
763 		else
764 			++name;
765 
766 		// NOTE: It's possible that name[0] is now '\0' if argv[0]
767 		// is weird, but it doesn't matter here.
768 
769 		// Look for full command names instead of substrings like
770 		// "un", "cat", and "lz" to reduce possibility of false
771 		// positives when the programs have been renamed.
772 		if (strstr(name, "xzcat") != NULL) {
773 			opt_mode = MODE_DECOMPRESS;
774 			opt_stdout = true;
775 		} else if (strstr(name, "unxz") != NULL) {
776 			opt_mode = MODE_DECOMPRESS;
777 		} else if (strstr(name, "lzcat") != NULL) {
778 			opt_format = FORMAT_LZMA;
779 			opt_mode = MODE_DECOMPRESS;
780 			opt_stdout = true;
781 		} else if (strstr(name, "unlzma") != NULL) {
782 			opt_format = FORMAT_LZMA;
783 			opt_mode = MODE_DECOMPRESS;
784 		} else if (strstr(name, "lzma") != NULL) {
785 			opt_format = FORMAT_LZMA;
786 		}
787 	}
788 
789 	// First the flags from the environment
790 	parse_environment(args, argv[0], "XZ_DEFAULTS");
791 	parse_environment(args, argv[0], "XZ_OPT");
792 
793 	// Then from the command line
794 	parse_real(args, argc, argv);
795 
796 	// If encoder or decoder support was omitted at build time,
797 	// show an error now so that the rest of the code can rely on
798 	// that whatever is in opt_mode is also supported.
799 #ifndef HAVE_ENCODERS
800 	if (opt_mode == MODE_COMPRESS)
801 		message_fatal(_("Compression support was disabled "
802 				"at build time"));
803 #endif
804 #ifndef HAVE_DECODERS
805 	// Even MODE_LIST cannot work without decoder support so MODE_COMPRESS
806 	// is the only valid choice.
807 	if (opt_mode != MODE_COMPRESS)
808 		message_fatal(_("Decompression support was disabled "
809 				"at build time"));
810 #endif
811 
812 #ifdef HAVE_LZIP_DECODER
813 	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_LZIP)
814 		message_fatal(_("Compression of lzip files (.lz) "
815 				"is not supported"));
816 #endif
817 
818 	// Never remove the source file when the destination is not on disk.
819 	// In test mode the data is written nowhere, but setting opt_stdout
820 	// will make the rest of the code behave well.
821 	if (opt_stdout || opt_mode == MODE_TEST) {
822 		opt_keep_original = true;
823 		opt_stdout = true;
824 	}
825 
826 	// When compressing, if no --format flag was used, or it
827 	// was --format=auto, we compress to the .xz format.
828 	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
829 		opt_format = FORMAT_XZ;
830 
831 	// Set opt_block_list to NULL if we are not compressing to the .xz
832 	// format. This option cannot be used outside of this case, and
833 	// simplifies the implementation later.
834 	if ((opt_mode != MODE_COMPRESS || opt_format != FORMAT_XZ)
835 			&& opt_block_list != NULL) {
836 		message(V_WARNING, _("--block-list is ignored unless "
837 				"compressing to the .xz format"));
838 		free(opt_block_list);
839 		opt_block_list = NULL;
840 	}
841 
842 	// If raw format is used and a custom suffix is not provided,
843 	// then only stdout mode can be used when compressing or
844 	// decompressing.
845 	if (opt_format == FORMAT_RAW && !suffix_is_set() && !opt_stdout
846 			&& (opt_mode == MODE_COMPRESS
847 				|| opt_mode == MODE_DECOMPRESS)) {
848 		if (args->files_name != NULL)
849 			message_fatal(_("With --format=raw, "
850 					"--suffix=.SUF is required "
851 					"unless writing to stdout"));
852 
853 		// If all of the filenames provided are "-" (more than one
854 		// "-" could be specified) or no filenames are provided,
855 		// then we are only going to be writing to standard out.
856 		for (int i = optind; i < argc; i++) {
857 			if (strcmp(argv[i], "-") != 0)
858 				message_fatal(_("With --format=raw, "
859 						"--suffix=.SUF is required "
860 						"unless writing to stdout"));
861 		}
862 	}
863 
864 	// Compression settings need to be validated (options themselves and
865 	// their memory usage) when compressing to any file format. It has to
866 	// be done also when uncompressing raw data, since for raw decoding
867 	// the options given on the command line are used to know what kind
868 	// of raw data we are supposed to decode.
869 	if (opt_mode == MODE_COMPRESS || (opt_format == FORMAT_RAW
870 			&& opt_mode != MODE_LIST))
871 		coder_set_compression_settings();
872 
873 	// If no filenames are given, use stdin.
874 	if (argv[optind] == NULL && args->files_name == NULL) {
875 		// We don't modify or free() the "-" constant. The caller
876 		// modifies this so don't make the struct itself const.
877 		static char *names_stdin[2] = { (char *)"-", NULL };
878 		args->arg_names = names_stdin;
879 		args->arg_count = 1;
880 	} else {
881 		// We got at least one filename from the command line, or
882 		// --files or --files0 was specified.
883 		args->arg_names = argv + optind;
884 		args->arg_count = (unsigned int)(argc - optind);
885 	}
886 
887 	return;
888 }
889 
890 
891 #ifndef NDEBUG
892 extern void
893 args_free(void)
894 {
895 	free(opt_block_list);
896 	return;
897 }
898 #endif
899