xref: /minix3/external/public-domain/xz/dist/src/xz/args.c (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       args.c
4 /// \brief      Argument parsing
5 ///
6 /// \note       Filter-specific options parsing is in options.c.
7 //
8 //  Author:     Lasse Collin
9 //
10 //  This file has been put into the public domain.
11 //  You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14 
15 #include "private.h"
16 
17 #include "getopt.h"
18 #include <ctype.h>
19 
20 
// Option flags set by the command line parser in this file.
// Every one of them is off until the matching option is seen.
bool opt_stdout = false;         // -c, --stdout, --to-stdout
bool opt_force = false;          // -f, --force
bool opt_keep_original = false;  // -k, --keep
bool opt_robot = false;          // --robot
bool opt_ignore_check = false;   // --ignore-check

// Name stored in args->files_name when --files or --files0 is used
// without an argument. The string is never written to nor passed to
// free(), yet it has to be assignable to pointers that lack const.
const char stdin_filename[] = "(stdin)";
30 
31 
/// \brief      Parse and set the memory usage limit for compression
///             and/or decompression
///
/// The argument is either a percentage ("N%" with N in the range 1-100)
/// or an absolute value parsed by str_to_uint64().
///
/// \param      name            Option name given to str_to_uint64() when
///                             the argument is an absolute value
/// \param      name_percentage Option name given to str_to_uint64() when
///                             the argument is a percentage
/// \param      str             The option argument; it is left unmodified
/// \param      set_compress    Passed on to hardware_memlimit_set()
/// \param      set_decompress  Passed on to hardware_memlimit_set()
static void
parse_memlimit(const char *name, const char *name_percentage, char *str,
		bool set_compress, bool set_decompress)
{
	bool is_percentage = false;
	uint64_t value;

	const size_t len = strlen(str);
	if (len > 0 && str[len - 1] == '%') {
		// Strip the percent sign from a private copy. The callers
		// pass optarg, which points into argv[], so overwriting
		// the '%' in place would alter the command line itself
		// (for example, --memlimit=50% would turn into
		// --memlimit=50 for anything that inspects argv later).
		char *number = xstrdup(str);
		number[len - 1] = '\0';
		is_percentage = true;
		value = str_to_uint64(name_percentage, number, 1, 100);
		free(number);
	} else {
		// On 32-bit systems, SIZE_MAX would make more sense than
		// UINT64_MAX. But use UINT64_MAX still so that scripts
		// that assume > 4 GiB values don't break.
		value = str_to_uint64(name, str, 0, UINT64_MAX);
	}

	hardware_memlimit_set(
			value, set_compress, set_decompress, is_percentage);
	return;
}
56 
57 
58 static void
parse_block_list(char * str)59 parse_block_list(char *str)
60 {
61 	// It must be non-empty and not begin with a comma.
62 	if (str[0] == '\0' || str[0] == ',')
63 		message_fatal(_("%s: Invalid argument to --block-list"), str);
64 
65 	// Count the number of comma-separated strings.
66 	size_t count = 1;
67 	for (size_t i = 0; str[i] != '\0'; ++i)
68 		if (str[i] == ',')
69 			++count;
70 
71 	// Prevent an unlikely integer overflow.
72 	if (count > SIZE_MAX / sizeof(uint64_t) - 1)
73 		message_fatal(_("%s: Too many arguments to --block-list"),
74 				str);
75 
76 	// Allocate memory to hold all the sizes specified.
77 	// If --block-list was specified already, its value is forgotten.
78 	free(opt_block_list);
79 	opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));
80 
81 	for (size_t i = 0; i < count; ++i) {
82 		// Locate the next comma and replace it with \0.
83 		char *p = strchr(str, ',');
84 		if (p != NULL)
85 			*p = '\0';
86 
87 		if (str[0] == '\0') {
88 			// There is no string, that is, a comma follows
89 			// another comma. Use the previous value.
90 			//
91 			// NOTE: We checked earler that the first char
92 			// of the whole list cannot be a comma.
93 			assert(i > 0);
94 			opt_block_list[i] = opt_block_list[i - 1];
95 		} else {
96 			opt_block_list[i] = str_to_uint64("block-list", str,
97 					0, UINT64_MAX);
98 
99 			// Zero indicates no more new Blocks.
100 			if (opt_block_list[i] == 0) {
101 				if (i + 1 != count)
102 					message_fatal(_("0 can only be used "
103 							"as the last element "
104 							"in --block-list"));
105 
106 				opt_block_list[i] = UINT64_MAX;
107 			}
108 		}
109 
110 		if (p == NULL)
111 			break;
112 		str = p + 1;
113 	}
114 
115 	// Terminate the array.
116 	opt_block_list[count] = 0;
117 	return;
118 }
119 
120 
121 static void
parse_real(args_info * args,int argc,char ** argv)122 parse_real(args_info *args, int argc, char **argv)
123 {
124 	enum {
125 		OPT_X86 = INT_MIN,
126 		OPT_POWERPC,
127 		OPT_IA64,
128 		OPT_ARM,
129 		OPT_ARMTHUMB,
130 		OPT_SPARC,
131 		OPT_DELTA,
132 		OPT_LZMA1,
133 		OPT_LZMA2,
134 
135 		OPT_SINGLE_STREAM,
136 		OPT_NO_SPARSE,
137 		OPT_FILES,
138 		OPT_FILES0,
139 		OPT_BLOCK_SIZE,
140 		OPT_BLOCK_LIST,
141 		OPT_MEM_COMPRESS,
142 		OPT_MEM_DECOMPRESS,
143 		OPT_NO_ADJUST,
144 		OPT_INFO_MEMORY,
145 		OPT_ROBOT,
146 		OPT_FLUSH_TIMEOUT,
147 		OPT_IGNORE_CHECK,
148 	};
149 
150 	static const char short_opts[]
151 			= "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
152 
153 	static const struct option long_opts[] = {
154 		// Operation mode
155 		{ "compress",     no_argument,       NULL,  'z' },
156 		{ "decompress",   no_argument,       NULL,  'd' },
157 		{ "uncompress",   no_argument,       NULL,  'd' },
158 		{ "test",         no_argument,       NULL,  't' },
159 		{ "list",         no_argument,       NULL,  'l' },
160 
161 		// Operation modifiers
162 		{ "keep",         no_argument,       NULL,  'k' },
163 		{ "force",        no_argument,       NULL,  'f' },
164 		{ "stdout",       no_argument,       NULL,  'c' },
165 		{ "to-stdout",    no_argument,       NULL,  'c' },
166 		{ "single-stream", no_argument,      NULL,  OPT_SINGLE_STREAM },
167 		{ "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
168 		{ "suffix",       required_argument, NULL,  'S' },
169 		// { "recursive",      no_argument,       NULL,  'r' }, // TODO
170 		{ "files",        optional_argument, NULL,  OPT_FILES },
171 		{ "files0",       optional_argument, NULL,  OPT_FILES0 },
172 
173 		// Basic compression settings
174 		{ "format",       required_argument, NULL,  'F' },
175 		{ "check",        required_argument, NULL,  'C' },
176 		{ "ignore-check", no_argument,       NULL,  OPT_IGNORE_CHECK },
177 		{ "block-size",   required_argument, NULL,  OPT_BLOCK_SIZE },
178 		{ "block-list",  required_argument, NULL,  OPT_BLOCK_LIST },
179 		{ "memlimit-compress",   required_argument, NULL, OPT_MEM_COMPRESS },
180 		{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
181 		{ "memlimit",     required_argument, NULL,  'M' },
182 		{ "memory",       required_argument, NULL,  'M' }, // Old alias
183 		{ "no-adjust",    no_argument,       NULL,  OPT_NO_ADJUST },
184 		{ "threads",      required_argument, NULL,  'T' },
185 		{ "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT },
186 
187 		{ "extreme",      no_argument,       NULL,  'e' },
188 		{ "fast",         no_argument,       NULL,  '0' },
189 		{ "best",         no_argument,       NULL,  '9' },
190 
191 		// Filters
192 		{ "lzma1",        optional_argument, NULL,  OPT_LZMA1 },
193 		{ "lzma2",        optional_argument, NULL,  OPT_LZMA2 },
194 		{ "x86",          optional_argument, NULL,  OPT_X86 },
195 		{ "powerpc",      optional_argument, NULL,  OPT_POWERPC },
196 		{ "ia64",         optional_argument, NULL,  OPT_IA64 },
197 		{ "arm",          optional_argument, NULL,  OPT_ARM },
198 		{ "armthumb",     optional_argument, NULL,  OPT_ARMTHUMB },
199 		{ "sparc",        optional_argument, NULL,  OPT_SPARC },
200 		{ "delta",        optional_argument, NULL,  OPT_DELTA },
201 
202 		// Other options
203 		{ "quiet",        no_argument,       NULL,  'q' },
204 		{ "verbose",      no_argument,       NULL,  'v' },
205 		{ "no-warn",      no_argument,       NULL,  'Q' },
206 		{ "robot",        no_argument,       NULL,  OPT_ROBOT },
207 		{ "info-memory",  no_argument,       NULL,  OPT_INFO_MEMORY },
208 		{ "help",         no_argument,       NULL,  'h' },
209 		{ "long-help",    no_argument,       NULL,  'H' },
210 		{ "version",      no_argument,       NULL,  'V' },
211 
212 		{ NULL,           0,                 NULL,   0 }
213 	};
214 
215 	int c;
216 
217 	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
218 			!= -1) {
219 		switch (c) {
220 		// Compression preset (also for decompression if --format=raw)
221 		case '0': case '1': case '2': case '3': case '4':
222 		case '5': case '6': case '7': case '8': case '9':
223 			coder_set_preset(c - '0');
224 			break;
225 
226 		// --memlimit-compress
227 		case OPT_MEM_COMPRESS:
228 			parse_memlimit("memlimit-compress",
229 					"memlimit-compress%", optarg,
230 					true, false);
231 			break;
232 
233 		// --memlimit-decompress
234 		case OPT_MEM_DECOMPRESS:
235 			parse_memlimit("memlimit-decompress",
236 					"memlimit-decompress%", optarg,
237 					false, true);
238 			break;
239 
240 		// --memlimit
241 		case 'M':
242 			parse_memlimit("memlimit", "memlimit%", optarg,
243 					true, true);
244 			break;
245 
246 		// --suffix
247 		case 'S':
248 			suffix_set(optarg);
249 			break;
250 
251 		case 'T':
252 			// The max is from src/liblzma/common/common.h.
253 			hardware_threads_set(str_to_uint64("threads",
254 					optarg, 0, 16384));
255 			break;
256 
257 		// --version
258 		case 'V':
259 			// This doesn't return.
260 			message_version();
261 
262 		// --stdout
263 		case 'c':
264 			opt_stdout = true;
265 			break;
266 
267 		// --decompress
268 		case 'd':
269 			opt_mode = MODE_DECOMPRESS;
270 			break;
271 
272 		// --extreme
273 		case 'e':
274 			coder_set_extreme();
275 			break;
276 
277 		// --force
278 		case 'f':
279 			opt_force = true;
280 			break;
281 
282 		// --info-memory
283 		case OPT_INFO_MEMORY:
284 			// This doesn't return.
285 			hardware_memlimit_show();
286 
287 		// --help
288 		case 'h':
289 			// This doesn't return.
290 			message_help(false);
291 
292 		// --long-help
293 		case 'H':
294 			// This doesn't return.
295 			message_help(true);
296 
297 		// --list
298 		case 'l':
299 			opt_mode = MODE_LIST;
300 			break;
301 
302 		// --keep
303 		case 'k':
304 			opt_keep_original = true;
305 			break;
306 
307 		// --quiet
308 		case 'q':
309 			message_verbosity_decrease();
310 			break;
311 
312 		case 'Q':
313 			set_exit_no_warn();
314 			break;
315 
316 		case 't':
317 			opt_mode = MODE_TEST;
318 			break;
319 
320 		// --verbose
321 		case 'v':
322 			message_verbosity_increase();
323 			break;
324 
325 		// --robot
326 		case OPT_ROBOT:
327 			opt_robot = true;
328 
329 			// This is to make sure that floating point numbers
330 			// always have a dot as decimal separator.
331 			setlocale(LC_NUMERIC, "C");
332 			break;
333 
334 		case 'z':
335 			opt_mode = MODE_COMPRESS;
336 			break;
337 
338 		// Filter setup
339 
340 		case OPT_X86:
341 			coder_add_filter(LZMA_FILTER_X86,
342 					options_bcj(optarg));
343 			break;
344 
345 		case OPT_POWERPC:
346 			coder_add_filter(LZMA_FILTER_POWERPC,
347 					options_bcj(optarg));
348 			break;
349 
350 		case OPT_IA64:
351 			coder_add_filter(LZMA_FILTER_IA64,
352 					options_bcj(optarg));
353 			break;
354 
355 		case OPT_ARM:
356 			coder_add_filter(LZMA_FILTER_ARM,
357 					options_bcj(optarg));
358 			break;
359 
360 		case OPT_ARMTHUMB:
361 			coder_add_filter(LZMA_FILTER_ARMTHUMB,
362 					options_bcj(optarg));
363 			break;
364 
365 		case OPT_SPARC:
366 			coder_add_filter(LZMA_FILTER_SPARC,
367 					options_bcj(optarg));
368 			break;
369 
370 		case OPT_DELTA:
371 			coder_add_filter(LZMA_FILTER_DELTA,
372 					options_delta(optarg));
373 			break;
374 
375 		case OPT_LZMA1:
376 			coder_add_filter(LZMA_FILTER_LZMA1,
377 					options_lzma(optarg));
378 			break;
379 
380 		case OPT_LZMA2:
381 			coder_add_filter(LZMA_FILTER_LZMA2,
382 					options_lzma(optarg));
383 			break;
384 
385 		// Other
386 
387 		// --format
388 		case 'F': {
389 			// Just in case, support both "lzma" and "alone" since
390 			// the latter was used for forward compatibility in
391 			// LZMA Utils 4.32.x.
392 			static const struct {
393 				char str[8];
394 				enum format_type format;
395 			} types[] = {
396 				{ "auto",   FORMAT_AUTO },
397 				{ "xz",     FORMAT_XZ },
398 				{ "lzma",   FORMAT_LZMA },
399 				{ "alone",  FORMAT_LZMA },
400 				// { "gzip",   FORMAT_GZIP },
401 				// { "gz",     FORMAT_GZIP },
402 				{ "raw",    FORMAT_RAW },
403 			};
404 
405 			size_t i = 0;
406 			while (strcmp(types[i].str, optarg) != 0)
407 				if (++i == ARRAY_SIZE(types))
408 					message_fatal(_("%s: Unknown file "
409 							"format type"),
410 							optarg);
411 
412 			opt_format = types[i].format;
413 			break;
414 		}
415 
416 		// --check
417 		case 'C': {
418 			static const struct {
419 				char str[8];
420 				lzma_check check;
421 			} types[] = {
422 				{ "none",   LZMA_CHECK_NONE },
423 				{ "crc32",  LZMA_CHECK_CRC32 },
424 				{ "crc64",  LZMA_CHECK_CRC64 },
425 				{ "sha256", LZMA_CHECK_SHA256 },
426 			};
427 
428 			size_t i = 0;
429 			while (strcmp(types[i].str, optarg) != 0) {
430 				if (++i == ARRAY_SIZE(types))
431 					message_fatal(_("%s: Unsupported "
432 							"integrity "
433 							"check type"), optarg);
434 			}
435 
436 			// Use a separate check in case we are using different
437 			// liblzma than what was used to compile us.
438 			if (!lzma_check_is_supported(types[i].check))
439 				message_fatal(_("%s: Unsupported integrity "
440 						"check type"), optarg);
441 
442 			coder_set_check(types[i].check);
443 			break;
444 		}
445 
446 		case OPT_IGNORE_CHECK:
447 			opt_ignore_check = true;
448 			break;
449 
450 		case OPT_BLOCK_SIZE:
451 			opt_block_size = str_to_uint64("block-size", optarg,
452 					0, LZMA_VLI_MAX);
453 			break;
454 
455 		case OPT_BLOCK_LIST: {
456 			parse_block_list(optarg);
457 			break;
458 		}
459 
460 		case OPT_SINGLE_STREAM:
461 			opt_single_stream = true;
462 			break;
463 
464 		case OPT_NO_SPARSE:
465 			io_no_sparse();
466 			break;
467 
468 		case OPT_FILES:
469 			args->files_delim = '\n';
470 
471 		// Fall through
472 
473 		case OPT_FILES0:
474 			if (args->files_name != NULL)
475 				message_fatal(_("Only one file can be "
476 						"specified with `--files' "
477 						"or `--files0'."));
478 
479 			if (optarg == NULL) {
480 				args->files_name = (char *)stdin_filename;
481 				args->files_file = stdin;
482 			} else {
483 				args->files_name = optarg;
484 				args->files_file = fopen(optarg,
485 						c == OPT_FILES ? "r" : "rb");
486 				if (args->files_file == NULL)
487 					message_fatal("%s: %s", optarg,
488 							strerror(errno));
489 			}
490 
491 			break;
492 
493 		case OPT_NO_ADJUST:
494 			opt_auto_adjust = false;
495 			break;
496 
497 		case OPT_FLUSH_TIMEOUT:
498 			opt_flush_timeout = str_to_uint64("flush-timeout",
499 					optarg, 0, UINT64_MAX);
500 			break;
501 
502 		default:
503 			message_try_help();
504 			tuklib_exit(E_ERROR, E_ERROR, false);
505 		}
506 	}
507 
508 	return;
509 }
510 
511 
512 static void
parse_environment(args_info * args,char * argv0,const char * varname)513 parse_environment(args_info *args, char *argv0, const char *varname)
514 {
515 	char *env = getenv(varname);
516 	if (env == NULL)
517 		return;
518 
519 	// We modify the string, so make a copy of it.
520 	env = xstrdup(env);
521 
522 	// Calculate the number of arguments in env. argc stats at one
523 	// to include space for the program name.
524 	int argc = 1;
525 	bool prev_was_space = true;
526 	for (size_t i = 0; env[i] != '\0'; ++i) {
527 		// NOTE: Cast to unsigned char is needed so that correct
528 		// value gets passed to isspace(), which expects
529 		// unsigned char cast to int. Casting to int is done
530 		// automatically due to integer promotion, but we need to
531 		// force char to unsigned char manually. Otherwise 8-bit
532 		// characters would get promoted to wrong value if
533 		// char is signed.
534 		if (isspace((unsigned char)env[i])) {
535 			prev_was_space = true;
536 		} else if (prev_was_space) {
537 			prev_was_space = false;
538 
539 			// Keep argc small enough to fit into a signed int
540 			// and to keep it usable for memory allocation.
541 			if (++argc == my_min(
542 					INT_MAX, SIZE_MAX / sizeof(char *)))
543 				message_fatal(_("The environment variable "
544 						"%s contains too many "
545 						"arguments"), varname);
546 		}
547 	}
548 
549 	// Allocate memory to hold pointers to the arguments. Add one to get
550 	// space for the terminating NULL (if some systems happen to need it).
551 	char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
552 	argv[0] = argv0;
553 	argv[argc] = NULL;
554 
555 	// Go through the string again. Split the arguments using '\0'
556 	// characters and add pointers to the resulting strings to argv.
557 	argc = 1;
558 	prev_was_space = true;
559 	for (size_t i = 0; env[i] != '\0'; ++i) {
560 		if (isspace((unsigned char)env[i])) {
561 			prev_was_space = true;
562 			env[i] = '\0';
563 		} else if (prev_was_space) {
564 			prev_was_space = false;
565 			argv[argc++] = env + i;
566 		}
567 	}
568 
569 	// Parse the argument list we got from the environment. All non-option
570 	// arguments i.e. filenames are ignored.
571 	parse_real(args, argc, argv);
572 
573 	// Reset the state of the getopt_long() so that we can parse the
574 	// command line options too. There are two incompatible ways to
575 	// do it.
576 #ifdef HAVE_OPTRESET
577 	// BSD
578 	optind = 1;
579 	optreset = 1;
580 #else
581 	// GNU, Solaris
582 	optind = 0;
583 #endif
584 
585 	// We don't need the argument list from environment anymore.
586 	free(argv);
587 	free(env);
588 
589 	return;
590 }
591 
592 
593 extern void
args_parse(args_info * args,int argc,char ** argv)594 args_parse(args_info *args, int argc, char **argv)
595 {
596 	// Initialize those parts of *args that we need later.
597 	args->files_name = NULL;
598 	args->files_file = NULL;
599 	args->files_delim = '\0';
600 
601 	// Check how we were called.
602 	{
603 		// Remove the leading path name, if any.
604 		const char *name = strrchr(argv[0], '/');
605 		if (name == NULL)
606 			name = argv[0];
607 		else
608 			++name;
609 
610 		// NOTE: It's possible that name[0] is now '\0' if argv[0]
611 		// is weird, but it doesn't matter here.
612 
613 		// Look for full command names instead of substrings like
614 		// "un", "cat", and "lz" to reduce possibility of false
615 		// positives when the programs have been renamed.
616 		if (strstr(name, "xzcat") != NULL) {
617 			opt_mode = MODE_DECOMPRESS;
618 			opt_stdout = true;
619 		} else if (strstr(name, "unxz") != NULL) {
620 			opt_mode = MODE_DECOMPRESS;
621 		} else if (strstr(name, "lzcat") != NULL) {
622 			opt_format = FORMAT_LZMA;
623 			opt_mode = MODE_DECOMPRESS;
624 			opt_stdout = true;
625 		} else if (strstr(name, "unlzma") != NULL) {
626 			opt_format = FORMAT_LZMA;
627 			opt_mode = MODE_DECOMPRESS;
628 		} else if (strstr(name, "lzma") != NULL) {
629 			opt_format = FORMAT_LZMA;
630 		}
631 	}
632 
633 	// First the flags from the environment
634 	parse_environment(args, argv[0], "XZ_DEFAULTS");
635 	parse_environment(args, argv[0], "XZ_OPT");
636 
637 	// Then from the command line
638 	parse_real(args, argc, argv);
639 
640 	// Never remove the source file when the destination is not on disk.
641 	// In test mode the data is written nowhere, but setting opt_stdout
642 	// will make the rest of the code behave well.
643 	if (opt_stdout || opt_mode == MODE_TEST) {
644 		opt_keep_original = true;
645 		opt_stdout = true;
646 	}
647 
648 	// When compressing, if no --format flag was used, or it
649 	// was --format=auto, we compress to the .xz format.
650 	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
651 		opt_format = FORMAT_XZ;
652 
653 	// Compression settings need to be validated (options themselves and
654 	// their memory usage) when compressing to any file format. It has to
655 	// be done also when uncompressing raw data, since for raw decoding
656 	// the options given on the command line are used to know what kind
657 	// of raw data we are supposed to decode.
658 	if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
659 		coder_set_compression_settings();
660 
661 	// If no filenames are given, use stdin.
662 	if (argv[optind] == NULL && args->files_name == NULL) {
663 		// We don't modify or free() the "-" constant. The caller
664 		// modifies this so don't make the struct itself const.
665 		static char *names_stdin[2] = { (char *)"-", NULL };
666 		args->arg_names = names_stdin;
667 		args->arg_count = 1;
668 	} else {
669 		// We got at least one filename from the command line, or
670 		// --files or --files0 was specified.
671 		args->arg_names = argv + optind;
672 		args->arg_count = argc - optind;
673 	}
674 
675 	return;
676 }
677 
678 
679 #ifndef NDEBUG
680 extern void
args_free(void)681 args_free(void)
682 {
683 	free(opt_block_list);
684 	return;
685 }
686 #endif
687