1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file args.c
4 /// \brief Argument parsing
5 ///
6 /// \note Filter-specific options parsing is in options.c.
7 //
8 // Author: Lasse Collin
9 //
10 // This file has been put into the public domain.
11 // You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14
15 #include "private.h"
16
17 #include "getopt.h"
18 #include <ctype.h>
19
20
/// Write to standard output instead of a file. Set by -c/--stdout and by
/// the xzcat/lzcat command names; args_parse() also forces it on in test mode.
bool opt_stdout = false;

/// Set by -f/--force.
bool opt_force = false;

/// Don't remove the source file. Set by -k/--keep; args_parse() forces it
/// on whenever the output goes to stdout or test mode is used.
bool opt_keep_original = false;

/// Set by --robot.
bool opt_robot = false;

/// Set by --ignore-check.
bool opt_ignore_check = false;

/// Name used for standard input when --files or --files0 is given without
/// an argument. It is never modified or freed, but it has to be stored
/// through pointers that aren't const-qualified.
const char stdin_filename[] = "(stdin)";
30
31
/// \brief      Parse a memory usage limit and pass it to hardware_memlimit_set()
///
/// A value ending in '%' is parsed as a percentage in the range [1, 100];
/// anything else is parsed as a plain integer in the range [0, UINT64_MAX].
///
/// \param      name            Option name used when parsing a plain value
/// \param      name_percentage Option name used when parsing a percentage
/// \param      str             Option argument; a trailing '%' is
///                             overwritten with '\0' in place
/// \param      set_compress    Passed through to hardware_memlimit_set()
/// \param      set_decompress  Passed through to hardware_memlimit_set()
static void
parse_memlimit(const char *name, const char *name_percentage, char *str,
		bool set_compress, bool set_decompress)
{
	const size_t str_len = strlen(str);
	const bool is_percentage = str_len > 0 && str[str_len - 1] == '%';
	uint64_t limit;

	if (is_percentage) {
		// Cut off the '%' so that only the number is left to parse.
		str[str_len - 1] = '\0';
		limit = str_to_uint64(name_percentage, str, 1, 100);
	} else {
		// SIZE_MAX would be a more natural upper bound on 32-bit
		// systems, but UINT64_MAX is used everywhere so that
		// scripts passing values above 4 GiB keep working.
		limit = str_to_uint64(name, str, 0, UINT64_MAX);
	}

	hardware_memlimit_set(limit, set_compress, set_decompress,
			is_percentage);
}
56
57
58 static void
parse_block_list(char * str)59 parse_block_list(char *str)
60 {
61 // It must be non-empty and not begin with a comma.
62 if (str[0] == '\0' || str[0] == ',')
63 message_fatal(_("%s: Invalid argument to --block-list"), str);
64
65 // Count the number of comma-separated strings.
66 size_t count = 1;
67 for (size_t i = 0; str[i] != '\0'; ++i)
68 if (str[i] == ',')
69 ++count;
70
71 // Prevent an unlikely integer overflow.
72 if (count > SIZE_MAX / sizeof(uint64_t) - 1)
73 message_fatal(_("%s: Too many arguments to --block-list"),
74 str);
75
76 // Allocate memory to hold all the sizes specified.
77 // If --block-list was specified already, its value is forgotten.
78 free(opt_block_list);
79 opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));
80
81 for (size_t i = 0; i < count; ++i) {
82 // Locate the next comma and replace it with \0.
83 char *p = strchr(str, ',');
84 if (p != NULL)
85 *p = '\0';
86
87 if (str[0] == '\0') {
88 // There is no string, that is, a comma follows
89 // another comma. Use the previous value.
90 //
91 // NOTE: We checked earler that the first char
92 // of the whole list cannot be a comma.
93 assert(i > 0);
94 opt_block_list[i] = opt_block_list[i - 1];
95 } else {
96 opt_block_list[i] = str_to_uint64("block-list", str,
97 0, UINT64_MAX);
98
99 // Zero indicates no more new Blocks.
100 if (opt_block_list[i] == 0) {
101 if (i + 1 != count)
102 message_fatal(_("0 can only be used "
103 "as the last element "
104 "in --block-list"));
105
106 opt_block_list[i] = UINT64_MAX;
107 }
108 }
109
110 if (p == NULL)
111 break;
112 str = p + 1;
113 }
114
115 // Terminate the array.
116 opt_block_list[count] = 0;
117 return;
118 }
119
120
121 static void
parse_real(args_info * args,int argc,char ** argv)122 parse_real(args_info *args, int argc, char **argv)
123 {
124 enum {
125 OPT_X86 = INT_MIN,
126 OPT_POWERPC,
127 OPT_IA64,
128 OPT_ARM,
129 OPT_ARMTHUMB,
130 OPT_SPARC,
131 OPT_DELTA,
132 OPT_LZMA1,
133 OPT_LZMA2,
134
135 OPT_SINGLE_STREAM,
136 OPT_NO_SPARSE,
137 OPT_FILES,
138 OPT_FILES0,
139 OPT_BLOCK_SIZE,
140 OPT_BLOCK_LIST,
141 OPT_MEM_COMPRESS,
142 OPT_MEM_DECOMPRESS,
143 OPT_NO_ADJUST,
144 OPT_INFO_MEMORY,
145 OPT_ROBOT,
146 OPT_FLUSH_TIMEOUT,
147 OPT_IGNORE_CHECK,
148 };
149
150 static const char short_opts[]
151 = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
152
153 static const struct option long_opts[] = {
154 // Operation mode
155 { "compress", no_argument, NULL, 'z' },
156 { "decompress", no_argument, NULL, 'd' },
157 { "uncompress", no_argument, NULL, 'd' },
158 { "test", no_argument, NULL, 't' },
159 { "list", no_argument, NULL, 'l' },
160
161 // Operation modifiers
162 { "keep", no_argument, NULL, 'k' },
163 { "force", no_argument, NULL, 'f' },
164 { "stdout", no_argument, NULL, 'c' },
165 { "to-stdout", no_argument, NULL, 'c' },
166 { "single-stream", no_argument, NULL, OPT_SINGLE_STREAM },
167 { "no-sparse", no_argument, NULL, OPT_NO_SPARSE },
168 { "suffix", required_argument, NULL, 'S' },
169 // { "recursive", no_argument, NULL, 'r' }, // TODO
170 { "files", optional_argument, NULL, OPT_FILES },
171 { "files0", optional_argument, NULL, OPT_FILES0 },
172
173 // Basic compression settings
174 { "format", required_argument, NULL, 'F' },
175 { "check", required_argument, NULL, 'C' },
176 { "ignore-check", no_argument, NULL, OPT_IGNORE_CHECK },
177 { "block-size", required_argument, NULL, OPT_BLOCK_SIZE },
178 { "block-list", required_argument, NULL, OPT_BLOCK_LIST },
179 { "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS },
180 { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
181 { "memlimit", required_argument, NULL, 'M' },
182 { "memory", required_argument, NULL, 'M' }, // Old alias
183 { "no-adjust", no_argument, NULL, OPT_NO_ADJUST },
184 { "threads", required_argument, NULL, 'T' },
185 { "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT },
186
187 { "extreme", no_argument, NULL, 'e' },
188 { "fast", no_argument, NULL, '0' },
189 { "best", no_argument, NULL, '9' },
190
191 // Filters
192 { "lzma1", optional_argument, NULL, OPT_LZMA1 },
193 { "lzma2", optional_argument, NULL, OPT_LZMA2 },
194 { "x86", optional_argument, NULL, OPT_X86 },
195 { "powerpc", optional_argument, NULL, OPT_POWERPC },
196 { "ia64", optional_argument, NULL, OPT_IA64 },
197 { "arm", optional_argument, NULL, OPT_ARM },
198 { "armthumb", optional_argument, NULL, OPT_ARMTHUMB },
199 { "sparc", optional_argument, NULL, OPT_SPARC },
200 { "delta", optional_argument, NULL, OPT_DELTA },
201
202 // Other options
203 { "quiet", no_argument, NULL, 'q' },
204 { "verbose", no_argument, NULL, 'v' },
205 { "no-warn", no_argument, NULL, 'Q' },
206 { "robot", no_argument, NULL, OPT_ROBOT },
207 { "info-memory", no_argument, NULL, OPT_INFO_MEMORY },
208 { "help", no_argument, NULL, 'h' },
209 { "long-help", no_argument, NULL, 'H' },
210 { "version", no_argument, NULL, 'V' },
211
212 { NULL, 0, NULL, 0 }
213 };
214
215 int c;
216
217 while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
218 != -1) {
219 switch (c) {
220 // Compression preset (also for decompression if --format=raw)
221 case '0': case '1': case '2': case '3': case '4':
222 case '5': case '6': case '7': case '8': case '9':
223 coder_set_preset(c - '0');
224 break;
225
226 // --memlimit-compress
227 case OPT_MEM_COMPRESS:
228 parse_memlimit("memlimit-compress",
229 "memlimit-compress%", optarg,
230 true, false);
231 break;
232
233 // --memlimit-decompress
234 case OPT_MEM_DECOMPRESS:
235 parse_memlimit("memlimit-decompress",
236 "memlimit-decompress%", optarg,
237 false, true);
238 break;
239
240 // --memlimit
241 case 'M':
242 parse_memlimit("memlimit", "memlimit%", optarg,
243 true, true);
244 break;
245
246 // --suffix
247 case 'S':
248 suffix_set(optarg);
249 break;
250
251 case 'T':
252 // The max is from src/liblzma/common/common.h.
253 hardware_threads_set(str_to_uint64("threads",
254 optarg, 0, 16384));
255 break;
256
257 // --version
258 case 'V':
259 // This doesn't return.
260 message_version();
261
262 // --stdout
263 case 'c':
264 opt_stdout = true;
265 break;
266
267 // --decompress
268 case 'd':
269 opt_mode = MODE_DECOMPRESS;
270 break;
271
272 // --extreme
273 case 'e':
274 coder_set_extreme();
275 break;
276
277 // --force
278 case 'f':
279 opt_force = true;
280 break;
281
282 // --info-memory
283 case OPT_INFO_MEMORY:
284 // This doesn't return.
285 hardware_memlimit_show();
286
287 // --help
288 case 'h':
289 // This doesn't return.
290 message_help(false);
291
292 // --long-help
293 case 'H':
294 // This doesn't return.
295 message_help(true);
296
297 // --list
298 case 'l':
299 opt_mode = MODE_LIST;
300 break;
301
302 // --keep
303 case 'k':
304 opt_keep_original = true;
305 break;
306
307 // --quiet
308 case 'q':
309 message_verbosity_decrease();
310 break;
311
312 case 'Q':
313 set_exit_no_warn();
314 break;
315
316 case 't':
317 opt_mode = MODE_TEST;
318 break;
319
320 // --verbose
321 case 'v':
322 message_verbosity_increase();
323 break;
324
325 // --robot
326 case OPT_ROBOT:
327 opt_robot = true;
328
329 // This is to make sure that floating point numbers
330 // always have a dot as decimal separator.
331 setlocale(LC_NUMERIC, "C");
332 break;
333
334 case 'z':
335 opt_mode = MODE_COMPRESS;
336 break;
337
338 // Filter setup
339
340 case OPT_X86:
341 coder_add_filter(LZMA_FILTER_X86,
342 options_bcj(optarg));
343 break;
344
345 case OPT_POWERPC:
346 coder_add_filter(LZMA_FILTER_POWERPC,
347 options_bcj(optarg));
348 break;
349
350 case OPT_IA64:
351 coder_add_filter(LZMA_FILTER_IA64,
352 options_bcj(optarg));
353 break;
354
355 case OPT_ARM:
356 coder_add_filter(LZMA_FILTER_ARM,
357 options_bcj(optarg));
358 break;
359
360 case OPT_ARMTHUMB:
361 coder_add_filter(LZMA_FILTER_ARMTHUMB,
362 options_bcj(optarg));
363 break;
364
365 case OPT_SPARC:
366 coder_add_filter(LZMA_FILTER_SPARC,
367 options_bcj(optarg));
368 break;
369
370 case OPT_DELTA:
371 coder_add_filter(LZMA_FILTER_DELTA,
372 options_delta(optarg));
373 break;
374
375 case OPT_LZMA1:
376 coder_add_filter(LZMA_FILTER_LZMA1,
377 options_lzma(optarg));
378 break;
379
380 case OPT_LZMA2:
381 coder_add_filter(LZMA_FILTER_LZMA2,
382 options_lzma(optarg));
383 break;
384
385 // Other
386
387 // --format
388 case 'F': {
389 // Just in case, support both "lzma" and "alone" since
390 // the latter was used for forward compatibility in
391 // LZMA Utils 4.32.x.
392 static const struct {
393 char str[8];
394 enum format_type format;
395 } types[] = {
396 { "auto", FORMAT_AUTO },
397 { "xz", FORMAT_XZ },
398 { "lzma", FORMAT_LZMA },
399 { "alone", FORMAT_LZMA },
400 // { "gzip", FORMAT_GZIP },
401 // { "gz", FORMAT_GZIP },
402 { "raw", FORMAT_RAW },
403 };
404
405 size_t i = 0;
406 while (strcmp(types[i].str, optarg) != 0)
407 if (++i == ARRAY_SIZE(types))
408 message_fatal(_("%s: Unknown file "
409 "format type"),
410 optarg);
411
412 opt_format = types[i].format;
413 break;
414 }
415
416 // --check
417 case 'C': {
418 static const struct {
419 char str[8];
420 lzma_check check;
421 } types[] = {
422 { "none", LZMA_CHECK_NONE },
423 { "crc32", LZMA_CHECK_CRC32 },
424 { "crc64", LZMA_CHECK_CRC64 },
425 { "sha256", LZMA_CHECK_SHA256 },
426 };
427
428 size_t i = 0;
429 while (strcmp(types[i].str, optarg) != 0) {
430 if (++i == ARRAY_SIZE(types))
431 message_fatal(_("%s: Unsupported "
432 "integrity "
433 "check type"), optarg);
434 }
435
436 // Use a separate check in case we are using different
437 // liblzma than what was used to compile us.
438 if (!lzma_check_is_supported(types[i].check))
439 message_fatal(_("%s: Unsupported integrity "
440 "check type"), optarg);
441
442 coder_set_check(types[i].check);
443 break;
444 }
445
446 case OPT_IGNORE_CHECK:
447 opt_ignore_check = true;
448 break;
449
450 case OPT_BLOCK_SIZE:
451 opt_block_size = str_to_uint64("block-size", optarg,
452 0, LZMA_VLI_MAX);
453 break;
454
455 case OPT_BLOCK_LIST: {
456 parse_block_list(optarg);
457 break;
458 }
459
460 case OPT_SINGLE_STREAM:
461 opt_single_stream = true;
462 break;
463
464 case OPT_NO_SPARSE:
465 io_no_sparse();
466 break;
467
468 case OPT_FILES:
469 args->files_delim = '\n';
470
471 // Fall through
472
473 case OPT_FILES0:
474 if (args->files_name != NULL)
475 message_fatal(_("Only one file can be "
476 "specified with `--files' "
477 "or `--files0'."));
478
479 if (optarg == NULL) {
480 args->files_name = (char *)stdin_filename;
481 args->files_file = stdin;
482 } else {
483 args->files_name = optarg;
484 args->files_file = fopen(optarg,
485 c == OPT_FILES ? "r" : "rb");
486 if (args->files_file == NULL)
487 message_fatal("%s: %s", optarg,
488 strerror(errno));
489 }
490
491 break;
492
493 case OPT_NO_ADJUST:
494 opt_auto_adjust = false;
495 break;
496
497 case OPT_FLUSH_TIMEOUT:
498 opt_flush_timeout = str_to_uint64("flush-timeout",
499 optarg, 0, UINT64_MAX);
500 break;
501
502 default:
503 message_try_help();
504 tuklib_exit(E_ERROR, E_ERROR, false);
505 }
506 }
507
508 return;
509 }
510
511
512 static void
parse_environment(args_info * args,char * argv0,const char * varname)513 parse_environment(args_info *args, char *argv0, const char *varname)
514 {
515 char *env = getenv(varname);
516 if (env == NULL)
517 return;
518
519 // We modify the string, so make a copy of it.
520 env = xstrdup(env);
521
522 // Calculate the number of arguments in env. argc stats at one
523 // to include space for the program name.
524 int argc = 1;
525 bool prev_was_space = true;
526 for (size_t i = 0; env[i] != '\0'; ++i) {
527 // NOTE: Cast to unsigned char is needed so that correct
528 // value gets passed to isspace(), which expects
529 // unsigned char cast to int. Casting to int is done
530 // automatically due to integer promotion, but we need to
531 // force char to unsigned char manually. Otherwise 8-bit
532 // characters would get promoted to wrong value if
533 // char is signed.
534 if (isspace((unsigned char)env[i])) {
535 prev_was_space = true;
536 } else if (prev_was_space) {
537 prev_was_space = false;
538
539 // Keep argc small enough to fit into a signed int
540 // and to keep it usable for memory allocation.
541 if (++argc == my_min(
542 INT_MAX, SIZE_MAX / sizeof(char *)))
543 message_fatal(_("The environment variable "
544 "%s contains too many "
545 "arguments"), varname);
546 }
547 }
548
549 // Allocate memory to hold pointers to the arguments. Add one to get
550 // space for the terminating NULL (if some systems happen to need it).
551 char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
552 argv[0] = argv0;
553 argv[argc] = NULL;
554
555 // Go through the string again. Split the arguments using '\0'
556 // characters and add pointers to the resulting strings to argv.
557 argc = 1;
558 prev_was_space = true;
559 for (size_t i = 0; env[i] != '\0'; ++i) {
560 if (isspace((unsigned char)env[i])) {
561 prev_was_space = true;
562 env[i] = '\0';
563 } else if (prev_was_space) {
564 prev_was_space = false;
565 argv[argc++] = env + i;
566 }
567 }
568
569 // Parse the argument list we got from the environment. All non-option
570 // arguments i.e. filenames are ignored.
571 parse_real(args, argc, argv);
572
573 // Reset the state of the getopt_long() so that we can parse the
574 // command line options too. There are two incompatible ways to
575 // do it.
576 #ifdef HAVE_OPTRESET
577 // BSD
578 optind = 1;
579 optreset = 1;
580 #else
581 // GNU, Solaris
582 optind = 0;
583 #endif
584
585 // We don't need the argument list from environment anymore.
586 free(argv);
587 free(env);
588
589 return;
590 }
591
592
593 extern void
args_parse(args_info * args,int argc,char ** argv)594 args_parse(args_info *args, int argc, char **argv)
595 {
596 // Initialize those parts of *args that we need later.
597 args->files_name = NULL;
598 args->files_file = NULL;
599 args->files_delim = '\0';
600
601 // Check how we were called.
602 {
603 // Remove the leading path name, if any.
604 const char *name = strrchr(argv[0], '/');
605 if (name == NULL)
606 name = argv[0];
607 else
608 ++name;
609
610 // NOTE: It's possible that name[0] is now '\0' if argv[0]
611 // is weird, but it doesn't matter here.
612
613 // Look for full command names instead of substrings like
614 // "un", "cat", and "lz" to reduce possibility of false
615 // positives when the programs have been renamed.
616 if (strstr(name, "xzcat") != NULL) {
617 opt_mode = MODE_DECOMPRESS;
618 opt_stdout = true;
619 } else if (strstr(name, "unxz") != NULL) {
620 opt_mode = MODE_DECOMPRESS;
621 } else if (strstr(name, "lzcat") != NULL) {
622 opt_format = FORMAT_LZMA;
623 opt_mode = MODE_DECOMPRESS;
624 opt_stdout = true;
625 } else if (strstr(name, "unlzma") != NULL) {
626 opt_format = FORMAT_LZMA;
627 opt_mode = MODE_DECOMPRESS;
628 } else if (strstr(name, "lzma") != NULL) {
629 opt_format = FORMAT_LZMA;
630 }
631 }
632
633 // First the flags from the environment
634 parse_environment(args, argv[0], "XZ_DEFAULTS");
635 parse_environment(args, argv[0], "XZ_OPT");
636
637 // Then from the command line
638 parse_real(args, argc, argv);
639
640 // Never remove the source file when the destination is not on disk.
641 // In test mode the data is written nowhere, but setting opt_stdout
642 // will make the rest of the code behave well.
643 if (opt_stdout || opt_mode == MODE_TEST) {
644 opt_keep_original = true;
645 opt_stdout = true;
646 }
647
648 // When compressing, if no --format flag was used, or it
649 // was --format=auto, we compress to the .xz format.
650 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
651 opt_format = FORMAT_XZ;
652
653 // Compression settings need to be validated (options themselves and
654 // their memory usage) when compressing to any file format. It has to
655 // be done also when uncompressing raw data, since for raw decoding
656 // the options given on the command line are used to know what kind
657 // of raw data we are supposed to decode.
658 if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
659 coder_set_compression_settings();
660
661 // If no filenames are given, use stdin.
662 if (argv[optind] == NULL && args->files_name == NULL) {
663 // We don't modify or free() the "-" constant. The caller
664 // modifies this so don't make the struct itself const.
665 static char *names_stdin[2] = { (char *)"-", NULL };
666 args->arg_names = names_stdin;
667 args->arg_count = 1;
668 } else {
669 // We got at least one filename from the command line, or
670 // --files or --files0 was specified.
671 args->arg_names = argv + optind;
672 args->arg_count = argc - optind;
673 }
674
675 return;
676 }
677
678
679 #ifndef NDEBUG
680 extern void
args_free(void)681 args_free(void)
682 {
683 free(opt_block_list);
684 return;
685 }
686 #endif
687