1*3117ece4Schristos /* 2*3117ece4Schristos * Copyright (c) Meta Platforms, Inc. and affiliates. 3*3117ece4Schristos * All rights reserved. 4*3117ece4Schristos * 5*3117ece4Schristos * This source code is licensed under both the BSD-style license (found in the 6*3117ece4Schristos * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7*3117ece4Schristos * in the COPYING file in the root directory of this source tree). 8*3117ece4Schristos */ 9*3117ece4Schristos #include "Options.h" 10*3117ece4Schristos #include "util.h" 11*3117ece4Schristos #include "utils/ScopeGuard.h" 12*3117ece4Schristos 13*3117ece4Schristos #include <algorithm> 14*3117ece4Schristos #include <cassert> 15*3117ece4Schristos #include <cstdio> 16*3117ece4Schristos #include <cstring> 17*3117ece4Schristos #include <iterator> 18*3117ece4Schristos #include <thread> 19*3117ece4Schristos #include <vector> 20*3117ece4Schristos 21*3117ece4Schristos 22*3117ece4Schristos namespace pzstd { 23*3117ece4Schristos 24*3117ece4Schristos namespace { 25*3117ece4Schristos unsigned defaultNumThreads() { 26*3117ece4Schristos #ifdef PZSTD_NUM_THREADS 27*3117ece4Schristos return PZSTD_NUM_THREADS; 28*3117ece4Schristos #else 29*3117ece4Schristos return std::thread::hardware_concurrency(); 30*3117ece4Schristos #endif 31*3117ece4Schristos } 32*3117ece4Schristos 33*3117ece4Schristos unsigned parseUnsigned(const char **arg) { 34*3117ece4Schristos unsigned result = 0; 35*3117ece4Schristos while (**arg >= '0' && **arg <= '9') { 36*3117ece4Schristos result *= 10; 37*3117ece4Schristos result += **arg - '0'; 38*3117ece4Schristos ++(*arg); 39*3117ece4Schristos } 40*3117ece4Schristos return result; 41*3117ece4Schristos } 42*3117ece4Schristos 43*3117ece4Schristos const char *getArgument(const char *options, const char **argv, int &i, 44*3117ece4Schristos int argc) { 45*3117ece4Schristos if (options[1] != 0) { 46*3117ece4Schristos return options + 1; 47*3117ece4Schristos } 48*3117ece4Schristos ++i; 49*3117ece4Schristos if (i == argc) { 50*3117ece4Schristos std::fprintf(stderr, "Option -%c requires an argument, but none provided\n", 51*3117ece4Schristos *options); 52*3117ece4Schristos return nullptr; 53*3117ece4Schristos } 54*3117ece4Schristos return argv[i]; 55*3117ece4Schristos } 56*3117ece4Schristos 57*3117ece4Schristos const std::string kZstdExtension = ".zst"; 58*3117ece4Schristos constexpr char kStdIn[] = "-"; 59*3117ece4Schristos constexpr char kStdOut[] = "-"; 60*3117ece4Schristos constexpr unsigned kDefaultCompressionLevel = 3; 61*3117ece4Schristos constexpr unsigned kMaxNonUltraCompressionLevel = 19; 62*3117ece4Schristos 63*3117ece4Schristos #ifdef _WIN32 64*3117ece4Schristos const char nullOutput[] = "nul"; 65*3117ece4Schristos #else 66*3117ece4Schristos const char nullOutput[] = "/dev/null"; 67*3117ece4Schristos #endif 68*3117ece4Schristos 69*3117ece4Schristos void notSupported(const char *option) { 70*3117ece4Schristos std::fprintf(stderr, "Operation not supported: %s\n", option); 71*3117ece4Schristos } 72*3117ece4Schristos 73*3117ece4Schristos void usage() { 74*3117ece4Schristos std::fprintf(stderr, "Usage:\n"); 75*3117ece4Schristos std::fprintf(stderr, " pzstd [args] [FILE(s)]\n"); 76*3117ece4Schristos std::fprintf(stderr, "Parallel ZSTD options:\n"); 77*3117ece4Schristos std::fprintf(stderr, " -p, --processes # : number of threads to use for (de)compression (default:<numcpus>)\n"); 78*3117ece4Schristos 79*3117ece4Schristos std::fprintf(stderr, "ZSTD options:\n"); 80*3117ece4Schristos std::fprintf(stderr, " -# : # compression level (1-%d, default:%d)\n", kMaxNonUltraCompressionLevel, kDefaultCompressionLevel); 81*3117ece4Schristos std::fprintf(stderr, " -d, --decompress : decompression\n"); 82*3117ece4Schristos std::fprintf(stderr, " -o file : result stored into `file` (only if 1 input file)\n"); 83*3117ece4Schristos std::fprintf(stderr, " -f, --force : overwrite output without prompting, (de)compress links\n"); 84*3117ece4Schristos std::fprintf(stderr, " --rm : remove source file(s) after successful (de)compression\n"); 85*3117ece4Schristos std::fprintf(stderr, " -k, --keep : preserve source file(s) (default)\n"); 86*3117ece4Schristos std::fprintf(stderr, " -h, --help : display help and exit\n"); 87*3117ece4Schristos std::fprintf(stderr, " -V, --version : display version number and exit\n"); 88*3117ece4Schristos std::fprintf(stderr, " -v, --verbose : verbose mode; specify multiple times to increase log level (default:2)\n"); 89*3117ece4Schristos std::fprintf(stderr, " -q, --quiet : suppress warnings; specify twice to suppress errors too\n"); 90*3117ece4Schristos std::fprintf(stderr, " -c, --stdout : write to standard output (even if it is the console)\n"); 91*3117ece4Schristos #ifdef UTIL_HAS_CREATEFILELIST 92*3117ece4Schristos std::fprintf(stderr, " -r : operate recursively on directories\n"); 93*3117ece4Schristos #endif 94*3117ece4Schristos std::fprintf(stderr, " --ultra : enable levels beyond %i, up to %i (requires more memory)\n", kMaxNonUltraCompressionLevel, ZSTD_maxCLevel()); 95*3117ece4Schristos std::fprintf(stderr, " -C, --check : integrity check (default)\n"); 96*3117ece4Schristos std::fprintf(stderr, " --no-check : no integrity check\n"); 97*3117ece4Schristos std::fprintf(stderr, " -t, --test : test compressed file integrity\n"); 98*3117ece4Schristos std::fprintf(stderr, " -- : all arguments after \"--\" are treated as files\n"); 99*3117ece4Schristos } 100*3117ece4Schristos } // anonymous namespace 101*3117ece4Schristos 102*3117ece4Schristos Options::Options() 103*3117ece4Schristos : numThreads(defaultNumThreads()), maxWindowLog(23), 104*3117ece4Schristos compressionLevel(kDefaultCompressionLevel), decompress(false), 105*3117ece4Schristos overwrite(false), keepSource(true), writeMode(WriteMode::Auto), 106*3117ece4Schristos checksum(true), verbosity(2) {} 107*3117ece4Schristos 108*3117ece4Schristos Options::Status Options::parse(int argc, const char **argv) { 109*3117ece4Schristos bool test = false; 110*3117ece4Schristos bool recursive = false; 111*3117ece4Schristos bool ultra = false; 112*3117ece4Schristos bool forceStdout = false; 113*3117ece4Schristos bool followLinks = false; 114*3117ece4Schristos // Local copy of input files, which are pointers into argv. 115*3117ece4Schristos std::vector<const char *> localInputFiles; 116*3117ece4Schristos for (int i = 1; i < argc; ++i) { 117*3117ece4Schristos const char *arg = argv[i]; 118*3117ece4Schristos // Protect against empty arguments 119*3117ece4Schristos if (arg[0] == 0) { 120*3117ece4Schristos continue; 121*3117ece4Schristos } 122*3117ece4Schristos // Everything after "--" is an input file 123*3117ece4Schristos if (!std::strcmp(arg, "--")) { 124*3117ece4Schristos ++i; 125*3117ece4Schristos std::copy(argv + i, argv + argc, std::back_inserter(localInputFiles)); 126*3117ece4Schristos break; 127*3117ece4Schristos } 128*3117ece4Schristos // Long arguments that don't have a short option 129*3117ece4Schristos { 130*3117ece4Schristos bool isLongOption = true; 131*3117ece4Schristos if (!std::strcmp(arg, "--rm")) { 132*3117ece4Schristos keepSource = false; 133*3117ece4Schristos } else if (!std::strcmp(arg, "--ultra")) { 134*3117ece4Schristos ultra = true; 135*3117ece4Schristos maxWindowLog = 0; 136*3117ece4Schristos } else if (!std::strcmp(arg, "--no-check")) { 137*3117ece4Schristos checksum = false; 138*3117ece4Schristos } else if (!std::strcmp(arg, "--sparse")) { 139*3117ece4Schristos writeMode = WriteMode::Sparse; 140*3117ece4Schristos notSupported("Sparse mode"); 141*3117ece4Schristos return Status::Failure; 142*3117ece4Schristos } else if (!std::strcmp(arg, "--no-sparse")) { 143*3117ece4Schristos writeMode = WriteMode::Regular; 144*3117ece4Schristos notSupported("Sparse mode"); 145*3117ece4Schristos return Status::Failure; 146*3117ece4Schristos } else if (!std::strcmp(arg, "--dictID")) { 147*3117ece4Schristos notSupported(arg); 148*3117ece4Schristos return Status::Failure; 149*3117ece4Schristos } else if (!std::strcmp(arg, "--no-dictID")) { 150*3117ece4Schristos notSupported(arg); 151*3117ece4Schristos return Status::Failure; 152*3117ece4Schristos } else { 153*3117ece4Schristos isLongOption = false; 154*3117ece4Schristos } 155*3117ece4Schristos if (isLongOption) { 156*3117ece4Schristos continue; 157*3117ece4Schristos } 158*3117ece4Schristos } 159*3117ece4Schristos // Arguments with a short option simply set their short option. 160*3117ece4Schristos const char *options = nullptr; 161*3117ece4Schristos if (!std::strcmp(arg, "--processes")) { 162*3117ece4Schristos options = "p"; 163*3117ece4Schristos } else if (!std::strcmp(arg, "--version")) { 164*3117ece4Schristos options = "V"; 165*3117ece4Schristos } else if (!std::strcmp(arg, "--help")) { 166*3117ece4Schristos options = "h"; 167*3117ece4Schristos } else if (!std::strcmp(arg, "--decompress")) { 168*3117ece4Schristos options = "d"; 169*3117ece4Schristos } else if (!std::strcmp(arg, "--force")) { 170*3117ece4Schristos options = "f"; 171*3117ece4Schristos } else if (!std::strcmp(arg, "--stdout")) { 172*3117ece4Schristos options = "c"; 173*3117ece4Schristos } else if (!std::strcmp(arg, "--keep")) { 174*3117ece4Schristos options = "k"; 175*3117ece4Schristos } else if (!std::strcmp(arg, "--verbose")) { 176*3117ece4Schristos options = "v"; 177*3117ece4Schristos } else if (!std::strcmp(arg, "--quiet")) { 178*3117ece4Schristos options = "q"; 179*3117ece4Schristos } else if (!std::strcmp(arg, "--check")) { 180*3117ece4Schristos options = "C"; 181*3117ece4Schristos } else if (!std::strcmp(arg, "--test")) { 182*3117ece4Schristos options = "t"; 183*3117ece4Schristos } else if (arg[0] == '-' && arg[1] != 0) { 184*3117ece4Schristos options = arg + 1; 185*3117ece4Schristos } else { 186*3117ece4Schristos localInputFiles.emplace_back(arg); 187*3117ece4Schristos continue; 188*3117ece4Schristos } 189*3117ece4Schristos assert(options != nullptr); 190*3117ece4Schristos 191*3117ece4Schristos bool finished = false; 192*3117ece4Schristos while (!finished && *options != 0) { 193*3117ece4Schristos // Parse the compression level 194*3117ece4Schristos if (*options >= '0' && *options <= '9') { 195*3117ece4Schristos compressionLevel = parseUnsigned(&options); 196*3117ece4Schristos continue; 197*3117ece4Schristos } 198*3117ece4Schristos 199*3117ece4Schristos switch (*options) { 200*3117ece4Schristos case 'h': 201*3117ece4Schristos case 'H': 202*3117ece4Schristos usage(); 203*3117ece4Schristos return Status::Message; 204*3117ece4Schristos case 'V': 205*3117ece4Schristos std::fprintf(stderr, "PZSTD version: %s.\n", ZSTD_VERSION_STRING); 206*3117ece4Schristos return Status::Message; 207*3117ece4Schristos case 'p': { 208*3117ece4Schristos finished = true; 209*3117ece4Schristos const char *optionArgument = getArgument(options, argv, i, argc); 210*3117ece4Schristos if (optionArgument == nullptr) { 211*3117ece4Schristos return Status::Failure; 212*3117ece4Schristos } 213*3117ece4Schristos if (*optionArgument < '0' || *optionArgument > '9') { 214*3117ece4Schristos std::fprintf(stderr, "Option -p expects a number, but %s provided\n", 215*3117ece4Schristos optionArgument); 216*3117ece4Schristos return Status::Failure; 217*3117ece4Schristos } 218*3117ece4Schristos numThreads = parseUnsigned(&optionArgument); 219*3117ece4Schristos if (*optionArgument != 0) { 220*3117ece4Schristos std::fprintf(stderr, 221*3117ece4Schristos "Option -p expects a number, but %u%s provided\n", 222*3117ece4Schristos numThreads, optionArgument); 223*3117ece4Schristos return Status::Failure; 224*3117ece4Schristos } 225*3117ece4Schristos break; 226*3117ece4Schristos } 227*3117ece4Schristos case 'o': { 228*3117ece4Schristos finished = true; 229*3117ece4Schristos const char *optionArgument = getArgument(options, argv, i, argc); 230*3117ece4Schristos if (optionArgument == nullptr) { 231*3117ece4Schristos return Status::Failure; 232*3117ece4Schristos } 233*3117ece4Schristos outputFile = optionArgument; 234*3117ece4Schristos break; 235*3117ece4Schristos } 236*3117ece4Schristos case 'C': 237*3117ece4Schristos checksum = true; 238*3117ece4Schristos break; 239*3117ece4Schristos case 'k': 240*3117ece4Schristos keepSource = true; 241*3117ece4Schristos break; 242*3117ece4Schristos case 'd': 243*3117ece4Schristos decompress = true; 244*3117ece4Schristos break; 245*3117ece4Schristos case 'f': 246*3117ece4Schristos overwrite = true; 247*3117ece4Schristos forceStdout = true; 248*3117ece4Schristos followLinks = true; 249*3117ece4Schristos break; 250*3117ece4Schristos case 't': 251*3117ece4Schristos test = true; 252*3117ece4Schristos decompress = true; 253*3117ece4Schristos break; 254*3117ece4Schristos #ifdef UTIL_HAS_CREATEFILELIST 255*3117ece4Schristos case 'r': 256*3117ece4Schristos recursive = true; 257*3117ece4Schristos break; 258*3117ece4Schristos #endif 259*3117ece4Schristos case 'c': 260*3117ece4Schristos outputFile = kStdOut; 261*3117ece4Schristos forceStdout = true; 262*3117ece4Schristos break; 263*3117ece4Schristos case 'v': 264*3117ece4Schristos ++verbosity; 265*3117ece4Schristos break; 266*3117ece4Schristos case 'q': 267*3117ece4Schristos --verbosity; 268*3117ece4Schristos // Ignore them for now 269*3117ece4Schristos break; 270*3117ece4Schristos // Unsupported options from Zstd 271*3117ece4Schristos case 'D': 272*3117ece4Schristos case 's': 273*3117ece4Schristos notSupported("Zstd dictionaries."); 274*3117ece4Schristos return Status::Failure; 275*3117ece4Schristos case 'b': 276*3117ece4Schristos case 'e': 277*3117ece4Schristos case 'i': 278*3117ece4Schristos case 'B': 279*3117ece4Schristos notSupported("Zstd benchmarking options."); 280*3117ece4Schristos return Status::Failure; 281*3117ece4Schristos default: 282*3117ece4Schristos std::fprintf(stderr, "Invalid argument: %s\n", arg); 283*3117ece4Schristos return Status::Failure; 284*3117ece4Schristos } 285*3117ece4Schristos if (!finished) { 286*3117ece4Schristos ++options; 287*3117ece4Schristos } 288*3117ece4Schristos } // while (*options != 0); 289*3117ece4Schristos } // for (int i = 1; i < argc; ++i); 290*3117ece4Schristos 291*3117ece4Schristos // Set options for test mode 292*3117ece4Schristos if (test) { 293*3117ece4Schristos outputFile = nullOutput; 294*3117ece4Schristos keepSource = true; 295*3117ece4Schristos } 296*3117ece4Schristos 297*3117ece4Schristos // Input file defaults to standard input if not provided. 298*3117ece4Schristos if (localInputFiles.empty()) { 299*3117ece4Schristos localInputFiles.emplace_back(kStdIn); 300*3117ece4Schristos } 301*3117ece4Schristos 302*3117ece4Schristos // Check validity of input files 303*3117ece4Schristos if (localInputFiles.size() > 1) { 304*3117ece4Schristos const auto it = std::find(localInputFiles.begin(), localInputFiles.end(), 305*3117ece4Schristos std::string{kStdIn}); 306*3117ece4Schristos if (it != localInputFiles.end()) { 307*3117ece4Schristos std::fprintf( 308*3117ece4Schristos stderr, 309*3117ece4Schristos "Cannot specify standard input when handling multiple files\n"); 310*3117ece4Schristos return Status::Failure; 311*3117ece4Schristos } 312*3117ece4Schristos } 313*3117ece4Schristos if (localInputFiles.size() > 1 || recursive) { 314*3117ece4Schristos if (!outputFile.empty() && outputFile != nullOutput) { 315*3117ece4Schristos std::fprintf( 316*3117ece4Schristos stderr, 317*3117ece4Schristos "Cannot specify an output file when handling multiple inputs\n"); 318*3117ece4Schristos return Status::Failure; 319*3117ece4Schristos } 320*3117ece4Schristos } 321*3117ece4Schristos 322*3117ece4Schristos g_utilDisplayLevel = verbosity; 323*3117ece4Schristos // Remove local input files that are symbolic links 324*3117ece4Schristos if (!followLinks) { 325*3117ece4Schristos std::remove_if(localInputFiles.begin(), localInputFiles.end(), 326*3117ece4Schristos [&](const char *path) { 327*3117ece4Schristos bool isLink = UTIL_isLink(path); 328*3117ece4Schristos if (isLink && verbosity >= 2) { 329*3117ece4Schristos std::fprintf( 330*3117ece4Schristos stderr, 331*3117ece4Schristos "Warning : %s is symbolic link, ignoring\n", 332*3117ece4Schristos path); 333*3117ece4Schristos } 334*3117ece4Schristos return isLink; 335*3117ece4Schristos }); 336*3117ece4Schristos } 337*3117ece4Schristos 338*3117ece4Schristos // Translate input files/directories into files to (de)compress 339*3117ece4Schristos if (recursive) { 340*3117ece4Schristos FileNamesTable* const files = UTIL_createExpandedFNT(localInputFiles.data(), localInputFiles.size(), followLinks); 341*3117ece4Schristos if (files == nullptr) { 342*3117ece4Schristos std::fprintf(stderr, "Error traversing directories\n"); 343*3117ece4Schristos return Status::Failure; 344*3117ece4Schristos } 345*3117ece4Schristos auto guard = 346*3117ece4Schristos makeScopeGuard([&] { UTIL_freeFileNamesTable(files); }); 347*3117ece4Schristos if (files->tableSize == 0) { 348*3117ece4Schristos std::fprintf(stderr, "No files found\n"); 349*3117ece4Schristos return Status::Failure; 350*3117ece4Schristos } 351*3117ece4Schristos inputFiles.resize(files->tableSize); 352*3117ece4Schristos std::copy(files->fileNames, files->fileNames + files->tableSize, inputFiles.begin()); 353*3117ece4Schristos } else { 354*3117ece4Schristos inputFiles.resize(localInputFiles.size()); 355*3117ece4Schristos std::copy(localInputFiles.begin(), localInputFiles.end(), 356*3117ece4Schristos inputFiles.begin()); 357*3117ece4Schristos } 358*3117ece4Schristos localInputFiles.clear(); 359*3117ece4Schristos assert(!inputFiles.empty()); 360*3117ece4Schristos 361*3117ece4Schristos // If reading from standard input, default to standard output 362*3117ece4Schristos if (inputFiles[0] == kStdIn && outputFile.empty()) { 363*3117ece4Schristos assert(inputFiles.size() == 1); 364*3117ece4Schristos outputFile = "-"; 365*3117ece4Schristos } 366*3117ece4Schristos 367*3117ece4Schristos if (inputFiles[0] == kStdIn && IS_CONSOLE(stdin)) { 368*3117ece4Schristos assert(inputFiles.size() == 1); 369*3117ece4Schristos std::fprintf(stderr, "Cannot read input from interactive console\n"); 370*3117ece4Schristos return Status::Failure; 371*3117ece4Schristos } 372*3117ece4Schristos if (outputFile == "-" && IS_CONSOLE(stdout) && !(forceStdout && decompress)) { 373*3117ece4Schristos std::fprintf(stderr, "Will not write to console stdout unless -c or -f is " 374*3117ece4Schristos "specified and decompressing\n"); 375*3117ece4Schristos return Status::Failure; 376*3117ece4Schristos } 377*3117ece4Schristos 378*3117ece4Schristos // Check compression level 379*3117ece4Schristos { 380*3117ece4Schristos unsigned maxCLevel = 381*3117ece4Schristos ultra ? ZSTD_maxCLevel() : kMaxNonUltraCompressionLevel; 382*3117ece4Schristos if (compressionLevel > maxCLevel || compressionLevel == 0) { 383*3117ece4Schristos std::fprintf(stderr, "Invalid compression level %u.\n", compressionLevel); 384*3117ece4Schristos return Status::Failure; 385*3117ece4Schristos } 386*3117ece4Schristos } 387*3117ece4Schristos 388*3117ece4Schristos // Check that numThreads is set 389*3117ece4Schristos if (numThreads == 0) { 390*3117ece4Schristos std::fprintf(stderr, "Invalid arguments: # of threads not specified " 391*3117ece4Schristos "and unable to determine hardware concurrency.\n"); 392*3117ece4Schristos return Status::Failure; 393*3117ece4Schristos } 394*3117ece4Schristos 395*3117ece4Schristos // Modify verbosity 396*3117ece4Schristos // If we are piping input and output, turn off interaction 397*3117ece4Schristos if (inputFiles[0] == kStdIn && outputFile == kStdOut && verbosity == 2) { 398*3117ece4Schristos verbosity = 1; 399*3117ece4Schristos } 400*3117ece4Schristos // If we are in multi-file mode, turn off interaction 401*3117ece4Schristos if (inputFiles.size() > 1 && verbosity == 2) { 402*3117ece4Schristos verbosity = 1; 403*3117ece4Schristos } 404*3117ece4Schristos 405*3117ece4Schristos return Status::Success; 406*3117ece4Schristos } 407*3117ece4Schristos 408*3117ece4Schristos std::string Options::getOutputFile(const std::string &inputFile) const { 409*3117ece4Schristos if (!outputFile.empty()) { 410*3117ece4Schristos return outputFile; 411*3117ece4Schristos } 412*3117ece4Schristos // Attempt to add/remove zstd extension from the input file 413*3117ece4Schristos if (decompress) { 414*3117ece4Schristos int stemSize = inputFile.size() - kZstdExtension.size(); 415*3117ece4Schristos if (stemSize > 0 && inputFile.substr(stemSize) == kZstdExtension) { 416*3117ece4Schristos return inputFile.substr(0, stemSize); 417*3117ece4Schristos } else { 418*3117ece4Schristos return ""; 419*3117ece4Schristos } 420*3117ece4Schristos } else { 421*3117ece4Schristos return inputFile + kZstdExtension; 422*3117ece4Schristos } 423*3117ece4Schristos } 424*3117ece4Schristos } 425