1 //===-- Args.cpp ----------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Utility/Args.h" 10 #include "lldb/Utility/FileSpec.h" 11 #include "lldb/Utility/Stream.h" 12 #include "lldb/Utility/StringList.h" 13 #include "llvm/ADT/StringSwitch.h" 14 15 using namespace lldb; 16 using namespace lldb_private; 17 18 // A helper function for argument parsing. 19 // Parses the initial part of the first argument using normal double quote 20 // rules: backslash escapes the double quote and itself. The parsed string is 21 // appended to the second argument. The function returns the unparsed portion 22 // of the string, starting at the closing quote. 23 static llvm::StringRef ParseDoubleQuotes(llvm::StringRef quoted, 24 std::string &result) { 25 // Inside double quotes, '\' and '"' are special. 26 static const char *k_escapable_characters = "\"\\"; 27 while (true) { 28 // Skip over regular characters and append them. 29 size_t regular = quoted.find_first_of(k_escapable_characters); 30 result += quoted.substr(0, regular); 31 quoted = quoted.substr(regular); 32 33 // If we have reached the end of string or the closing quote, we're done. 34 if (quoted.empty() || quoted.front() == '"') 35 break; 36 37 // We have found a backslash. 38 quoted = quoted.drop_front(); 39 40 if (quoted.empty()) { 41 // A lone backslash at the end of string, let's just append it. 42 result += '\\'; 43 break; 44 } 45 46 // If the character after the backslash is not an allowed escapable 47 // character, we leave the character sequence untouched. 48 if (strchr(k_escapable_characters, quoted.front()) == nullptr) 49 result += '\\'; 50 51 result += quoted.front(); 52 quoted = quoted.drop_front(); 53 } 54 55 return quoted; 56 } 57 58 static size_t ArgvToArgc(const char **argv) { 59 if (!argv) 60 return 0; 61 size_t count = 0; 62 while (*argv++) 63 ++count; 64 return count; 65 } 66 67 // Trims all whitespace that can separate command line arguments from the left 68 // side of the string. 69 static llvm::StringRef ltrimForArgs(llvm::StringRef str, size_t &shift) { 70 static const char *k_space_separators = " \t"; 71 llvm::StringRef result = str.ltrim(k_space_separators); 72 shift = result.data() - str.data(); 73 return result; 74 } 75 76 // A helper function for SetCommandString. Parses a single argument from the 77 // command string, processing quotes and backslashes in a shell-like manner. 78 // The function returns a tuple consisting of the parsed argument, the quote 79 // char used, and the unparsed portion of the string starting at the first 80 // unqouted, unescaped whitespace character. 81 static std::tuple<std::string, char, llvm::StringRef> 82 ParseSingleArgument(llvm::StringRef command) { 83 // Argument can be split into multiple discontiguous pieces, for example: 84 // "Hello ""World" 85 // this would result in a single argument "Hello World" (without the quotes) 86 // since the quotes would be removed and there is not space between the 87 // strings. 88 std::string arg; 89 90 // Since we can have multiple quotes that form a single command in a command 91 // like: "Hello "world'!' (which will make a single argument "Hello world!") 92 // we remember the first quote character we encounter and use that for the 93 // quote character. 94 char first_quote_char = '\0'; 95 96 bool arg_complete = false; 97 do { 98 // Skip over regular characters and append them. 99 size_t regular = command.find_first_of(" \t\r\"'`\\"); 100 arg += command.substr(0, regular); 101 command = command.substr(regular); 102 103 if (command.empty()) 104 break; 105 106 char special = command.front(); 107 command = command.drop_front(); 108 switch (special) { 109 case '\\': 110 if (command.empty()) { 111 arg += '\\'; 112 break; 113 } 114 115 // If the character after the backslash is not an allowed escapable 116 // character, we leave the character sequence untouched. 117 if (strchr(" \t\\'\"`", command.front()) == nullptr) 118 arg += '\\'; 119 120 arg += command.front(); 121 command = command.drop_front(); 122 123 break; 124 125 case ' ': 126 case '\t': 127 case '\r': 128 // We are not inside any quotes, we just found a space after an argument. 129 // We are done. 130 arg_complete = true; 131 break; 132 133 case '"': 134 case '\'': 135 case '`': 136 // We found the start of a quote scope. 137 if (first_quote_char == '\0') 138 first_quote_char = special; 139 140 if (special == '"') 141 command = ParseDoubleQuotes(command, arg); 142 else { 143 // For single quotes, we simply skip ahead to the matching quote 144 // character (or the end of the string). 145 size_t quoted = command.find(special); 146 arg += command.substr(0, quoted); 147 command = command.substr(quoted); 148 } 149 150 // If we found a closing quote, skip it. 151 if (!command.empty()) 152 command = command.drop_front(); 153 154 break; 155 } 156 } while (!arg_complete); 157 158 return std::make_tuple(arg, first_quote_char, command); 159 } 160 161 Args::ArgEntry::ArgEntry(llvm::StringRef str, char quote, 162 std::optional<uint16_t> column) 163 : quote(quote), column(column) { 164 size_t size = str.size(); 165 ptr.reset(new char[size + 1]); 166 167 ::memcpy(data(), str.data() ? str.data() : "", size); 168 ptr[size] = 0; 169 } 170 171 // Args constructor 172 Args::Args(llvm::StringRef command) { SetCommandString(command); } 173 174 Args::Args(const Args &rhs) { *this = rhs; } 175 176 Args::Args(const StringList &list) : Args() { 177 for (const std::string &arg : list) 178 AppendArgument(arg); 179 } 180 181 Args::Args(llvm::ArrayRef<llvm::StringRef> args) : Args() { 182 for (llvm::StringRef arg : args) 183 AppendArgument(arg); 184 } 185 186 Args &Args::operator=(const Args &rhs) { 187 Clear(); 188 189 m_argv.clear(); 190 m_entries.clear(); 191 for (auto &entry : rhs.m_entries) { 192 m_entries.emplace_back(entry.ref(), entry.quote, entry.column); 193 m_argv.push_back(m_entries.back().data()); 194 } 195 m_argv.push_back(nullptr); 196 return *this; 197 } 198 199 // Destructor 200 Args::~Args() = default; 201 202 void Args::Dump(Stream &s, const char *label_name) const { 203 if (!label_name) 204 return; 205 206 int i = 0; 207 for (auto &entry : m_entries) { 208 s.Indent(); 209 s.Format("{0}[{1}]=\"{2}\"\n", label_name, i++, entry.ref()); 210 } 211 s.Format("{0}[{1}]=NULL\n", label_name, i); 212 s.EOL(); 213 } 214 215 bool Args::GetCommandString(std::string &command) const { 216 command.clear(); 217 218 for (size_t i = 0; i < m_entries.size(); ++i) { 219 if (i > 0) 220 command += ' '; 221 char quote = m_entries[i].quote; 222 if (quote != '\0') 223 command += quote; 224 command += m_entries[i].ref(); 225 if (quote != '\0') 226 command += quote; 227 } 228 229 return !m_entries.empty(); 230 } 231 232 bool Args::GetQuotedCommandString(std::string &command) const { 233 command.clear(); 234 235 for (size_t i = 0; i < m_entries.size(); ++i) { 236 if (i > 0) 237 command += ' '; 238 239 if (m_entries[i].quote) { 240 command += m_entries[i].quote; 241 command += m_entries[i].ref(); 242 command += m_entries[i].quote; 243 } else { 244 command += m_entries[i].ref(); 245 } 246 } 247 248 return !m_entries.empty(); 249 } 250 251 void Args::SetCommandString(llvm::StringRef command) { 252 Clear(); 253 m_argv.clear(); 254 255 uint16_t column = 1; 256 size_t shift = 0; 257 command = ltrimForArgs(command, shift); 258 column += shift; 259 std::string arg; 260 char quote; 261 while (!command.empty()) { 262 const char *prev = command.data(); 263 std::tie(arg, quote, command) = ParseSingleArgument(command); 264 m_entries.emplace_back(arg, quote, column); 265 m_argv.push_back(m_entries.back().data()); 266 command = ltrimForArgs(command, shift); 267 column += shift; 268 column += command.data() - prev; 269 } 270 m_argv.push_back(nullptr); 271 } 272 273 const char *Args::GetArgumentAtIndex(size_t idx) const { 274 if (idx < m_argv.size()) 275 return m_argv[idx]; 276 return nullptr; 277 } 278 279 char **Args::GetArgumentVector() { 280 assert(!m_argv.empty()); 281 // TODO: functions like execve and posix_spawnp exhibit undefined behavior 282 // when argv or envp is null. So the code below is actually wrong. However, 283 // other code in LLDB depends on it being null. The code has been acting 284 // this way for some time, so it makes sense to leave it this way until 285 // someone has the time to come along and fix it. 286 return (m_argv.size() > 1) ? m_argv.data() : nullptr; 287 } 288 289 const char **Args::GetConstArgumentVector() const { 290 assert(!m_argv.empty()); 291 return (m_argv.size() > 1) ? const_cast<const char **>(m_argv.data()) 292 : nullptr; 293 } 294 295 void Args::Shift() { 296 // Don't pop the last NULL terminator from the argv array 297 if (m_entries.empty()) 298 return; 299 m_argv.erase(m_argv.begin()); 300 m_entries.erase(m_entries.begin()); 301 } 302 303 void Args::Unshift(llvm::StringRef arg_str, char quote_char) { 304 InsertArgumentAtIndex(0, arg_str, quote_char); 305 } 306 307 void Args::AppendArguments(const Args &rhs) { 308 assert(m_argv.size() == m_entries.size() + 1); 309 assert(m_argv.back() == nullptr); 310 m_argv.pop_back(); 311 for (auto &entry : rhs.m_entries) { 312 m_entries.emplace_back(entry.ref(), entry.quote, entry.column); 313 m_argv.push_back(m_entries.back().data()); 314 } 315 m_argv.push_back(nullptr); 316 } 317 318 void Args::AppendArguments(const char **argv) { 319 size_t argc = ArgvToArgc(argv); 320 321 assert(m_argv.size() == m_entries.size() + 1); 322 assert(m_argv.back() == nullptr); 323 m_argv.pop_back(); 324 for (auto arg : llvm::ArrayRef(argv, argc)) { 325 m_entries.emplace_back(arg, '\0', std::nullopt); 326 m_argv.push_back(m_entries.back().data()); 327 } 328 329 m_argv.push_back(nullptr); 330 } 331 332 void Args::AppendArgument(llvm::StringRef arg_str, char quote_char) { 333 InsertArgumentAtIndex(GetArgumentCount(), arg_str, quote_char); 334 } 335 336 void Args::InsertArgumentAtIndex(size_t idx, llvm::StringRef arg_str, 337 char quote_char) { 338 assert(m_argv.size() == m_entries.size() + 1); 339 assert(m_argv.back() == nullptr); 340 341 if (idx > m_entries.size()) 342 return; 343 m_entries.emplace(m_entries.begin() + idx, arg_str, quote_char, std::nullopt); 344 m_argv.insert(m_argv.begin() + idx, m_entries[idx].data()); 345 } 346 347 void Args::ReplaceArgumentAtIndex(size_t idx, llvm::StringRef arg_str, 348 char quote_char) { 349 assert(m_argv.size() == m_entries.size() + 1); 350 assert(m_argv.back() == nullptr); 351 352 if (idx >= m_entries.size()) 353 return; 354 355 m_entries[idx] = ArgEntry(arg_str, quote_char, std::nullopt); 356 m_argv[idx] = m_entries[idx].data(); 357 } 358 359 void Args::DeleteArgumentAtIndex(size_t idx) { 360 if (idx >= m_entries.size()) 361 return; 362 363 m_argv.erase(m_argv.begin() + idx); 364 m_entries.erase(m_entries.begin() + idx); 365 } 366 367 void Args::SetArguments(size_t argc, const char **argv) { 368 Clear(); 369 370 auto args = llvm::ArrayRef(argv, argc); 371 m_entries.resize(argc); 372 m_argv.resize(argc + 1); 373 for (size_t i = 0; i < args.size(); ++i) { 374 char quote = 375 ((args[i][0] == '\'') || (args[i][0] == '"') || (args[i][0] == '`')) 376 ? args[i][0] 377 : '\0'; 378 379 m_entries[i] = ArgEntry(args[i], quote, std::nullopt); 380 m_argv[i] = m_entries[i].data(); 381 } 382 } 383 384 void Args::SetArguments(const char **argv) { 385 SetArguments(ArgvToArgc(argv), argv); 386 } 387 388 void Args::Clear() { 389 m_entries.clear(); 390 m_argv.clear(); 391 m_argv.push_back(nullptr); 392 } 393 394 std::string Args::GetShellSafeArgument(const FileSpec &shell, 395 llvm::StringRef unsafe_arg) { 396 struct ShellDescriptor { 397 llvm::StringRef m_basename; 398 llvm::StringRef m_escapables; 399 }; 400 401 static ShellDescriptor g_Shells[] = {{"bash", " '\"<>()&;"}, 402 {"fish", " '\"<>()&\\|;"}, 403 {"tcsh", " '\"<>()&;"}, 404 {"zsh", " '\"<>()&;\\|"}, 405 {"sh", " '\"<>()&;"}}; 406 407 // safe minimal set 408 llvm::StringRef escapables = " '\""; 409 410 auto basename = shell.GetFilename().GetStringRef(); 411 if (!basename.empty()) { 412 for (const auto &Shell : g_Shells) { 413 if (Shell.m_basename == basename) { 414 escapables = Shell.m_escapables; 415 break; 416 } 417 } 418 } 419 420 std::string safe_arg; 421 safe_arg.reserve(unsafe_arg.size()); 422 // Add a \ before every character that needs to be escaped. 423 for (char c : unsafe_arg) { 424 if (escapables.contains(c)) 425 safe_arg.push_back('\\'); 426 safe_arg.push_back(c); 427 } 428 return safe_arg; 429 } 430 431 lldb::Encoding Args::StringToEncoding(llvm::StringRef s, 432 lldb::Encoding fail_value) { 433 return llvm::StringSwitch<lldb::Encoding>(s) 434 .Case("uint", eEncodingUint) 435 .Case("sint", eEncodingSint) 436 .Case("ieee754", eEncodingIEEE754) 437 .Case("vector", eEncodingVector) 438 .Default(fail_value); 439 } 440 441 uint32_t Args::StringToGenericRegister(llvm::StringRef s) { 442 if (s.empty()) 443 return LLDB_INVALID_REGNUM; 444 uint32_t result = llvm::StringSwitch<uint32_t>(s) 445 .Case("pc", LLDB_REGNUM_GENERIC_PC) 446 .Case("sp", LLDB_REGNUM_GENERIC_SP) 447 .Case("fp", LLDB_REGNUM_GENERIC_FP) 448 .Cases("ra", "lr", LLDB_REGNUM_GENERIC_RA) 449 .Case("flags", LLDB_REGNUM_GENERIC_FLAGS) 450 .Case("arg1", LLDB_REGNUM_GENERIC_ARG1) 451 .Case("arg2", LLDB_REGNUM_GENERIC_ARG2) 452 .Case("arg3", LLDB_REGNUM_GENERIC_ARG3) 453 .Case("arg4", LLDB_REGNUM_GENERIC_ARG4) 454 .Case("arg5", LLDB_REGNUM_GENERIC_ARG5) 455 .Case("arg6", LLDB_REGNUM_GENERIC_ARG6) 456 .Case("arg7", LLDB_REGNUM_GENERIC_ARG7) 457 .Case("arg8", LLDB_REGNUM_GENERIC_ARG8) 458 .Case("tp", LLDB_REGNUM_GENERIC_TP) 459 .Default(LLDB_INVALID_REGNUM); 460 return result; 461 } 462 463 void Args::EncodeEscapeSequences(const char *src, std::string &dst) { 464 dst.clear(); 465 if (src) { 466 for (const char *p = src; *p != '\0'; ++p) { 467 size_t non_special_chars = ::strcspn(p, "\\"); 468 if (non_special_chars > 0) { 469 dst.append(p, non_special_chars); 470 p += non_special_chars; 471 if (*p == '\0') 472 break; 473 } 474 475 if (*p == '\\') { 476 ++p; // skip the slash 477 switch (*p) { 478 case 'a': 479 dst.append(1, '\a'); 480 break; 481 case 'b': 482 dst.append(1, '\b'); 483 break; 484 case 'f': 485 dst.append(1, '\f'); 486 break; 487 case 'n': 488 dst.append(1, '\n'); 489 break; 490 case 'r': 491 dst.append(1, '\r'); 492 break; 493 case 't': 494 dst.append(1, '\t'); 495 break; 496 case 'v': 497 dst.append(1, '\v'); 498 break; 499 case '\\': 500 dst.append(1, '\\'); 501 break; 502 case '\'': 503 dst.append(1, '\''); 504 break; 505 case '"': 506 dst.append(1, '"'); 507 break; 508 case '0': 509 // 1 to 3 octal chars 510 { 511 // Make a string that can hold onto the initial zero char, up to 3 512 // octal digits, and a terminating NULL. 513 char oct_str[5] = {'\0', '\0', '\0', '\0', '\0'}; 514 515 int i; 516 for (i = 0; (p[i] >= '0' && p[i] <= '7') && i < 4; ++i) 517 oct_str[i] = p[i]; 518 519 // We don't want to consume the last octal character since the main 520 // for loop will do this for us, so we advance p by one less than i 521 // (even if i is zero) 522 p += i - 1; 523 unsigned long octal_value = ::strtoul(oct_str, nullptr, 8); 524 if (octal_value <= UINT8_MAX) { 525 dst.append(1, static_cast<char>(octal_value)); 526 } 527 } 528 break; 529 530 case 'x': 531 // hex number in the format 532 if (isxdigit(p[1])) { 533 ++p; // Skip the 'x' 534 535 // Make a string that can hold onto two hex chars plus a 536 // NULL terminator 537 char hex_str[3] = {*p, '\0', '\0'}; 538 if (isxdigit(p[1])) { 539 ++p; // Skip the first of the two hex chars 540 hex_str[1] = *p; 541 } 542 543 unsigned long hex_value = strtoul(hex_str, nullptr, 16); 544 if (hex_value <= UINT8_MAX) 545 dst.append(1, static_cast<char>(hex_value)); 546 } else { 547 dst.append(1, 'x'); 548 } 549 break; 550 551 default: 552 // Just desensitize any other character by just printing what came 553 // after the '\' 554 dst.append(1, *p); 555 break; 556 } 557 } 558 } 559 } 560 } 561 562 void Args::ExpandEscapedCharacters(const char *src, std::string &dst) { 563 dst.clear(); 564 if (src) { 565 for (const char *p = src; *p != '\0'; ++p) { 566 if (llvm::isPrint(*p)) 567 dst.append(1, *p); 568 else { 569 switch (*p) { 570 case '\a': 571 dst.append("\\a"); 572 break; 573 case '\b': 574 dst.append("\\b"); 575 break; 576 case '\f': 577 dst.append("\\f"); 578 break; 579 case '\n': 580 dst.append("\\n"); 581 break; 582 case '\r': 583 dst.append("\\r"); 584 break; 585 case '\t': 586 dst.append("\\t"); 587 break; 588 case '\v': 589 dst.append("\\v"); 590 break; 591 case '\'': 592 dst.append("\\'"); 593 break; 594 case '"': 595 dst.append("\\\""); 596 break; 597 case '\\': 598 dst.append("\\\\"); 599 break; 600 default: { 601 // Just encode as octal 602 dst.append("\\0"); 603 char octal_str[32]; 604 snprintf(octal_str, sizeof(octal_str), "%o", *p); 605 dst.append(octal_str); 606 } break; 607 } 608 } 609 } 610 } 611 } 612 613 std::string Args::EscapeLLDBCommandArgument(const std::string &arg, 614 char quote_char) { 615 const char *chars_to_escape = nullptr; 616 switch (quote_char) { 617 case '\0': 618 chars_to_escape = " \t\\'\"`"; 619 break; 620 case '"': 621 chars_to_escape = "$\"`\\"; 622 break; 623 case '`': 624 case '\'': 625 return arg; 626 default: 627 assert(false && "Unhandled quote character"); 628 return arg; 629 } 630 631 std::string res; 632 res.reserve(arg.size()); 633 for (char c : arg) { 634 if (::strchr(chars_to_escape, c)) 635 res.push_back('\\'); 636 res.push_back(c); 637 } 638 return res; 639 } 640 641 OptionsWithRaw::OptionsWithRaw(llvm::StringRef arg_string) { 642 SetFromString(arg_string); 643 } 644 645 void OptionsWithRaw::SetFromString(llvm::StringRef arg_string) { 646 const llvm::StringRef original_args = arg_string; 647 648 size_t shift; 649 arg_string = ltrimForArgs(arg_string, shift); 650 std::string arg; 651 char quote; 652 653 // If the string doesn't start with a dash, we just have no options and just 654 // a raw part. 655 if (!arg_string.starts_with("-")) { 656 m_suffix = std::string(original_args); 657 return; 658 } 659 660 bool found_suffix = false; 661 while (!arg_string.empty()) { 662 // The length of the prefix before parsing. 663 std::size_t prev_prefix_length = original_args.size() - arg_string.size(); 664 665 // Parse the next argument from the remaining string. 666 std::tie(arg, quote, arg_string) = ParseSingleArgument(arg_string); 667 668 // If we get an unquoted '--' argument, then we reached the suffix part 669 // of the command. 670 Args::ArgEntry entry(arg, quote, std::nullopt); 671 if (!entry.IsQuoted() && arg == "--") { 672 // The remaining line is the raw suffix, and the line we parsed so far 673 // needs to be interpreted as arguments. 674 m_has_args = true; 675 m_suffix = std::string(arg_string); 676 found_suffix = true; 677 678 // The length of the prefix after parsing. 679 std::size_t prefix_length = original_args.size() - arg_string.size(); 680 681 // Take the string we know contains all the arguments and actually parse 682 // it as proper arguments. 683 llvm::StringRef prefix = original_args.take_front(prev_prefix_length); 684 m_args = Args(prefix); 685 m_arg_string = prefix; 686 687 // We also record the part of the string that contains the arguments plus 688 // the delimiter. 689 m_arg_string_with_delimiter = original_args.take_front(prefix_length); 690 691 // As the rest of the string became the raw suffix, we are done here. 692 break; 693 } 694 695 arg_string = ltrimForArgs(arg_string, shift); 696 } 697 698 // If we didn't find a suffix delimiter, the whole string is the raw suffix. 699 if (!found_suffix) 700 m_suffix = std::string(original_args); 701 } 702